diff --git a/checkpoint-10075/config.json b/checkpoint-10075/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dbe4666ca61b271d1e7307d3cdd7e387a877b181 --- /dev/null +++ b/checkpoint-10075/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "models/openai/whisper-large-v2/finetune/jacob_filter/checkpoint-6717", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-10075/generation_config.json b/checkpoint-10075/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/checkpoint-10075/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/checkpoint-10075/model-00001-of-00002.safetensors b/checkpoint-10075/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c5f872331842c3730dc26ca07f2c9c49d5158ed --- /dev/null +++ b/checkpoint-10075/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f3ff75421447334fd0a2ead84a356ec07b488e43b6492c807caeff59f8c7641 +size 4992706480 diff --git a/checkpoint-10075/model-00002-of-00002.safetensors b/checkpoint-10075/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76be57a0d12bdfa1173694203c4eb56a8f762677 --- /dev/null +++ b/checkpoint-10075/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29e8bfbe4544338e59c49c157627ff9fd563af558246fe0940e874b402a9e880 +size 1180663192 diff --git a/checkpoint-10075/model.safetensors.index.json b/checkpoint-10075/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/checkpoint-10075/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-10075/optimizer.pt b/checkpoint-10075/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ac34772dfe8c4346bfbe57a68e81147f7f22b41 --- /dev/null +++ b/checkpoint-10075/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e094cf541543b4e1a8038f87df9518b3b8a73e713aee1760587b0f404e45739 +size 3095074288 diff --git a/checkpoint-10075/preprocessor_config.json b/checkpoint-10075/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-10075/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-10075/rng_state.pth b/checkpoint-10075/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..de5b8bb10cec2e531f589e8167d0502f629ace10 --- /dev/null +++ b/checkpoint-10075/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a5fd5470791ad73efaef8471cd3e2f2da4ef4c2cff54ddd562898211ac51825 +size 14244 diff --git a/checkpoint-10075/scheduler.pt b/checkpoint-10075/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9547cce9e52a13ebb2b03e745c79a47df8d94e13 --- /dev/null +++ b/checkpoint-10075/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d17eded206da68210dfc655022b88a19b27a6dc1deea237f15ca64d17aae0dc8 +size 1064 diff --git a/checkpoint-10075/trainer_state.json b/checkpoint-10075/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8ba2b8bfb4dfd196369c8c82cfa1efecd54cd653 --- /dev/null +++ b/checkpoint-10075/trainer_state.json @@ -0,0 +1,645 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4999255620068483, + "eval_steps": 500, + "global_step": 3358, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.722084367245657e-08, + "loss": 0.2836, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 7.692307692307692e-08, + "loss": 0.2111, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.1662531017369727e-07, + "loss": 0.2145, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 1.563275434243176e-07, + "loss": 0.2375, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 1.9602977667493795e-07, + "loss": 0.1839, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 2.3573200992555832e-07, + "loss": 0.2864, + "step": 192 + }, + { + "epoch": 0.03, + "learning_rate": 2.7543424317617863e-07, + "loss": 0.1799, + "step": 224 + }, + { + "epoch": 0.04, + "learning_rate": 3.1513647642679897e-07, + "loss": 0.1609, + "step": 256 + }, + { + "epoch": 0.04, + "learning_rate": 3.5483870967741936e-07, + "loss": 0.1434, + "step": 288 + }, + { + "epoch": 0.05, + "learning_rate": 3.945409429280397e-07, + "loss": 0.1699, + "step": 320 + }, + { + "epoch": 0.05, + "learning_rate": 4.3424317617866004e-07, + "loss": 0.1898, + "step": 352 + }, + { + "epoch": 0.06, + "learning_rate": 4.739454094292804e-07, + "loss": 0.1665, + "step": 384 + }, + { + "epoch": 0.06, + "learning_rate": 5.136476426799007e-07, + "loss": 0.1358, + "step": 416 + }, + { + "epoch": 0.07, + "learning_rate": 5.533498759305211e-07, + "loss": 0.1713, + "step": 448 + }, + { + "epoch": 0.07, + "learning_rate": 5.930521091811415e-07, + "loss": 0.1817, + "step": 480 + }, + { + "epoch": 0.08, + "learning_rate": 6.327543424317618e-07, + "loss": 0.149, + "step": 512 + }, + { + "epoch": 0.08, + "learning_rate": 6.724565756823821e-07, + "loss": 0.165, + "step": 544 + }, + { + "epoch": 0.09, + "learning_rate": 7.121588089330024e-07, + "loss": 0.2282, + "step": 576 + }, + { + "epoch": 0.09, + "learning_rate": 7.518610421836227e-07, + "loss": 0.1666, + "step": 608 + }, + { + "epoch": 0.1, + "learning_rate": 7.915632754342431e-07, + "loss": 0.1229, + "step": 640 + }, + { + "epoch": 0.1, + "learning_rate": 8.312655086848634e-07, + "loss": 0.2118, + "step": 672 + }, + { + "epoch": 0.1, + "learning_rate": 8.709677419354838e-07, + "loss": 0.1869, + "step": 704 + }, + { + "epoch": 0.11, + "learning_rate": 9.106699751861042e-07, + "loss": 0.2196, + "step": 736 + }, + { + "epoch": 0.11, + "learning_rate": 9.503722084367245e-07, + "loss": 0.0936, + "step": 768 + }, + { + "epoch": 0.12, + "learning_rate": 9.90074441687345e-07, + "loss": 0.1839, + "step": 800 + }, + { + "epoch": 0.12, + "learning_rate": 9.980994615140957e-07, + "loss": 0.1675, + "step": 832 + }, + { + "epoch": 0.13, + "learning_rate": 9.955654101995564e-07, + "loss": 0.1595, + "step": 864 + }, + { + "epoch": 0.13, + "learning_rate": 9.930313588850174e-07, + "loss": 0.1556, + "step": 896 + }, + { + "epoch": 0.14, + "learning_rate": 9.905764966740576e-07, + "loss": 0.1711, + "step": 928 + }, + { + "epoch": 0.14, + "learning_rate": 9.880424453595185e-07, + "loss": 0.1777, + "step": 960 + }, + { + "epoch": 0.15, + "learning_rate": 9.855083940449792e-07, + "loss": 0.2031, + "step": 992 + }, + { + "epoch": 0.15, + "learning_rate": 9.829743427304402e-07, + "loss": 0.1529, + "step": 1024 + }, + { + "epoch": 0.16, + "learning_rate": 9.804402914159011e-07, + "loss": 0.1365, + "step": 1056 + }, + { + "epoch": 0.16, + "learning_rate": 9.77906240101362e-07, + "loss": 0.1922, + "step": 1088 + }, + { + "epoch": 0.17, + "learning_rate": 9.75372188786823e-07, + "loss": 0.133, + "step": 1120 + }, + { + "epoch": 0.17, + "learning_rate": 9.728381374722837e-07, + "loss": 0.1692, + "step": 1152 + }, + { + "epoch": 0.18, + "learning_rate": 9.703040861577447e-07, + "loss": 0.1022, + "step": 1184 + }, + { + "epoch": 0.18, + "learning_rate": 9.677700348432054e-07, + "loss": 0.2052, + "step": 1216 + }, + { + "epoch": 0.19, + "learning_rate": 9.652359835286664e-07, + "loss": 0.1546, + "step": 1248 + }, + { + "epoch": 0.19, + "learning_rate": 9.627019322141273e-07, + "loss": 0.149, + "step": 1280 + }, + { + "epoch": 0.2, + "learning_rate": 9.601678808995883e-07, + "loss": 0.1281, + "step": 1312 + }, + { + "epoch": 0.2, + "learning_rate": 9.57633829585049e-07, + "loss": 0.1437, + "step": 1344 + }, + { + "epoch": 0.2, + "learning_rate": 9.5509977827051e-07, + "loss": 0.2097, + "step": 1376 + }, + { + "epoch": 0.21, + "learning_rate": 9.525657269559708e-07, + "loss": 0.1308, + "step": 1408 + }, + { + "epoch": 0.21, + "learning_rate": 9.500316756414317e-07, + "loss": 0.1691, + "step": 1440 + }, + { + "epoch": 0.22, + "learning_rate": 9.474976243268927e-07, + "loss": 0.2319, + "step": 1472 + }, + { + "epoch": 0.22, + "learning_rate": 9.449635730123534e-07, + "loss": 0.2226, + "step": 1504 + }, + { + "epoch": 0.23, + "learning_rate": 9.424295216978143e-07, + "loss": 0.1789, + "step": 1536 + }, + { + "epoch": 0.23, + "learning_rate": 9.398954703832752e-07, + "loss": 0.1932, + "step": 1568 + }, + { + "epoch": 0.24, + "learning_rate": 9.373614190687361e-07, + "loss": 0.1718, + "step": 1600 + }, + { + "epoch": 0.24, + "learning_rate": 9.34827367754197e-07, + "loss": 0.156, + "step": 1632 + }, + { + "epoch": 0.25, + "learning_rate": 9.322933164396578e-07, + "loss": 0.1512, + "step": 1664 + }, + { + "epoch": 0.25, + "learning_rate": 9.297592651251187e-07, + "loss": 0.0968, + "step": 1696 + }, + { + "epoch": 0.26, + "learning_rate": 9.272252138105796e-07, + "loss": 0.0932, + "step": 1728 + }, + { + "epoch": 0.26, + "learning_rate": 9.246911624960405e-07, + "loss": 0.2464, + "step": 1760 + }, + { + "epoch": 0.27, + "learning_rate": 9.221571111815014e-07, + "loss": 0.2036, + "step": 1792 + }, + { + "epoch": 0.27, + "learning_rate": 9.196230598669623e-07, + "loss": 0.1245, + "step": 1824 + }, + { + "epoch": 0.28, + "learning_rate": 9.170890085524232e-07, + "loss": 0.1097, + "step": 1856 + }, + { + "epoch": 0.28, + "learning_rate": 9.14554957237884e-07, + "loss": 0.1844, + "step": 1888 + }, + { + "epoch": 0.29, + "learning_rate": 9.120209059233449e-07, + "loss": 0.1114, + "step": 1920 + }, + { + "epoch": 0.29, + "learning_rate": 9.094868546088058e-07, + "loss": 0.1992, + "step": 1952 + }, + { + "epoch": 0.3, + "learning_rate": 9.069528032942667e-07, + "loss": 0.1721, + "step": 1984 + }, + { + "epoch": 0.3, + "learning_rate": 9.044187519797275e-07, + "loss": 0.1473, + "step": 2016 + }, + { + "epoch": 0.3, + "learning_rate": 9.018847006651884e-07, + "loss": 0.1865, + "step": 2048 + }, + { + "epoch": 0.31, + "learning_rate": 8.993506493506493e-07, + "loss": 0.1583, + "step": 2080 + }, + { + "epoch": 0.31, + "learning_rate": 8.968165980361102e-07, + "loss": 0.1866, + "step": 2112 + }, + { + "epoch": 0.32, + "learning_rate": 8.942825467215711e-07, + "loss": 0.1617, + "step": 2144 + }, + { + "epoch": 0.32, + "learning_rate": 8.917484954070319e-07, + "loss": 0.1189, + "step": 2176 + }, + { + "epoch": 0.33, + "learning_rate": 8.892144440924928e-07, + "loss": 0.148, + "step": 2208 + }, + { + "epoch": 0.33, + "learning_rate": 8.866803927779537e-07, + "loss": 0.131, + "step": 2240 + }, + { + "epoch": 0.34, + "learning_rate": 8.841463414634146e-07, + "loss": 0.2261, + "step": 2272 + }, + { + "epoch": 0.34, + "learning_rate": 8.816122901488755e-07, + "loss": 0.1742, + "step": 2304 + }, + { + "epoch": 0.35, + "learning_rate": 8.790782388343364e-07, + "loss": 0.164, + "step": 2336 + }, + { + "epoch": 0.35, + "learning_rate": 8.765441875197972e-07, + "loss": 0.1161, + "step": 2368 + }, + { + "epoch": 0.36, + "learning_rate": 8.74010136205258e-07, + "loss": 0.1636, + "step": 2400 + }, + { + "epoch": 0.36, + "learning_rate": 8.71476084890719e-07, + "loss": 0.2416, + "step": 2432 + }, + { + "epoch": 0.37, + "learning_rate": 8.689420335761799e-07, + "loss": 0.1632, + "step": 2464 + }, + { + "epoch": 0.37, + "learning_rate": 8.664079822616408e-07, + "loss": 0.1477, + "step": 2496 + }, + { + "epoch": 0.38, + "learning_rate": 8.638739309471016e-07, + "loss": 0.2083, + "step": 2528 + }, + { + "epoch": 0.38, + "learning_rate": 8.613398796325625e-07, + "loss": 0.1599, + "step": 2560 + }, + { + "epoch": 0.39, + "learning_rate": 8.588058283180234e-07, + "loss": 0.1817, + "step": 2592 + }, + { + "epoch": 0.39, + "learning_rate": 8.562717770034843e-07, + "loss": 0.1005, + "step": 2624 + }, + { + "epoch": 0.4, + "learning_rate": 8.537377256889452e-07, + "loss": 0.168, + "step": 2656 + }, + { + "epoch": 0.4, + "learning_rate": 8.51203674374406e-07, + "loss": 0.2418, + "step": 2688 + }, + { + "epoch": 0.4, + "learning_rate": 8.486696230598669e-07, + "loss": 0.1881, + "step": 2720 + }, + { + "epoch": 0.41, + "learning_rate": 8.461355717453278e-07, + "loss": 0.1829, + "step": 2752 + }, + { + "epoch": 0.41, + "learning_rate": 8.436015204307887e-07, + "loss": 0.1073, + "step": 2784 + }, + { + "epoch": 0.42, + "learning_rate": 8.410674691162496e-07, + "loss": 0.1324, + "step": 2816 + }, + { + "epoch": 0.42, + "learning_rate": 8.385334178017105e-07, + "loss": 0.2077, + "step": 2848 + }, + { + "epoch": 0.43, + "learning_rate": 8.359993664871713e-07, + "loss": 0.2248, + "step": 2880 + }, + { + "epoch": 0.43, + "learning_rate": 8.334653151726322e-07, + "loss": 0.1337, + "step": 2912 + }, + { + "epoch": 0.44, + "learning_rate": 8.30931263858093e-07, + "loss": 0.1906, + "step": 2944 + }, + { + "epoch": 0.44, + "learning_rate": 8.28397212543554e-07, + "loss": 0.1893, + "step": 2976 + }, + { + "epoch": 0.45, + "learning_rate": 8.259423503325942e-07, + "loss": 0.2029, + "step": 3008 + }, + { + "epoch": 0.45, + "learning_rate": 8.234082990180551e-07, + "loss": 0.157, + "step": 3040 + }, + { + "epoch": 0.46, + "learning_rate": 8.208742477035159e-07, + "loss": 0.1433, + "step": 3072 + }, + { + "epoch": 0.46, + "learning_rate": 8.183401963889769e-07, + "loss": 0.1689, + "step": 3104 + }, + { + "epoch": 0.47, + "learning_rate": 8.158061450744377e-07, + "loss": 0.2012, + "step": 3136 + }, + { + "epoch": 0.47, + "learning_rate": 8.132720937598986e-07, + "loss": 0.175, + "step": 3168 + }, + { + "epoch": 0.48, + "learning_rate": 8.107380424453595e-07, + "loss": 0.1961, + "step": 3200 + }, + { + "epoch": 0.48, + "learning_rate": 8.082039911308203e-07, + "loss": 0.2547, + "step": 3232 + }, + { + "epoch": 0.49, + "learning_rate": 8.056699398162813e-07, + "loss": 0.1935, + "step": 3264 + }, + { + "epoch": 0.49, + "learning_rate": 8.031358885017421e-07, + "loss": 0.2149, + "step": 3296 + }, + { + "epoch": 0.5, + "learning_rate": 8.00601837187203e-07, + "loss": 0.1809, + "step": 3328 + } + ], + "logging_steps": 32, + "max_steps": 13434, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 3358, + "total_flos": 1.42592165756928e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-10075/training_args.bin b/checkpoint-10075/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b2c662ef5476ddd668bfd2aa78effb1b4e6129c1 --- /dev/null +++ b/checkpoint-10075/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e4db6f6fcaf2005bb3f583cfbd6e7afdd0dfdcdb7d4db7e107d5c22865fcf47 +size 5048 diff --git a/checkpoint-13433/config.json b/checkpoint-13433/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dbe4666ca61b271d1e7307d3cdd7e387a877b181 --- /dev/null +++ b/checkpoint-13433/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "models/openai/whisper-large-v2/finetune/jacob_filter/checkpoint-6717", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-13433/generation_config.json b/checkpoint-13433/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/checkpoint-13433/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/checkpoint-13433/model-00001-of-00002.safetensors b/checkpoint-13433/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5b034101576c2ffb25a902d13ec2dacbea020d9a --- /dev/null +++ b/checkpoint-13433/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee34cf01664c4d09992b7898eb4d3550b0cf01fcc207114c2c92d32c1cfb3be5 +size 4992706480 diff --git a/checkpoint-13433/model-00002-of-00002.safetensors b/checkpoint-13433/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6290f678cee2c9ad385f41da6a6819c88f22eb23 --- /dev/null +++ b/checkpoint-13433/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:959fce97317119d41ad9ef50f8a03f01d8c15f04bc22a6389145eb90e67c0253 +size 1180663192 diff --git a/checkpoint-13433/model.safetensors.index.json b/checkpoint-13433/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/checkpoint-13433/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-13433/optimizer.pt b/checkpoint-13433/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f6fdd1cebd4a182df4b2f007c02e98e37f50e3b --- /dev/null +++ b/checkpoint-13433/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9acbb2427a1e0414f01f93b19473b6320113c10042867d2a0bc0334e09e6cb73 +size 3095074288 diff --git a/checkpoint-13433/preprocessor_config.json b/checkpoint-13433/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-13433/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-13433/rng_state.pth b/checkpoint-13433/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..36cfc1367b19ad5ee964f93e1b70954412df3823 --- /dev/null +++ b/checkpoint-13433/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e1cbb1a645a82041618b79e1039e0d9f582f753f274baed7d07ea4e6cedee30 +size 14244 diff --git a/checkpoint-13433/scheduler.pt b/checkpoint-13433/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e723bfe32b6a8906dfa2fd2bb78403ae6c6dc0f --- /dev/null +++ b/checkpoint-13433/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb000772249d98077622e512b0edbd5ca3709d74fd76af0bc1b708185f853f8f +size 1064 diff --git a/checkpoint-13433/trainer_state.json b/checkpoint-13433/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b36b5fd4f811da9cbb6e717941705df6860d5513 --- /dev/null +++ b/checkpoint-13433/trainer_state.json @@ -0,0 +1,1275 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9998511240136966, + "eval_steps": 500, + "global_step": 6716, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.722084367245657e-08, + "loss": 0.2836, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 7.692307692307692e-08, + "loss": 0.2111, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.1662531017369727e-07, + "loss": 0.2145, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 1.563275434243176e-07, + "loss": 0.2375, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 1.9602977667493795e-07, + "loss": 0.1839, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 2.3573200992555832e-07, + "loss": 0.2864, + "step": 192 + }, + { + "epoch": 0.03, + "learning_rate": 2.7543424317617863e-07, + "loss": 0.1799, + "step": 224 + }, + { + "epoch": 0.04, + "learning_rate": 3.1513647642679897e-07, + "loss": 0.1609, + "step": 256 + }, + { + "epoch": 0.04, + "learning_rate": 3.5483870967741936e-07, + "loss": 0.1434, + "step": 288 + }, + { + "epoch": 0.05, + "learning_rate": 3.945409429280397e-07, + "loss": 0.1699, + "step": 320 + }, + { + "epoch": 0.05, + "learning_rate": 4.3424317617866004e-07, + "loss": 0.1898, + "step": 352 + }, + { + "epoch": 0.06, + "learning_rate": 4.739454094292804e-07, + "loss": 0.1665, + "step": 384 + }, + { + "epoch": 0.06, + "learning_rate": 5.136476426799007e-07, + "loss": 0.1358, + "step": 416 + }, + { + "epoch": 0.07, + "learning_rate": 5.533498759305211e-07, + "loss": 0.1713, + "step": 448 + }, + { + "epoch": 0.07, + "learning_rate": 5.930521091811415e-07, + "loss": 0.1817, + "step": 480 + }, + { + "epoch": 0.08, + "learning_rate": 6.327543424317618e-07, + "loss": 0.149, + "step": 512 + }, + { + "epoch": 0.08, + "learning_rate": 6.724565756823821e-07, + "loss": 0.165, + "step": 544 + }, + { + "epoch": 0.09, + "learning_rate": 7.121588089330024e-07, + "loss": 0.2282, + "step": 576 + }, + { + "epoch": 0.09, + "learning_rate": 7.518610421836227e-07, + "loss": 0.1666, + "step": 608 + }, + { + "epoch": 0.1, + "learning_rate": 7.915632754342431e-07, + "loss": 0.1229, + "step": 640 + }, + { + "epoch": 0.1, + "learning_rate": 8.312655086848634e-07, + "loss": 0.2118, + "step": 672 + }, + { + "epoch": 0.1, + "learning_rate": 8.709677419354838e-07, + "loss": 0.1869, + "step": 704 + }, + { + "epoch": 0.11, + "learning_rate": 9.106699751861042e-07, + "loss": 0.2196, + "step": 736 + }, + { + "epoch": 0.11, + "learning_rate": 9.503722084367245e-07, + "loss": 0.0936, + "step": 768 + }, + { + "epoch": 0.12, + "learning_rate": 9.90074441687345e-07, + "loss": 0.1839, + "step": 800 + }, + { + "epoch": 0.12, + "learning_rate": 9.980994615140957e-07, + "loss": 0.1675, + "step": 832 + }, + { + "epoch": 0.13, + "learning_rate": 9.955654101995564e-07, + "loss": 0.1595, + "step": 864 + }, + { + "epoch": 0.13, + "learning_rate": 9.930313588850174e-07, + "loss": 0.1556, + "step": 896 + }, + { + "epoch": 0.14, + "learning_rate": 9.905764966740576e-07, + "loss": 0.1711, + "step": 928 + }, + { + "epoch": 0.14, + "learning_rate": 9.880424453595185e-07, + "loss": 0.1777, + "step": 960 + }, + { + "epoch": 0.15, + "learning_rate": 9.855083940449792e-07, + "loss": 0.2031, + "step": 992 + }, + { + "epoch": 0.15, + "learning_rate": 9.829743427304402e-07, + "loss": 0.1529, + "step": 1024 + }, + { + "epoch": 0.16, + "learning_rate": 9.804402914159011e-07, + "loss": 0.1365, + "step": 1056 + }, + { + "epoch": 0.16, + "learning_rate": 9.77906240101362e-07, + "loss": 0.1922, + "step": 1088 + }, + { + "epoch": 0.17, + "learning_rate": 9.75372188786823e-07, + "loss": 0.133, + "step": 1120 + }, + { + "epoch": 0.17, + "learning_rate": 9.728381374722837e-07, + "loss": 0.1692, + "step": 1152 + }, + { + "epoch": 0.18, + "learning_rate": 9.703040861577447e-07, + "loss": 0.1022, + "step": 1184 + }, + { + "epoch": 0.18, + "learning_rate": 9.677700348432054e-07, + "loss": 0.2052, + "step": 1216 + }, + { + "epoch": 0.19, + "learning_rate": 9.652359835286664e-07, + "loss": 0.1546, + "step": 1248 + }, + { + "epoch": 0.19, + "learning_rate": 9.627019322141273e-07, + "loss": 0.149, + "step": 1280 + }, + { + "epoch": 0.2, + "learning_rate": 9.601678808995883e-07, + "loss": 0.1281, + "step": 1312 + }, + { + "epoch": 0.2, + "learning_rate": 9.57633829585049e-07, + "loss": 0.1437, + "step": 1344 + }, + { + "epoch": 0.2, + "learning_rate": 9.5509977827051e-07, + "loss": 0.2097, + "step": 1376 + }, + { + "epoch": 0.21, + "learning_rate": 9.525657269559708e-07, + "loss": 0.1308, + "step": 1408 + }, + { + "epoch": 0.21, + "learning_rate": 9.500316756414317e-07, + "loss": 0.1691, + "step": 1440 + }, + { + "epoch": 0.22, + "learning_rate": 9.474976243268927e-07, + "loss": 0.2319, + "step": 1472 + }, + { + "epoch": 0.22, + "learning_rate": 9.449635730123534e-07, + "loss": 0.2226, + "step": 1504 + }, + { + "epoch": 0.23, + "learning_rate": 9.424295216978143e-07, + "loss": 0.1789, + "step": 1536 + }, + { + "epoch": 0.23, + "learning_rate": 9.398954703832752e-07, + "loss": 0.1932, + "step": 1568 + }, + { + "epoch": 0.24, + "learning_rate": 9.373614190687361e-07, + "loss": 0.1718, + "step": 1600 + }, + { + "epoch": 0.24, + "learning_rate": 9.34827367754197e-07, + "loss": 0.156, + "step": 1632 + }, + { + "epoch": 0.25, + "learning_rate": 9.322933164396578e-07, + "loss": 0.1512, + "step": 1664 + }, + { + "epoch": 0.25, + "learning_rate": 9.297592651251187e-07, + "loss": 0.0968, + "step": 1696 + }, + { + "epoch": 0.26, + "learning_rate": 9.272252138105796e-07, + "loss": 0.0932, + "step": 1728 + }, + { + "epoch": 0.26, + "learning_rate": 9.246911624960405e-07, + "loss": 0.2464, + "step": 1760 + }, + { + "epoch": 0.27, + "learning_rate": 9.221571111815014e-07, + "loss": 0.2036, + "step": 1792 + }, + { + "epoch": 0.27, + "learning_rate": 9.196230598669623e-07, + "loss": 0.1245, + "step": 1824 + }, + { + "epoch": 0.28, + "learning_rate": 9.170890085524232e-07, + "loss": 0.1097, + "step": 1856 + }, + { + "epoch": 0.28, + "learning_rate": 9.14554957237884e-07, + "loss": 0.1844, + "step": 1888 + }, + { + "epoch": 0.29, + "learning_rate": 9.120209059233449e-07, + "loss": 0.1114, + "step": 1920 + }, + { + "epoch": 0.29, + "learning_rate": 9.094868546088058e-07, + "loss": 0.1992, + "step": 1952 + }, + { + "epoch": 0.3, + "learning_rate": 9.069528032942667e-07, + "loss": 0.1721, + "step": 1984 + }, + { + "epoch": 0.3, + "learning_rate": 9.044187519797275e-07, + "loss": 0.1473, + "step": 2016 + }, + { + "epoch": 0.3, + "learning_rate": 9.018847006651884e-07, + "loss": 0.1865, + "step": 2048 + }, + { + "epoch": 0.31, + "learning_rate": 8.993506493506493e-07, + "loss": 0.1583, + "step": 2080 + }, + { + "epoch": 0.31, + "learning_rate": 8.968165980361102e-07, + "loss": 0.1866, + "step": 2112 + }, + { + "epoch": 0.32, + "learning_rate": 8.942825467215711e-07, + "loss": 0.1617, + "step": 2144 + }, + { + "epoch": 0.32, + "learning_rate": 8.917484954070319e-07, + "loss": 0.1189, + "step": 2176 + }, + { + "epoch": 0.33, + "learning_rate": 8.892144440924928e-07, + "loss": 0.148, + "step": 2208 + }, + { + "epoch": 0.33, + "learning_rate": 8.866803927779537e-07, + "loss": 0.131, + "step": 2240 + }, + { + "epoch": 0.34, + "learning_rate": 8.841463414634146e-07, + "loss": 0.2261, + "step": 2272 + }, + { + "epoch": 0.34, + "learning_rate": 8.816122901488755e-07, + "loss": 0.1742, + "step": 2304 + }, + { + "epoch": 0.35, + "learning_rate": 8.790782388343364e-07, + "loss": 0.164, + "step": 2336 + }, + { + "epoch": 0.35, + "learning_rate": 8.765441875197972e-07, + "loss": 0.1161, + "step": 2368 + }, + { + "epoch": 0.36, + "learning_rate": 8.74010136205258e-07, + "loss": 0.1636, + "step": 2400 + }, + { + "epoch": 0.36, + "learning_rate": 8.71476084890719e-07, + "loss": 0.2416, + "step": 2432 + }, + { + "epoch": 0.37, + "learning_rate": 8.689420335761799e-07, + "loss": 0.1632, + "step": 2464 + }, + { + "epoch": 0.37, + "learning_rate": 8.664079822616408e-07, + "loss": 0.1477, + "step": 2496 + }, + { + "epoch": 0.38, + "learning_rate": 8.638739309471016e-07, + "loss": 0.2083, + "step": 2528 + }, + { + "epoch": 0.38, + "learning_rate": 8.613398796325625e-07, + "loss": 0.1599, + "step": 2560 + }, + { + "epoch": 0.39, + "learning_rate": 8.588058283180234e-07, + "loss": 0.1817, + "step": 2592 + }, + { + "epoch": 0.39, + "learning_rate": 8.562717770034843e-07, + "loss": 0.1005, + "step": 2624 + }, + { + "epoch": 0.4, + "learning_rate": 8.537377256889452e-07, + "loss": 0.168, + "step": 2656 + }, + { + "epoch": 0.4, + "learning_rate": 8.51203674374406e-07, + "loss": 0.2418, + "step": 2688 + }, + { + "epoch": 0.4, + "learning_rate": 8.486696230598669e-07, + "loss": 0.1881, + "step": 2720 + }, + { + "epoch": 0.41, + "learning_rate": 8.461355717453278e-07, + "loss": 0.1829, + "step": 2752 + }, + { + "epoch": 0.41, + "learning_rate": 8.436015204307887e-07, + "loss": 0.1073, + "step": 2784 + }, + { + "epoch": 0.42, + "learning_rate": 8.410674691162496e-07, + "loss": 0.1324, + "step": 2816 + }, + { + "epoch": 0.42, + "learning_rate": 8.385334178017105e-07, + "loss": 0.2077, + "step": 2848 + }, + { + "epoch": 0.43, + "learning_rate": 8.359993664871713e-07, + "loss": 0.2248, + "step": 2880 + }, + { + "epoch": 0.43, + "learning_rate": 8.334653151726322e-07, + "loss": 0.1337, + "step": 2912 + }, + { + "epoch": 0.44, + "learning_rate": 8.30931263858093e-07, + "loss": 0.1906, + "step": 2944 + }, + { + "epoch": 0.44, + "learning_rate": 8.28397212543554e-07, + "loss": 0.1893, + "step": 2976 + }, + { + "epoch": 0.45, + "learning_rate": 8.259423503325942e-07, + "loss": 0.2029, + "step": 3008 + }, + { + "epoch": 0.45, + "learning_rate": 8.234082990180551e-07, + "loss": 0.157, + "step": 3040 + }, + { + "epoch": 0.46, + "learning_rate": 8.208742477035159e-07, + "loss": 0.1433, + "step": 3072 + }, + { + "epoch": 0.46, + "learning_rate": 8.183401963889769e-07, + "loss": 0.1689, + "step": 3104 + }, + { + "epoch": 0.47, + "learning_rate": 8.158061450744377e-07, + "loss": 0.2012, + "step": 3136 + }, + { + "epoch": 0.47, + "learning_rate": 8.132720937598986e-07, + "loss": 0.175, + "step": 3168 + }, + { + "epoch": 0.48, + "learning_rate": 8.107380424453595e-07, + "loss": 0.1961, + "step": 3200 + }, + { + "epoch": 0.48, + "learning_rate": 8.082039911308203e-07, + "loss": 0.2547, + "step": 3232 + }, + { + "epoch": 0.49, + "learning_rate": 8.056699398162813e-07, + "loss": 0.1935, + "step": 3264 + }, + { + "epoch": 0.49, + "learning_rate": 8.031358885017421e-07, + "loss": 0.2149, + "step": 3296 + }, + { + "epoch": 0.5, + "learning_rate": 8.00601837187203e-07, + "loss": 0.1809, + "step": 3328 + }, + { + "epoch": 0.5, + "learning_rate": 7.980677858726639e-07, + "loss": 0.2072, + "step": 3360 + }, + { + "epoch": 0.5, + "learning_rate": 7.955337345581247e-07, + "loss": 0.2116, + "step": 3392 + }, + { + "epoch": 0.51, + "learning_rate": 7.929996832435857e-07, + "loss": 0.1737, + "step": 3424 + }, + { + "epoch": 0.51, + "learning_rate": 7.904656319290464e-07, + "loss": 0.2219, + "step": 3456 + }, + { + "epoch": 0.52, + "learning_rate": 7.879315806145074e-07, + "loss": 0.1849, + "step": 3488 + }, + { + "epoch": 0.52, + "learning_rate": 7.853975292999683e-07, + "loss": 0.1884, + "step": 3520 + }, + { + "epoch": 0.53, + "learning_rate": 7.828634779854292e-07, + "loss": 0.2192, + "step": 3552 + }, + { + "epoch": 0.53, + "learning_rate": 7.803294266708901e-07, + "loss": 0.1958, + "step": 3584 + }, + { + "epoch": 0.54, + "learning_rate": 7.777953753563509e-07, + "loss": 0.1433, + "step": 3616 + }, + { + "epoch": 0.54, + "learning_rate": 7.752613240418118e-07, + "loss": 0.2151, + "step": 3648 + }, + { + "epoch": 0.55, + "learning_rate": 7.727272727272727e-07, + "loss": 0.1675, + "step": 3680 + }, + { + "epoch": 0.55, + "learning_rate": 7.701932214127336e-07, + "loss": 0.1586, + "step": 3712 + }, + { + "epoch": 0.56, + "learning_rate": 7.676591700981945e-07, + "loss": 0.2881, + "step": 3744 + }, + { + "epoch": 0.56, + "learning_rate": 7.651251187836553e-07, + "loss": 0.196, + "step": 3776 + }, + { + "epoch": 0.57, + "learning_rate": 7.625910674691162e-07, + "loss": 0.1285, + "step": 3808 + }, + { + "epoch": 0.57, + "learning_rate": 7.60057016154577e-07, + "loss": 0.2262, + "step": 3840 + }, + { + "epoch": 0.58, + "learning_rate": 7.57522964840038e-07, + "loss": 0.2309, + "step": 3872 + }, + { + "epoch": 0.58, + "learning_rate": 7.549889135254989e-07, + "loss": 0.1533, + "step": 3904 + }, + { + "epoch": 0.59, + "learning_rate": 7.524548622109597e-07, + "loss": 0.1297, + "step": 3936 + }, + { + "epoch": 0.59, + "learning_rate": 7.499208108964206e-07, + "loss": 0.1808, + "step": 3968 + }, + { + "epoch": 0.6, + "learning_rate": 7.473867595818814e-07, + "loss": 0.2401, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 7.448527082673424e-07, + "loss": 0.2507, + "step": 4032 + }, + { + "epoch": 0.61, + "learning_rate": 7.423186569528033e-07, + "loss": 0.1562, + "step": 4064 + }, + { + "epoch": 0.61, + "learning_rate": 7.397846056382642e-07, + "loss": 0.1912, + "step": 4096 + }, + { + "epoch": 0.61, + "learning_rate": 7.373297434273043e-07, + "loss": 0.1703, + "step": 4128 + }, + { + "epoch": 0.62, + "learning_rate": 7.347956921127653e-07, + "loss": 0.1471, + "step": 4160 + }, + { + "epoch": 0.62, + "learning_rate": 7.322616407982262e-07, + "loss": 0.1539, + "step": 4192 + }, + { + "epoch": 0.63, + "learning_rate": 7.297275894836869e-07, + "loss": 0.1521, + "step": 4224 + }, + { + "epoch": 0.63, + "learning_rate": 7.271935381691479e-07, + "loss": 0.2623, + "step": 4256 + }, + { + "epoch": 0.64, + "learning_rate": 7.246594868546087e-07, + "loss": 0.1753, + "step": 4288 + }, + { + "epoch": 0.64, + "learning_rate": 7.221254355400697e-07, + "loss": 0.1945, + "step": 4320 + }, + { + "epoch": 0.65, + "learning_rate": 7.195913842255306e-07, + "loss": 0.2153, + "step": 4352 + }, + { + "epoch": 0.65, + "learning_rate": 7.170573329109915e-07, + "loss": 0.2841, + "step": 4384 + }, + { + "epoch": 0.66, + "learning_rate": 7.145232815964523e-07, + "loss": 0.1759, + "step": 4416 + }, + { + "epoch": 0.66, + "learning_rate": 7.119892302819131e-07, + "loss": 0.2214, + "step": 4448 + }, + { + "epoch": 0.67, + "learning_rate": 7.094551789673741e-07, + "loss": 0.188, + "step": 4480 + }, + { + "epoch": 0.67, + "learning_rate": 7.069211276528349e-07, + "loss": 0.1579, + "step": 4512 + }, + { + "epoch": 0.68, + "learning_rate": 7.043870763382959e-07, + "loss": 0.2213, + "step": 4544 + }, + { + "epoch": 0.68, + "learning_rate": 7.018530250237567e-07, + "loss": 0.2042, + "step": 4576 + }, + { + "epoch": 0.69, + "learning_rate": 6.993189737092175e-07, + "loss": 0.1852, + "step": 4608 + }, + { + "epoch": 0.69, + "learning_rate": 6.967849223946785e-07, + "loss": 0.1716, + "step": 4640 + }, + { + "epoch": 0.7, + "learning_rate": 6.942508710801393e-07, + "loss": 0.1645, + "step": 4672 + }, + { + "epoch": 0.7, + "learning_rate": 6.917168197656003e-07, + "loss": 0.1986, + "step": 4704 + }, + { + "epoch": 0.71, + "learning_rate": 6.89182768451061e-07, + "loss": 0.2531, + "step": 4736 + }, + { + "epoch": 0.71, + "learning_rate": 6.866487171365219e-07, + "loss": 0.1792, + "step": 4768 + }, + { + "epoch": 0.71, + "learning_rate": 6.841146658219829e-07, + "loss": 0.1843, + "step": 4800 + }, + { + "epoch": 0.72, + "learning_rate": 6.815806145074437e-07, + "loss": 0.2175, + "step": 4832 + }, + { + "epoch": 0.72, + "learning_rate": 6.790465631929047e-07, + "loss": 0.2083, + "step": 4864 + }, + { + "epoch": 0.73, + "learning_rate": 6.765125118783655e-07, + "loss": 0.1729, + "step": 4896 + }, + { + "epoch": 0.73, + "learning_rate": 6.739784605638263e-07, + "loss": 0.1849, + "step": 4928 + }, + { + "epoch": 0.74, + "learning_rate": 6.714444092492873e-07, + "loss": 0.2374, + "step": 4960 + }, + { + "epoch": 0.74, + "learning_rate": 6.689103579347481e-07, + "loss": 0.241, + "step": 4992 + }, + { + "epoch": 0.75, + "learning_rate": 6.663763066202091e-07, + "loss": 0.1853, + "step": 5024 + }, + { + "epoch": 0.75, + "learning_rate": 6.638422553056699e-07, + "loss": 0.1957, + "step": 5056 + }, + { + "epoch": 0.76, + "learning_rate": 6.613082039911308e-07, + "loss": 0.2052, + "step": 5088 + }, + { + "epoch": 0.76, + "learning_rate": 6.587741526765917e-07, + "loss": 0.2321, + "step": 5120 + }, + { + "epoch": 0.77, + "learning_rate": 6.562401013620525e-07, + "loss": 0.1804, + "step": 5152 + }, + { + "epoch": 0.77, + "learning_rate": 6.537060500475135e-07, + "loss": 0.1842, + "step": 5184 + }, + { + "epoch": 0.78, + "learning_rate": 6.511719987329743e-07, + "loss": 0.2388, + "step": 5216 + }, + { + "epoch": 0.78, + "learning_rate": 6.486379474184352e-07, + "loss": 0.2417, + "step": 5248 + }, + { + "epoch": 0.79, + "learning_rate": 6.46103896103896e-07, + "loss": 0.2224, + "step": 5280 + }, + { + "epoch": 0.79, + "learning_rate": 6.435698447893569e-07, + "loss": 0.2029, + "step": 5312 + }, + { + "epoch": 0.8, + "learning_rate": 6.410357934748179e-07, + "loss": 0.2807, + "step": 5344 + }, + { + "epoch": 0.8, + "learning_rate": 6.385017421602787e-07, + "loss": 0.192, + "step": 5376 + }, + { + "epoch": 0.81, + "learning_rate": 6.359676908457397e-07, + "loss": 0.1848, + "step": 5408 + }, + { + "epoch": 0.81, + "learning_rate": 6.334336395312004e-07, + "loss": 0.2143, + "step": 5440 + }, + { + "epoch": 0.81, + "learning_rate": 6.308995882166613e-07, + "loss": 0.2421, + "step": 5472 + }, + { + "epoch": 0.82, + "learning_rate": 6.283655369021223e-07, + "loss": 0.1724, + "step": 5504 + }, + { + "epoch": 0.82, + "learning_rate": 6.258314855875831e-07, + "loss": 0.1207, + "step": 5536 + }, + { + "epoch": 0.83, + "learning_rate": 6.232974342730441e-07, + "loss": 0.2259, + "step": 5568 + }, + { + "epoch": 0.83, + "learning_rate": 6.207633829585048e-07, + "loss": 0.2504, + "step": 5600 + }, + { + "epoch": 0.84, + "learning_rate": 6.182293316439658e-07, + "loss": 0.188, + "step": 5632 + }, + { + "epoch": 0.84, + "learning_rate": 6.156952803294266e-07, + "loss": 0.1893, + "step": 5664 + }, + { + "epoch": 0.85, + "learning_rate": 6.131612290148875e-07, + "loss": 0.1905, + "step": 5696 + }, + { + "epoch": 0.85, + "learning_rate": 6.106271777003485e-07, + "loss": 0.2594, + "step": 5728 + }, + { + "epoch": 0.86, + "learning_rate": 6.080931263858092e-07, + "loss": 0.3084, + "step": 5760 + }, + { + "epoch": 0.86, + "learning_rate": 6.055590750712702e-07, + "loss": 0.1925, + "step": 5792 + }, + { + "epoch": 0.87, + "learning_rate": 6.03025023756731e-07, + "loss": 0.186, + "step": 5824 + }, + { + "epoch": 0.87, + "learning_rate": 6.004909724421919e-07, + "loss": 0.2302, + "step": 5856 + }, + { + "epoch": 0.88, + "learning_rate": 5.979569211276529e-07, + "loss": 0.1371, + "step": 5888 + }, + { + "epoch": 0.88, + "learning_rate": 5.954228698131137e-07, + "loss": 0.231, + "step": 5920 + }, + { + "epoch": 0.89, + "learning_rate": 5.928888184985746e-07, + "loss": 0.2012, + "step": 5952 + }, + { + "epoch": 0.89, + "learning_rate": 5.903547671840354e-07, + "loss": 0.2006, + "step": 5984 + }, + { + "epoch": 0.9, + "learning_rate": 5.878207158694963e-07, + "loss": 0.215, + "step": 6016 + }, + { + "epoch": 0.9, + "learning_rate": 5.852866645549572e-07, + "loss": 0.1471, + "step": 6048 + }, + { + "epoch": 0.91, + "learning_rate": 5.827526132404181e-07, + "loss": 0.2364, + "step": 6080 + }, + { + "epoch": 0.91, + "learning_rate": 5.80218561925879e-07, + "loss": 0.2881, + "step": 6112 + }, + { + "epoch": 0.91, + "learning_rate": 5.776845106113398e-07, + "loss": 0.1536, + "step": 6144 + }, + { + "epoch": 0.92, + "learning_rate": 5.751504592968008e-07, + "loss": 0.2317, + "step": 6176 + }, + { + "epoch": 0.92, + "learning_rate": 5.726164079822616e-07, + "loss": 0.1952, + "step": 6208 + }, + { + "epoch": 0.93, + "learning_rate": 5.700823566677225e-07, + "loss": 0.1602, + "step": 6240 + }, + { + "epoch": 0.93, + "learning_rate": 5.675483053531834e-07, + "loss": 0.212, + "step": 6272 + }, + { + "epoch": 0.94, + "learning_rate": 5.650142540386442e-07, + "loss": 0.2401, + "step": 6304 + }, + { + "epoch": 0.94, + "learning_rate": 5.624802027241052e-07, + "loss": 0.1992, + "step": 6336 + }, + { + "epoch": 0.95, + "learning_rate": 5.59946151409566e-07, + "loss": 0.2616, + "step": 6368 + }, + { + "epoch": 0.95, + "learning_rate": 5.574121000950269e-07, + "loss": 0.146, + "step": 6400 + }, + { + "epoch": 0.96, + "learning_rate": 5.548780487804878e-07, + "loss": 0.2081, + "step": 6432 + }, + { + "epoch": 0.96, + "learning_rate": 5.523439974659486e-07, + "loss": 0.207, + "step": 6464 + }, + { + "epoch": 0.97, + "learning_rate": 5.498099461514096e-07, + "loss": 0.2631, + "step": 6496 + }, + { + "epoch": 0.97, + "learning_rate": 5.472758948368704e-07, + "loss": 0.1721, + "step": 6528 + }, + { + "epoch": 0.98, + "learning_rate": 5.447418435223313e-07, + "loss": 0.1908, + "step": 6560 + }, + { + "epoch": 0.98, + "learning_rate": 5.422869813113715e-07, + "loss": 0.2238, + "step": 6592 + }, + { + "epoch": 0.99, + "learning_rate": 5.397529299968325e-07, + "loss": 0.2524, + "step": 6624 + }, + { + "epoch": 0.99, + "learning_rate": 5.372188786822933e-07, + "loss": 0.1968, + "step": 6656 + }, + { + "epoch": 1.0, + "learning_rate": 5.346848273677542e-07, + "loss": 0.2379, + "step": 6688 + } + ], + "logging_steps": 32, + "max_steps": 13434, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 3358, + "total_flos": 2.85184331513856e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-13433/training_args.bin b/checkpoint-13433/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b2c662ef5476ddd668bfd2aa78effb1b4e6129c1 --- /dev/null +++ b/checkpoint-13433/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e4db6f6fcaf2005bb3f583cfbd6e7afdd0dfdcdb7d4db7e107d5c22865fcf47 +size 5048 diff --git a/checkpoint-16791/config.json b/checkpoint-16791/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dbe4666ca61b271d1e7307d3cdd7e387a877b181 --- /dev/null +++ b/checkpoint-16791/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "models/openai/whisper-large-v2/finetune/jacob_filter/checkpoint-6717", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-16791/generation_config.json b/checkpoint-16791/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/checkpoint-16791/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/checkpoint-16791/model-00001-of-00002.safetensors b/checkpoint-16791/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..faa097b1bf4f6f53f2dded297f9945f018383d0f --- /dev/null +++ b/checkpoint-16791/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:095b5daf6da9dafe086366063c7aa0b1a9ed8311d1231882f77ca62bfd622ff7 +size 4992706480 diff --git a/checkpoint-16791/model-00002-of-00002.safetensors b/checkpoint-16791/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0110be3868156b58e22ed9787c9d06fb88c84341 --- /dev/null +++ b/checkpoint-16791/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5b9fb976d7d42bc07f0ba0dbacffaa0061ac0f0625724aba517b14008174711 +size 1180663192 diff --git a/checkpoint-16791/model.safetensors.index.json b/checkpoint-16791/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/checkpoint-16791/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-16791/optimizer.pt b/checkpoint-16791/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2ab587d7b6c21e204ee8ce79ea895dda584c40a --- /dev/null +++ b/checkpoint-16791/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2a436d2addaadda0fea3b2c8395715e532fa1206ea3a07af9201e12fb839ab6 +size 3095074288 diff --git a/checkpoint-16791/preprocessor_config.json b/checkpoint-16791/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-16791/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-16791/rng_state.pth b/checkpoint-16791/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9a365a21fd1f8c53aabd1f1827a1611834a3f7b1 --- /dev/null +++ b/checkpoint-16791/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1d6627426ed3268f8693989ae5aaa270ce7dc271867c833e122fa4a892074de +size 14244 diff --git a/checkpoint-16791/scheduler.pt b/checkpoint-16791/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a6c7941c2618122fee32d90f42a3e839fe3abf7 --- /dev/null +++ b/checkpoint-16791/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86d89410b53de562dce1ae7961ba3c63eca690064d505613a551336065d4913e +size 1064 diff --git a/checkpoint-16791/trainer_state.json b/checkpoint-16791/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3aa8ecf2ea6d856d33409fa6c4900852ecffeb7d --- /dev/null +++ b/checkpoint-16791/trainer_state.json @@ -0,0 +1,1905 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.4997766860205448, + "eval_steps": 500, + "global_step": 10074, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.722084367245657e-08, + "loss": 0.2836, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 7.692307692307692e-08, + "loss": 0.2111, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.1662531017369727e-07, + "loss": 0.2145, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 1.563275434243176e-07, + "loss": 0.2375, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 1.9602977667493795e-07, + "loss": 0.1839, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 2.3573200992555832e-07, + "loss": 0.2864, + "step": 192 + }, + { + "epoch": 0.03, + "learning_rate": 2.7543424317617863e-07, + "loss": 0.1799, + "step": 224 + }, + { + "epoch": 0.04, + "learning_rate": 3.1513647642679897e-07, + "loss": 0.1609, + "step": 256 + }, + { + "epoch": 0.04, + "learning_rate": 3.5483870967741936e-07, + "loss": 0.1434, + "step": 288 + }, + { + "epoch": 0.05, + "learning_rate": 3.945409429280397e-07, + "loss": 0.1699, + "step": 320 + }, + { + "epoch": 0.05, + "learning_rate": 4.3424317617866004e-07, + "loss": 0.1898, + "step": 352 + }, + { + "epoch": 0.06, + "learning_rate": 4.739454094292804e-07, + "loss": 0.1665, + "step": 384 + }, + { + "epoch": 0.06, + "learning_rate": 5.136476426799007e-07, + "loss": 0.1358, + "step": 416 + }, + { + "epoch": 0.07, + "learning_rate": 5.533498759305211e-07, + "loss": 0.1713, + "step": 448 + }, + { + "epoch": 0.07, + "learning_rate": 5.930521091811415e-07, + "loss": 0.1817, + "step": 480 + }, + { + "epoch": 0.08, + "learning_rate": 6.327543424317618e-07, + "loss": 0.149, + "step": 512 + }, + { + "epoch": 0.08, + "learning_rate": 6.724565756823821e-07, + "loss": 0.165, + "step": 544 + }, + { + "epoch": 0.09, + "learning_rate": 7.121588089330024e-07, + "loss": 0.2282, + "step": 576 + }, + { + "epoch": 0.09, + "learning_rate": 7.518610421836227e-07, + "loss": 0.1666, + "step": 608 + }, + { + "epoch": 0.1, + "learning_rate": 7.915632754342431e-07, + "loss": 0.1229, + "step": 640 + }, + { + "epoch": 0.1, + "learning_rate": 8.312655086848634e-07, + "loss": 0.2118, + "step": 672 + }, + { + "epoch": 0.1, + "learning_rate": 8.709677419354838e-07, + "loss": 0.1869, + "step": 704 + }, + { + "epoch": 0.11, + "learning_rate": 9.106699751861042e-07, + "loss": 0.2196, + "step": 736 + }, + { + "epoch": 0.11, + "learning_rate": 9.503722084367245e-07, + "loss": 0.0936, + "step": 768 + }, + { + "epoch": 0.12, + "learning_rate": 9.90074441687345e-07, + "loss": 0.1839, + "step": 800 + }, + { + "epoch": 0.12, + "learning_rate": 9.980994615140957e-07, + "loss": 0.1675, + "step": 832 + }, + { + "epoch": 0.13, + "learning_rate": 9.955654101995564e-07, + "loss": 0.1595, + "step": 864 + }, + { + "epoch": 0.13, + "learning_rate": 9.930313588850174e-07, + "loss": 0.1556, + "step": 896 + }, + { + "epoch": 0.14, + "learning_rate": 9.905764966740576e-07, + "loss": 0.1711, + "step": 928 + }, + { + "epoch": 0.14, + "learning_rate": 9.880424453595185e-07, + "loss": 0.1777, + "step": 960 + }, + { + "epoch": 0.15, + "learning_rate": 9.855083940449792e-07, + "loss": 0.2031, + "step": 992 + }, + { + "epoch": 0.15, + "learning_rate": 9.829743427304402e-07, + "loss": 0.1529, + "step": 1024 + }, + { + "epoch": 0.16, + "learning_rate": 9.804402914159011e-07, + "loss": 0.1365, + "step": 1056 + }, + { + "epoch": 0.16, + "learning_rate": 9.77906240101362e-07, + "loss": 0.1922, + "step": 1088 + }, + { + "epoch": 0.17, + "learning_rate": 9.75372188786823e-07, + "loss": 0.133, + "step": 1120 + }, + { + "epoch": 0.17, + "learning_rate": 9.728381374722837e-07, + "loss": 0.1692, + "step": 1152 + }, + { + "epoch": 0.18, + "learning_rate": 9.703040861577447e-07, + "loss": 0.1022, + "step": 1184 + }, + { + "epoch": 0.18, + "learning_rate": 9.677700348432054e-07, + "loss": 0.2052, + "step": 1216 + }, + { + "epoch": 0.19, + "learning_rate": 9.652359835286664e-07, + "loss": 0.1546, + "step": 1248 + }, + { + "epoch": 0.19, + "learning_rate": 9.627019322141273e-07, + "loss": 0.149, + "step": 1280 + }, + { + "epoch": 0.2, + "learning_rate": 9.601678808995883e-07, + "loss": 0.1281, + "step": 1312 + }, + { + "epoch": 0.2, + "learning_rate": 9.57633829585049e-07, + "loss": 0.1437, + "step": 1344 + }, + { + "epoch": 0.2, + "learning_rate": 9.5509977827051e-07, + "loss": 0.2097, + "step": 1376 + }, + { + "epoch": 0.21, + "learning_rate": 9.525657269559708e-07, + "loss": 0.1308, + "step": 1408 + }, + { + "epoch": 0.21, + "learning_rate": 9.500316756414317e-07, + "loss": 0.1691, + "step": 1440 + }, + { + "epoch": 0.22, + "learning_rate": 9.474976243268927e-07, + "loss": 0.2319, + "step": 1472 + }, + { + "epoch": 0.22, + "learning_rate": 9.449635730123534e-07, + "loss": 0.2226, + "step": 1504 + }, + { + "epoch": 0.23, + "learning_rate": 9.424295216978143e-07, + "loss": 0.1789, + "step": 1536 + }, + { + "epoch": 0.23, + "learning_rate": 9.398954703832752e-07, + "loss": 0.1932, + "step": 1568 + }, + { + "epoch": 0.24, + "learning_rate": 9.373614190687361e-07, + "loss": 0.1718, + "step": 1600 + }, + { + "epoch": 0.24, + "learning_rate": 9.34827367754197e-07, + "loss": 0.156, + "step": 1632 + }, + { + "epoch": 0.25, + "learning_rate": 9.322933164396578e-07, + "loss": 0.1512, + "step": 1664 + }, + { + "epoch": 0.25, + "learning_rate": 9.297592651251187e-07, + "loss": 0.0968, + "step": 1696 + }, + { + "epoch": 0.26, + "learning_rate": 9.272252138105796e-07, + "loss": 0.0932, + "step": 1728 + }, + { + "epoch": 0.26, + "learning_rate": 9.246911624960405e-07, + "loss": 0.2464, + "step": 1760 + }, + { + "epoch": 0.27, + "learning_rate": 9.221571111815014e-07, + "loss": 0.2036, + "step": 1792 + }, + { + "epoch": 0.27, + "learning_rate": 9.196230598669623e-07, + "loss": 0.1245, + "step": 1824 + }, + { + "epoch": 0.28, + "learning_rate": 9.170890085524232e-07, + "loss": 0.1097, + "step": 1856 + }, + { + "epoch": 0.28, + "learning_rate": 9.14554957237884e-07, + "loss": 0.1844, + "step": 1888 + }, + { + "epoch": 0.29, + "learning_rate": 9.120209059233449e-07, + "loss": 0.1114, + "step": 1920 + }, + { + "epoch": 0.29, + "learning_rate": 9.094868546088058e-07, + "loss": 0.1992, + "step": 1952 + }, + { + "epoch": 0.3, + "learning_rate": 9.069528032942667e-07, + "loss": 0.1721, + "step": 1984 + }, + { + "epoch": 0.3, + "learning_rate": 9.044187519797275e-07, + "loss": 0.1473, + "step": 2016 + }, + { + "epoch": 0.3, + "learning_rate": 9.018847006651884e-07, + "loss": 0.1865, + "step": 2048 + }, + { + "epoch": 0.31, + "learning_rate": 8.993506493506493e-07, + "loss": 0.1583, + "step": 2080 + }, + { + "epoch": 0.31, + "learning_rate": 8.968165980361102e-07, + "loss": 0.1866, + "step": 2112 + }, + { + "epoch": 0.32, + "learning_rate": 8.942825467215711e-07, + "loss": 0.1617, + "step": 2144 + }, + { + "epoch": 0.32, + "learning_rate": 8.917484954070319e-07, + "loss": 0.1189, + "step": 2176 + }, + { + "epoch": 0.33, + "learning_rate": 8.892144440924928e-07, + "loss": 0.148, + "step": 2208 + }, + { + "epoch": 0.33, + "learning_rate": 8.866803927779537e-07, + "loss": 0.131, + "step": 2240 + }, + { + "epoch": 0.34, + "learning_rate": 8.841463414634146e-07, + "loss": 0.2261, + "step": 2272 + }, + { + "epoch": 0.34, + "learning_rate": 8.816122901488755e-07, + "loss": 0.1742, + "step": 2304 + }, + { + "epoch": 0.35, + "learning_rate": 8.790782388343364e-07, + "loss": 0.164, + "step": 2336 + }, + { + "epoch": 0.35, + "learning_rate": 8.765441875197972e-07, + "loss": 0.1161, + "step": 2368 + }, + { + "epoch": 0.36, + "learning_rate": 8.74010136205258e-07, + "loss": 0.1636, + "step": 2400 + }, + { + "epoch": 0.36, + "learning_rate": 8.71476084890719e-07, + "loss": 0.2416, + "step": 2432 + }, + { + "epoch": 0.37, + "learning_rate": 8.689420335761799e-07, + "loss": 0.1632, + "step": 2464 + }, + { + "epoch": 0.37, + "learning_rate": 8.664079822616408e-07, + "loss": 0.1477, + "step": 2496 + }, + { + "epoch": 0.38, + "learning_rate": 8.638739309471016e-07, + "loss": 0.2083, + "step": 2528 + }, + { + "epoch": 0.38, + "learning_rate": 8.613398796325625e-07, + "loss": 0.1599, + "step": 2560 + }, + { + "epoch": 0.39, + "learning_rate": 8.588058283180234e-07, + "loss": 0.1817, + "step": 2592 + }, + { + "epoch": 0.39, + "learning_rate": 8.562717770034843e-07, + "loss": 0.1005, + "step": 2624 + }, + { + "epoch": 0.4, + "learning_rate": 8.537377256889452e-07, + "loss": 0.168, + "step": 2656 + }, + { + "epoch": 0.4, + "learning_rate": 8.51203674374406e-07, + "loss": 0.2418, + "step": 2688 + }, + { + "epoch": 0.4, + "learning_rate": 8.486696230598669e-07, + "loss": 0.1881, + "step": 2720 + }, + { + "epoch": 0.41, + "learning_rate": 8.461355717453278e-07, + "loss": 0.1829, + "step": 2752 + }, + { + "epoch": 0.41, + "learning_rate": 8.436015204307887e-07, + "loss": 0.1073, + "step": 2784 + }, + { + "epoch": 0.42, + "learning_rate": 8.410674691162496e-07, + "loss": 0.1324, + "step": 2816 + }, + { + "epoch": 0.42, + "learning_rate": 8.385334178017105e-07, + "loss": 0.2077, + "step": 2848 + }, + { + "epoch": 0.43, + "learning_rate": 8.359993664871713e-07, + "loss": 0.2248, + "step": 2880 + }, + { + "epoch": 0.43, + "learning_rate": 8.334653151726322e-07, + "loss": 0.1337, + "step": 2912 + }, + { + "epoch": 0.44, + "learning_rate": 8.30931263858093e-07, + "loss": 0.1906, + "step": 2944 + }, + { + "epoch": 0.44, + "learning_rate": 8.28397212543554e-07, + "loss": 0.1893, + "step": 2976 + }, + { + "epoch": 0.45, + "learning_rate": 8.259423503325942e-07, + "loss": 0.2029, + "step": 3008 + }, + { + "epoch": 0.45, + "learning_rate": 8.234082990180551e-07, + "loss": 0.157, + "step": 3040 + }, + { + "epoch": 0.46, + "learning_rate": 8.208742477035159e-07, + "loss": 0.1433, + "step": 3072 + }, + { + "epoch": 0.46, + "learning_rate": 8.183401963889769e-07, + "loss": 0.1689, + "step": 3104 + }, + { + "epoch": 0.47, + "learning_rate": 8.158061450744377e-07, + "loss": 0.2012, + "step": 3136 + }, + { + "epoch": 0.47, + "learning_rate": 8.132720937598986e-07, + "loss": 0.175, + "step": 3168 + }, + { + "epoch": 0.48, + "learning_rate": 8.107380424453595e-07, + "loss": 0.1961, + "step": 3200 + }, + { + "epoch": 0.48, + "learning_rate": 8.082039911308203e-07, + "loss": 0.2547, + "step": 3232 + }, + { + "epoch": 0.49, + "learning_rate": 8.056699398162813e-07, + "loss": 0.1935, + "step": 3264 + }, + { + "epoch": 0.49, + "learning_rate": 8.031358885017421e-07, + "loss": 0.2149, + "step": 3296 + }, + { + "epoch": 0.5, + "learning_rate": 8.00601837187203e-07, + "loss": 0.1809, + "step": 3328 + }, + { + "epoch": 0.5, + "learning_rate": 7.980677858726639e-07, + "loss": 0.2072, + "step": 3360 + }, + { + "epoch": 0.5, + "learning_rate": 7.955337345581247e-07, + "loss": 0.2116, + "step": 3392 + }, + { + "epoch": 0.51, + "learning_rate": 7.929996832435857e-07, + "loss": 0.1737, + "step": 3424 + }, + { + "epoch": 0.51, + "learning_rate": 7.904656319290464e-07, + "loss": 0.2219, + "step": 3456 + }, + { + "epoch": 0.52, + "learning_rate": 7.879315806145074e-07, + "loss": 0.1849, + "step": 3488 + }, + { + "epoch": 0.52, + "learning_rate": 7.853975292999683e-07, + "loss": 0.1884, + "step": 3520 + }, + { + "epoch": 0.53, + "learning_rate": 7.828634779854292e-07, + "loss": 0.2192, + "step": 3552 + }, + { + "epoch": 0.53, + "learning_rate": 7.803294266708901e-07, + "loss": 0.1958, + "step": 3584 + }, + { + "epoch": 0.54, + "learning_rate": 7.777953753563509e-07, + "loss": 0.1433, + "step": 3616 + }, + { + "epoch": 0.54, + "learning_rate": 7.752613240418118e-07, + "loss": 0.2151, + "step": 3648 + }, + { + "epoch": 0.55, + "learning_rate": 7.727272727272727e-07, + "loss": 0.1675, + "step": 3680 + }, + { + "epoch": 0.55, + "learning_rate": 7.701932214127336e-07, + "loss": 0.1586, + "step": 3712 + }, + { + "epoch": 0.56, + "learning_rate": 7.676591700981945e-07, + "loss": 0.2881, + "step": 3744 + }, + { + "epoch": 0.56, + "learning_rate": 7.651251187836553e-07, + "loss": 0.196, + "step": 3776 + }, + { + "epoch": 0.57, + "learning_rate": 7.625910674691162e-07, + "loss": 0.1285, + "step": 3808 + }, + { + "epoch": 0.57, + "learning_rate": 7.60057016154577e-07, + "loss": 0.2262, + "step": 3840 + }, + { + "epoch": 0.58, + "learning_rate": 7.57522964840038e-07, + "loss": 0.2309, + "step": 3872 + }, + { + "epoch": 0.58, + "learning_rate": 7.549889135254989e-07, + "loss": 0.1533, + "step": 3904 + }, + { + "epoch": 0.59, + "learning_rate": 7.524548622109597e-07, + "loss": 0.1297, + "step": 3936 + }, + { + "epoch": 0.59, + "learning_rate": 7.499208108964206e-07, + "loss": 0.1808, + "step": 3968 + }, + { + "epoch": 0.6, + "learning_rate": 7.473867595818814e-07, + "loss": 0.2401, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 7.448527082673424e-07, + "loss": 0.2507, + "step": 4032 + }, + { + "epoch": 0.61, + "learning_rate": 7.423186569528033e-07, + "loss": 0.1562, + "step": 4064 + }, + { + "epoch": 0.61, + "learning_rate": 7.397846056382642e-07, + "loss": 0.1912, + "step": 4096 + }, + { + "epoch": 0.61, + "learning_rate": 7.373297434273043e-07, + "loss": 0.1703, + "step": 4128 + }, + { + "epoch": 0.62, + "learning_rate": 7.347956921127653e-07, + "loss": 0.1471, + "step": 4160 + }, + { + "epoch": 0.62, + "learning_rate": 7.322616407982262e-07, + "loss": 0.1539, + "step": 4192 + }, + { + "epoch": 0.63, + "learning_rate": 7.297275894836869e-07, + "loss": 0.1521, + "step": 4224 + }, + { + "epoch": 0.63, + "learning_rate": 7.271935381691479e-07, + "loss": 0.2623, + "step": 4256 + }, + { + "epoch": 0.64, + "learning_rate": 7.246594868546087e-07, + "loss": 0.1753, + "step": 4288 + }, + { + "epoch": 0.64, + "learning_rate": 7.221254355400697e-07, + "loss": 0.1945, + "step": 4320 + }, + { + "epoch": 0.65, + "learning_rate": 7.195913842255306e-07, + "loss": 0.2153, + "step": 4352 + }, + { + "epoch": 0.65, + "learning_rate": 7.170573329109915e-07, + "loss": 0.2841, + "step": 4384 + }, + { + "epoch": 0.66, + "learning_rate": 7.145232815964523e-07, + "loss": 0.1759, + "step": 4416 + }, + { + "epoch": 0.66, + "learning_rate": 7.119892302819131e-07, + "loss": 0.2214, + "step": 4448 + }, + { + "epoch": 0.67, + "learning_rate": 7.094551789673741e-07, + "loss": 0.188, + "step": 4480 + }, + { + "epoch": 0.67, + "learning_rate": 7.069211276528349e-07, + "loss": 0.1579, + "step": 4512 + }, + { + "epoch": 0.68, + "learning_rate": 7.043870763382959e-07, + "loss": 0.2213, + "step": 4544 + }, + { + "epoch": 0.68, + "learning_rate": 7.018530250237567e-07, + "loss": 0.2042, + "step": 4576 + }, + { + "epoch": 0.69, + "learning_rate": 6.993189737092175e-07, + "loss": 0.1852, + "step": 4608 + }, + { + "epoch": 0.69, + "learning_rate": 6.967849223946785e-07, + "loss": 0.1716, + "step": 4640 + }, + { + "epoch": 0.7, + "learning_rate": 6.942508710801393e-07, + "loss": 0.1645, + "step": 4672 + }, + { + "epoch": 0.7, + "learning_rate": 6.917168197656003e-07, + "loss": 0.1986, + "step": 4704 + }, + { + "epoch": 0.71, + "learning_rate": 6.89182768451061e-07, + "loss": 0.2531, + "step": 4736 + }, + { + "epoch": 0.71, + "learning_rate": 6.866487171365219e-07, + "loss": 0.1792, + "step": 4768 + }, + { + "epoch": 0.71, + "learning_rate": 6.841146658219829e-07, + "loss": 0.1843, + "step": 4800 + }, + { + "epoch": 0.72, + "learning_rate": 6.815806145074437e-07, + "loss": 0.2175, + "step": 4832 + }, + { + "epoch": 0.72, + "learning_rate": 6.790465631929047e-07, + "loss": 0.2083, + "step": 4864 + }, + { + "epoch": 0.73, + "learning_rate": 6.765125118783655e-07, + "loss": 0.1729, + "step": 4896 + }, + { + "epoch": 0.73, + "learning_rate": 6.739784605638263e-07, + "loss": 0.1849, + "step": 4928 + }, + { + "epoch": 0.74, + "learning_rate": 6.714444092492873e-07, + "loss": 0.2374, + "step": 4960 + }, + { + "epoch": 0.74, + "learning_rate": 6.689103579347481e-07, + "loss": 0.241, + "step": 4992 + }, + { + "epoch": 0.75, + "learning_rate": 6.663763066202091e-07, + "loss": 0.1853, + "step": 5024 + }, + { + "epoch": 0.75, + "learning_rate": 6.638422553056699e-07, + "loss": 0.1957, + "step": 5056 + }, + { + "epoch": 0.76, + "learning_rate": 6.613082039911308e-07, + "loss": 0.2052, + "step": 5088 + }, + { + "epoch": 0.76, + "learning_rate": 6.587741526765917e-07, + "loss": 0.2321, + "step": 5120 + }, + { + "epoch": 0.77, + "learning_rate": 6.562401013620525e-07, + "loss": 0.1804, + "step": 5152 + }, + { + "epoch": 0.77, + "learning_rate": 6.537060500475135e-07, + "loss": 0.1842, + "step": 5184 + }, + { + "epoch": 0.78, + "learning_rate": 6.511719987329743e-07, + "loss": 0.2388, + "step": 5216 + }, + { + "epoch": 0.78, + "learning_rate": 6.486379474184352e-07, + "loss": 0.2417, + "step": 5248 + }, + { + "epoch": 0.79, + "learning_rate": 6.46103896103896e-07, + "loss": 0.2224, + "step": 5280 + }, + { + "epoch": 0.79, + "learning_rate": 6.435698447893569e-07, + "loss": 0.2029, + "step": 5312 + }, + { + "epoch": 0.8, + "learning_rate": 6.410357934748179e-07, + "loss": 0.2807, + "step": 5344 + }, + { + "epoch": 0.8, + "learning_rate": 6.385017421602787e-07, + "loss": 0.192, + "step": 5376 + }, + { + "epoch": 0.81, + "learning_rate": 6.359676908457397e-07, + "loss": 0.1848, + "step": 5408 + }, + { + "epoch": 0.81, + "learning_rate": 6.334336395312004e-07, + "loss": 0.2143, + "step": 5440 + }, + { + "epoch": 0.81, + "learning_rate": 6.308995882166613e-07, + "loss": 0.2421, + "step": 5472 + }, + { + "epoch": 0.82, + "learning_rate": 6.283655369021223e-07, + "loss": 0.1724, + "step": 5504 + }, + { + "epoch": 0.82, + "learning_rate": 6.258314855875831e-07, + "loss": 0.1207, + "step": 5536 + }, + { + "epoch": 0.83, + "learning_rate": 6.232974342730441e-07, + "loss": 0.2259, + "step": 5568 + }, + { + "epoch": 0.83, + "learning_rate": 6.207633829585048e-07, + "loss": 0.2504, + "step": 5600 + }, + { + "epoch": 0.84, + "learning_rate": 6.182293316439658e-07, + "loss": 0.188, + "step": 5632 + }, + { + "epoch": 0.84, + "learning_rate": 6.156952803294266e-07, + "loss": 0.1893, + "step": 5664 + }, + { + "epoch": 0.85, + "learning_rate": 6.131612290148875e-07, + "loss": 0.1905, + "step": 5696 + }, + { + "epoch": 0.85, + "learning_rate": 6.106271777003485e-07, + "loss": 0.2594, + "step": 5728 + }, + { + "epoch": 0.86, + "learning_rate": 6.080931263858092e-07, + "loss": 0.3084, + "step": 5760 + }, + { + "epoch": 0.86, + "learning_rate": 6.055590750712702e-07, + "loss": 0.1925, + "step": 5792 + }, + { + "epoch": 0.87, + "learning_rate": 6.03025023756731e-07, + "loss": 0.186, + "step": 5824 + }, + { + "epoch": 0.87, + "learning_rate": 6.004909724421919e-07, + "loss": 0.2302, + "step": 5856 + }, + { + "epoch": 0.88, + "learning_rate": 5.979569211276529e-07, + "loss": 0.1371, + "step": 5888 + }, + { + "epoch": 0.88, + "learning_rate": 5.954228698131137e-07, + "loss": 0.231, + "step": 5920 + }, + { + "epoch": 0.89, + "learning_rate": 5.928888184985746e-07, + "loss": 0.2012, + "step": 5952 + }, + { + "epoch": 0.89, + "learning_rate": 5.903547671840354e-07, + "loss": 0.2006, + "step": 5984 + }, + { + "epoch": 0.9, + "learning_rate": 5.878207158694963e-07, + "loss": 0.215, + "step": 6016 + }, + { + "epoch": 0.9, + "learning_rate": 5.852866645549572e-07, + "loss": 0.1471, + "step": 6048 + }, + { + "epoch": 0.91, + "learning_rate": 5.827526132404181e-07, + "loss": 0.2364, + "step": 6080 + }, + { + "epoch": 0.91, + "learning_rate": 5.80218561925879e-07, + "loss": 0.2881, + "step": 6112 + }, + { + "epoch": 0.91, + "learning_rate": 5.776845106113398e-07, + "loss": 0.1536, + "step": 6144 + }, + { + "epoch": 0.92, + "learning_rate": 5.751504592968008e-07, + "loss": 0.2317, + "step": 6176 + }, + { + "epoch": 0.92, + "learning_rate": 5.726164079822616e-07, + "loss": 0.1952, + "step": 6208 + }, + { + "epoch": 0.93, + "learning_rate": 5.700823566677225e-07, + "loss": 0.1602, + "step": 6240 + }, + { + "epoch": 0.93, + "learning_rate": 5.675483053531834e-07, + "loss": 0.212, + "step": 6272 + }, + { + "epoch": 0.94, + "learning_rate": 5.650142540386442e-07, + "loss": 0.2401, + "step": 6304 + }, + { + "epoch": 0.94, + "learning_rate": 5.624802027241052e-07, + "loss": 0.1992, + "step": 6336 + }, + { + "epoch": 0.95, + "learning_rate": 5.59946151409566e-07, + "loss": 0.2616, + "step": 6368 + }, + { + "epoch": 0.95, + "learning_rate": 5.574121000950269e-07, + "loss": 0.146, + "step": 6400 + }, + { + "epoch": 0.96, + "learning_rate": 5.548780487804878e-07, + "loss": 0.2081, + "step": 6432 + }, + { + "epoch": 0.96, + "learning_rate": 5.523439974659486e-07, + "loss": 0.207, + "step": 6464 + }, + { + "epoch": 0.97, + "learning_rate": 5.498099461514096e-07, + "loss": 0.2631, + "step": 6496 + }, + { + "epoch": 0.97, + "learning_rate": 5.472758948368704e-07, + "loss": 0.1721, + "step": 6528 + }, + { + "epoch": 0.98, + "learning_rate": 5.447418435223313e-07, + "loss": 0.1908, + "step": 6560 + }, + { + "epoch": 0.98, + "learning_rate": 5.422869813113715e-07, + "loss": 0.2238, + "step": 6592 + }, + { + "epoch": 0.99, + "learning_rate": 5.397529299968325e-07, + "loss": 0.2524, + "step": 6624 + }, + { + "epoch": 0.99, + "learning_rate": 5.372188786822933e-07, + "loss": 0.1968, + "step": 6656 + }, + { + "epoch": 1.0, + "learning_rate": 5.346848273677542e-07, + "loss": 0.2379, + "step": 6688 + }, + { + "epoch": 1.0, + "learning_rate": 5.32150776053215e-07, + "loss": 0.246, + "step": 6720 + }, + { + "epoch": 1.01, + "learning_rate": 5.296167247386759e-07, + "loss": 0.104, + "step": 6752 + }, + { + "epoch": 1.01, + "learning_rate": 5.270826734241369e-07, + "loss": 0.102, + "step": 6784 + }, + { + "epoch": 1.01, + "learning_rate": 5.245486221095977e-07, + "loss": 0.1432, + "step": 6816 + }, + { + "epoch": 1.02, + "learning_rate": 5.220145707950586e-07, + "loss": 0.1262, + "step": 6848 + }, + { + "epoch": 1.02, + "learning_rate": 5.194805194805194e-07, + "loss": 0.1299, + "step": 6880 + }, + { + "epoch": 1.03, + "learning_rate": 5.169464681659803e-07, + "loss": 0.1319, + "step": 6912 + }, + { + "epoch": 1.03, + "learning_rate": 5.144124168514412e-07, + "loss": 0.105, + "step": 6944 + }, + { + "epoch": 1.04, + "learning_rate": 5.118783655369021e-07, + "loss": 0.1233, + "step": 6976 + }, + { + "epoch": 1.04, + "learning_rate": 5.09344314222363e-07, + "loss": 0.0922, + "step": 7008 + }, + { + "epoch": 1.05, + "learning_rate": 5.068102629078239e-07, + "loss": 0.144, + "step": 7040 + }, + { + "epoch": 1.05, + "learning_rate": 5.042762115932847e-07, + "loss": 0.1828, + "step": 7072 + }, + { + "epoch": 1.06, + "learning_rate": 5.017421602787456e-07, + "loss": 0.1097, + "step": 7104 + }, + { + "epoch": 1.06, + "learning_rate": 4.992081089642065e-07, + "loss": 0.1867, + "step": 7136 + }, + { + "epoch": 1.07, + "learning_rate": 4.966740576496675e-07, + "loss": 0.1338, + "step": 7168 + }, + { + "epoch": 1.07, + "learning_rate": 4.941400063351283e-07, + "loss": 0.1535, + "step": 7200 + }, + { + "epoch": 1.08, + "learning_rate": 4.916059550205891e-07, + "loss": 0.1861, + "step": 7232 + }, + { + "epoch": 1.08, + "learning_rate": 4.8907190370605e-07, + "loss": 0.1284, + "step": 7264 + }, + { + "epoch": 1.09, + "learning_rate": 4.865378523915109e-07, + "loss": 0.1037, + "step": 7296 + }, + { + "epoch": 1.09, + "learning_rate": 4.840038010769719e-07, + "loss": 0.1217, + "step": 7328 + }, + { + "epoch": 1.1, + "learning_rate": 4.814697497624327e-07, + "loss": 0.1469, + "step": 7360 + }, + { + "epoch": 1.1, + "learning_rate": 4.789356984478935e-07, + "loss": 0.1218, + "step": 7392 + }, + { + "epoch": 1.11, + "learning_rate": 4.764016471333545e-07, + "loss": 0.1486, + "step": 7424 + }, + { + "epoch": 1.11, + "learning_rate": 4.738675958188153e-07, + "loss": 0.0796, + "step": 7456 + }, + { + "epoch": 1.11, + "learning_rate": 4.7141273360785554e-07, + "loss": 0.1163, + "step": 7488 + }, + { + "epoch": 1.12, + "learning_rate": 4.688786822933164e-07, + "loss": 0.0821, + "step": 7520 + }, + { + "epoch": 1.12, + "learning_rate": 4.663446309787773e-07, + "loss": 0.1701, + "step": 7552 + }, + { + "epoch": 1.13, + "learning_rate": 4.638105796642382e-07, + "loss": 0.1002, + "step": 7584 + }, + { + "epoch": 1.13, + "learning_rate": 4.613557174532784e-07, + "loss": 0.0914, + "step": 7616 + }, + { + "epoch": 1.14, + "learning_rate": 4.5882166613873927e-07, + "loss": 0.1374, + "step": 7648 + }, + { + "epoch": 1.14, + "learning_rate": 4.562876148242002e-07, + "loss": 0.1142, + "step": 7680 + }, + { + "epoch": 1.15, + "learning_rate": 4.5375356350966105e-07, + "loss": 0.1351, + "step": 7712 + }, + { + "epoch": 1.15, + "learning_rate": 4.5121951219512194e-07, + "loss": 0.1513, + "step": 7744 + }, + { + "epoch": 1.16, + "learning_rate": 4.486854608805828e-07, + "loss": 0.0998, + "step": 7776 + }, + { + "epoch": 1.16, + "learning_rate": 4.461514095660437e-07, + "loss": 0.1115, + "step": 7808 + }, + { + "epoch": 1.17, + "learning_rate": 4.4361735825150457e-07, + "loss": 0.1373, + "step": 7840 + }, + { + "epoch": 1.17, + "learning_rate": 4.4108330693696546e-07, + "loss": 0.1614, + "step": 7872 + }, + { + "epoch": 1.18, + "learning_rate": 4.3854925562242635e-07, + "loss": 0.0826, + "step": 7904 + }, + { + "epoch": 1.18, + "learning_rate": 4.3601520430788724e-07, + "loss": 0.1129, + "step": 7936 + }, + { + "epoch": 1.19, + "learning_rate": 4.334811529933481e-07, + "loss": 0.0825, + "step": 7968 + }, + { + "epoch": 1.19, + "learning_rate": 4.3094710167880897e-07, + "loss": 0.1497, + "step": 8000 + }, + { + "epoch": 1.2, + "learning_rate": 4.284130503642698e-07, + "loss": 0.1575, + "step": 8032 + }, + { + "epoch": 1.2, + "learning_rate": 4.2587899904973075e-07, + "loss": 0.1377, + "step": 8064 + }, + { + "epoch": 1.21, + "learning_rate": 4.2334494773519165e-07, + "loss": 0.152, + "step": 8096 + }, + { + "epoch": 1.21, + "learning_rate": 4.208108964206525e-07, + "loss": 0.1142, + "step": 8128 + }, + { + "epoch": 1.21, + "learning_rate": 4.182768451061134e-07, + "loss": 0.1458, + "step": 8160 + }, + { + "epoch": 1.22, + "learning_rate": 4.1574279379157427e-07, + "loss": 0.1216, + "step": 8192 + }, + { + "epoch": 1.22, + "learning_rate": 4.132087424770351e-07, + "loss": 0.112, + "step": 8224 + }, + { + "epoch": 1.23, + "learning_rate": 4.1067469116249605e-07, + "loss": 0.1517, + "step": 8256 + }, + { + "epoch": 1.23, + "learning_rate": 4.081406398479569e-07, + "loss": 0.1046, + "step": 8288 + }, + { + "epoch": 1.24, + "learning_rate": 4.056065885334178e-07, + "loss": 0.1538, + "step": 8320 + }, + { + "epoch": 1.24, + "learning_rate": 4.030725372188787e-07, + "loss": 0.1734, + "step": 8352 + }, + { + "epoch": 1.25, + "learning_rate": 4.005384859043395e-07, + "loss": 0.1236, + "step": 8384 + }, + { + "epoch": 1.25, + "learning_rate": 3.980044345898004e-07, + "loss": 0.1129, + "step": 8416 + }, + { + "epoch": 1.26, + "learning_rate": 3.9547038327526135e-07, + "loss": 0.1193, + "step": 8448 + }, + { + "epoch": 1.26, + "learning_rate": 3.929363319607222e-07, + "loss": 0.1124, + "step": 8480 + }, + { + "epoch": 1.27, + "learning_rate": 3.904022806461831e-07, + "loss": 0.103, + "step": 8512 + }, + { + "epoch": 1.27, + "learning_rate": 3.878682293316439e-07, + "loss": 0.1526, + "step": 8544 + }, + { + "epoch": 1.28, + "learning_rate": 3.853341780171048e-07, + "loss": 0.1747, + "step": 8576 + }, + { + "epoch": 1.28, + "learning_rate": 3.8280012670256575e-07, + "loss": 0.0711, + "step": 8608 + }, + { + "epoch": 1.29, + "learning_rate": 3.802660753880266e-07, + "loss": 0.131, + "step": 8640 + }, + { + "epoch": 1.29, + "learning_rate": 3.777320240734875e-07, + "loss": 0.0695, + "step": 8672 + }, + { + "epoch": 1.3, + "learning_rate": 3.751979727589484e-07, + "loss": 0.1176, + "step": 8704 + }, + { + "epoch": 1.3, + "learning_rate": 3.726639214444092e-07, + "loss": 0.1141, + "step": 8736 + }, + { + "epoch": 1.31, + "learning_rate": 3.701298701298701e-07, + "loss": 0.1437, + "step": 8768 + }, + { + "epoch": 1.31, + "learning_rate": 3.6759581881533095e-07, + "loss": 0.1273, + "step": 8800 + }, + { + "epoch": 1.31, + "learning_rate": 3.650617675007919e-07, + "loss": 0.0765, + "step": 8832 + }, + { + "epoch": 1.32, + "learning_rate": 3.625277161862528e-07, + "loss": 0.1322, + "step": 8864 + }, + { + "epoch": 1.32, + "learning_rate": 3.599936648717136e-07, + "loss": 0.1643, + "step": 8896 + }, + { + "epoch": 1.33, + "learning_rate": 3.574596135571745e-07, + "loss": 0.1005, + "step": 8928 + }, + { + "epoch": 1.33, + "learning_rate": 3.549255622426354e-07, + "loss": 0.1545, + "step": 8960 + }, + { + "epoch": 1.34, + "learning_rate": 3.523915109280963e-07, + "loss": 0.0755, + "step": 8992 + }, + { + "epoch": 1.34, + "learning_rate": 3.498574596135572e-07, + "loss": 0.0917, + "step": 9024 + }, + { + "epoch": 1.35, + "learning_rate": 3.47323408299018e-07, + "loss": 0.0893, + "step": 9056 + }, + { + "epoch": 1.35, + "learning_rate": 3.447893569844789e-07, + "loss": 0.152, + "step": 9088 + }, + { + "epoch": 1.36, + "learning_rate": 3.422553056699398e-07, + "loss": 0.1154, + "step": 9120 + }, + { + "epoch": 1.36, + "learning_rate": 3.3972125435540065e-07, + "loss": 0.0817, + "step": 9152 + }, + { + "epoch": 1.37, + "learning_rate": 3.371872030408616e-07, + "loss": 0.083, + "step": 9184 + }, + { + "epoch": 1.37, + "learning_rate": 3.346531517263225e-07, + "loss": 0.1084, + "step": 9216 + }, + { + "epoch": 1.38, + "learning_rate": 3.321191004117833e-07, + "loss": 0.1514, + "step": 9248 + }, + { + "epoch": 1.38, + "learning_rate": 3.295850490972442e-07, + "loss": 0.1195, + "step": 9280 + }, + { + "epoch": 1.39, + "learning_rate": 3.2705099778270505e-07, + "loss": 0.0939, + "step": 9312 + }, + { + "epoch": 1.39, + "learning_rate": 3.2451694646816595e-07, + "loss": 0.1227, + "step": 9344 + }, + { + "epoch": 1.4, + "learning_rate": 3.219828951536269e-07, + "loss": 0.1342, + "step": 9376 + }, + { + "epoch": 1.4, + "learning_rate": 3.1944884383908773e-07, + "loss": 0.1214, + "step": 9408 + }, + { + "epoch": 1.41, + "learning_rate": 3.169147925245486e-07, + "loss": 0.1624, + "step": 9440 + }, + { + "epoch": 1.41, + "learning_rate": 3.143807412100095e-07, + "loss": 0.1261, + "step": 9472 + }, + { + "epoch": 1.41, + "learning_rate": 3.1184668989547035e-07, + "loss": 0.1674, + "step": 9504 + }, + { + "epoch": 1.42, + "learning_rate": 3.0931263858093124e-07, + "loss": 0.1613, + "step": 9536 + }, + { + "epoch": 1.42, + "learning_rate": 3.0677858726639213e-07, + "loss": 0.1519, + "step": 9568 + }, + { + "epoch": 1.43, + "learning_rate": 3.04244535951853e-07, + "loss": 0.1107, + "step": 9600 + }, + { + "epoch": 1.43, + "learning_rate": 3.017104846373139e-07, + "loss": 0.0992, + "step": 9632 + }, + { + "epoch": 1.44, + "learning_rate": 2.9917643332277476e-07, + "loss": 0.0921, + "step": 9664 + }, + { + "epoch": 1.44, + "learning_rate": 2.9664238200823565e-07, + "loss": 0.1478, + "step": 9696 + }, + { + "epoch": 1.45, + "learning_rate": 2.9410833069369654e-07, + "loss": 0.1268, + "step": 9728 + }, + { + "epoch": 1.45, + "learning_rate": 2.9157427937915743e-07, + "loss": 0.1086, + "step": 9760 + }, + { + "epoch": 1.46, + "learning_rate": 2.890402280646183e-07, + "loss": 0.1913, + "step": 9792 + }, + { + "epoch": 1.46, + "learning_rate": 2.8650617675007916e-07, + "loss": 0.1626, + "step": 9824 + }, + { + "epoch": 1.47, + "learning_rate": 2.8397212543554005e-07, + "loss": 0.1647, + "step": 9856 + }, + { + "epoch": 1.47, + "learning_rate": 2.8143807412100095e-07, + "loss": 0.1245, + "step": 9888 + }, + { + "epoch": 1.48, + "learning_rate": 2.789040228064618e-07, + "loss": 0.1326, + "step": 9920 + }, + { + "epoch": 1.48, + "learning_rate": 2.7636997149192273e-07, + "loss": 0.1095, + "step": 9952 + }, + { + "epoch": 1.49, + "learning_rate": 2.738359201773836e-07, + "loss": 0.1089, + "step": 9984 + }, + { + "epoch": 1.49, + "learning_rate": 2.7130186886284446e-07, + "loss": 0.1074, + "step": 10016 + }, + { + "epoch": 1.5, + "learning_rate": 2.6876781754830535e-07, + "loss": 0.1084, + "step": 10048 + } + ], + "logging_steps": 32, + "max_steps": 13434, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 3358, + "total_flos": 4.27776497270784e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-16791/training_args.bin b/checkpoint-16791/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b2c662ef5476ddd668bfd2aa78effb1b4e6129c1 --- /dev/null +++ b/checkpoint-16791/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e4db6f6fcaf2005bb3f583cfbd6e7afdd0dfdcdb7d4db7e107d5c22865fcf47 +size 5048 diff --git a/checkpoint-20149/config.json b/checkpoint-20149/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dbe4666ca61b271d1e7307d3cdd7e387a877b181 --- /dev/null +++ b/checkpoint-20149/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "models/openai/whisper-large-v2/finetune/jacob_filter/checkpoint-6717", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-20149/generation_config.json b/checkpoint-20149/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/checkpoint-20149/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/checkpoint-20149/model-00001-of-00002.safetensors b/checkpoint-20149/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2094e6fe61e720c8356b0cf43fa3e5dd8686e8cf --- /dev/null +++ b/checkpoint-20149/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80523576b409f4707ecea08837512fe2e8a16584f6a339a95f9daa1966598b0a +size 4992706480 diff --git a/checkpoint-20149/model-00002-of-00002.safetensors b/checkpoint-20149/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8d4e436221db30f6d002db60328b62faec9ddc90 --- /dev/null +++ b/checkpoint-20149/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b12b93d32371965d98c848ea6108dd377d01aa81a49d42a6bb585c42f1443db +size 1180663192 diff --git a/checkpoint-20149/model.safetensors.index.json b/checkpoint-20149/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/checkpoint-20149/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-20149/optimizer.pt b/checkpoint-20149/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..af59c3f4dfc1c93a48d1d8bb49572404d306f947 --- /dev/null +++ b/checkpoint-20149/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adb2890fea374ef932d48052fb1da275d120c07c4f8b35899d029498eec8409a +size 3095074288 diff --git a/checkpoint-20149/preprocessor_config.json b/checkpoint-20149/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-20149/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-20149/rng_state.pth b/checkpoint-20149/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fa9b0e4d2c9800f61d95cba184cdf5e6296cb903 --- /dev/null +++ b/checkpoint-20149/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65e9a4671c9f32d1763620db0c5407681168d62600d835878502ed3977239b4b +size 14244 diff --git a/checkpoint-20149/scheduler.pt b/checkpoint-20149/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cfa0b78f04bc1d5bb642ddf70c4ff4dae3a59a4a --- /dev/null +++ b/checkpoint-20149/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b0da0724f15e22687f61a706af43d751e5f3ee0e884a24e5dd70969f919c159 +size 1064 diff --git a/checkpoint-20149/trainer_state.json b/checkpoint-20149/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..11e9111f7807433b17c942f3f4058d6bfe07ae23 --- /dev/null +++ b/checkpoint-20149/trainer_state.json @@ -0,0 +1,2535 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9997022480273932, + "eval_steps": 500, + "global_step": 13432, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.722084367245657e-08, + "loss": 0.2836, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 7.692307692307692e-08, + "loss": 0.2111, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.1662531017369727e-07, + "loss": 0.2145, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 1.563275434243176e-07, + "loss": 0.2375, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 1.9602977667493795e-07, + "loss": 0.1839, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 2.3573200992555832e-07, + "loss": 0.2864, + "step": 192 + }, + { + "epoch": 0.03, + "learning_rate": 2.7543424317617863e-07, + "loss": 0.1799, + "step": 224 + }, + { + "epoch": 0.04, + "learning_rate": 3.1513647642679897e-07, + "loss": 0.1609, + "step": 256 + }, + { + "epoch": 0.04, + "learning_rate": 3.5483870967741936e-07, + "loss": 0.1434, + "step": 288 + }, + { + "epoch": 0.05, + "learning_rate": 3.945409429280397e-07, + "loss": 0.1699, + "step": 320 + }, + { + "epoch": 0.05, + "learning_rate": 4.3424317617866004e-07, + "loss": 0.1898, + "step": 352 + }, + { + "epoch": 0.06, + "learning_rate": 4.739454094292804e-07, + "loss": 0.1665, + "step": 384 + }, + { + "epoch": 0.06, + "learning_rate": 5.136476426799007e-07, + "loss": 0.1358, + "step": 416 + }, + { + "epoch": 0.07, + "learning_rate": 5.533498759305211e-07, + "loss": 0.1713, + "step": 448 + }, + { + "epoch": 0.07, + "learning_rate": 5.930521091811415e-07, + "loss": 0.1817, + "step": 480 + }, + { + "epoch": 0.08, + "learning_rate": 6.327543424317618e-07, + "loss": 0.149, + "step": 512 + }, + { + "epoch": 0.08, + "learning_rate": 6.724565756823821e-07, + "loss": 0.165, + "step": 544 + }, + { + "epoch": 0.09, + "learning_rate": 7.121588089330024e-07, + "loss": 0.2282, + "step": 576 + }, + { + "epoch": 0.09, + "learning_rate": 7.518610421836227e-07, + "loss": 0.1666, + "step": 608 + }, + { + "epoch": 0.1, + "learning_rate": 7.915632754342431e-07, + "loss": 0.1229, + "step": 640 + }, + { + "epoch": 0.1, + "learning_rate": 8.312655086848634e-07, + "loss": 0.2118, + "step": 672 + }, + { + "epoch": 0.1, + "learning_rate": 8.709677419354838e-07, + "loss": 0.1869, + "step": 704 + }, + { + "epoch": 0.11, + "learning_rate": 9.106699751861042e-07, + "loss": 0.2196, + "step": 736 + }, + { + "epoch": 0.11, + "learning_rate": 9.503722084367245e-07, + "loss": 0.0936, + "step": 768 + }, + { + "epoch": 0.12, + "learning_rate": 9.90074441687345e-07, + "loss": 0.1839, + "step": 800 + }, + { + "epoch": 0.12, + "learning_rate": 9.980994615140957e-07, + "loss": 0.1675, + "step": 832 + }, + { + "epoch": 0.13, + "learning_rate": 9.955654101995564e-07, + "loss": 0.1595, + "step": 864 + }, + { + "epoch": 0.13, + "learning_rate": 9.930313588850174e-07, + "loss": 0.1556, + "step": 896 + }, + { + "epoch": 0.14, + "learning_rate": 9.905764966740576e-07, + "loss": 0.1711, + "step": 928 + }, + { + "epoch": 0.14, + "learning_rate": 9.880424453595185e-07, + "loss": 0.1777, + "step": 960 + }, + { + "epoch": 0.15, + "learning_rate": 9.855083940449792e-07, + "loss": 0.2031, + "step": 992 + }, + { + "epoch": 0.15, + "learning_rate": 9.829743427304402e-07, + "loss": 0.1529, + "step": 1024 + }, + { + "epoch": 0.16, + "learning_rate": 9.804402914159011e-07, + "loss": 0.1365, + "step": 1056 + }, + { + "epoch": 0.16, + "learning_rate": 9.77906240101362e-07, + "loss": 0.1922, + "step": 1088 + }, + { + "epoch": 0.17, + "learning_rate": 9.75372188786823e-07, + "loss": 0.133, + "step": 1120 + }, + { + "epoch": 0.17, + "learning_rate": 9.728381374722837e-07, + "loss": 0.1692, + "step": 1152 + }, + { + "epoch": 0.18, + "learning_rate": 9.703040861577447e-07, + "loss": 0.1022, + "step": 1184 + }, + { + "epoch": 0.18, + "learning_rate": 9.677700348432054e-07, + "loss": 0.2052, + "step": 1216 + }, + { + "epoch": 0.19, + "learning_rate": 9.652359835286664e-07, + "loss": 0.1546, + "step": 1248 + }, + { + "epoch": 0.19, + "learning_rate": 9.627019322141273e-07, + "loss": 0.149, + "step": 1280 + }, + { + "epoch": 0.2, + "learning_rate": 9.601678808995883e-07, + "loss": 0.1281, + "step": 1312 + }, + { + "epoch": 0.2, + "learning_rate": 9.57633829585049e-07, + "loss": 0.1437, + "step": 1344 + }, + { + "epoch": 0.2, + "learning_rate": 9.5509977827051e-07, + "loss": 0.2097, + "step": 1376 + }, + { + "epoch": 0.21, + "learning_rate": 9.525657269559708e-07, + "loss": 0.1308, + "step": 1408 + }, + { + "epoch": 0.21, + "learning_rate": 9.500316756414317e-07, + "loss": 0.1691, + "step": 1440 + }, + { + "epoch": 0.22, + "learning_rate": 9.474976243268927e-07, + "loss": 0.2319, + "step": 1472 + }, + { + "epoch": 0.22, + "learning_rate": 9.449635730123534e-07, + "loss": 0.2226, + "step": 1504 + }, + { + "epoch": 0.23, + "learning_rate": 9.424295216978143e-07, + "loss": 0.1789, + "step": 1536 + }, + { + "epoch": 0.23, + "learning_rate": 9.398954703832752e-07, + "loss": 0.1932, + "step": 1568 + }, + { + "epoch": 0.24, + "learning_rate": 9.373614190687361e-07, + "loss": 0.1718, + "step": 1600 + }, + { + "epoch": 0.24, + "learning_rate": 9.34827367754197e-07, + "loss": 0.156, + "step": 1632 + }, + { + "epoch": 0.25, + "learning_rate": 9.322933164396578e-07, + "loss": 0.1512, + "step": 1664 + }, + { + "epoch": 0.25, + "learning_rate": 9.297592651251187e-07, + "loss": 0.0968, + "step": 1696 + }, + { + "epoch": 0.26, + "learning_rate": 9.272252138105796e-07, + "loss": 0.0932, + "step": 1728 + }, + { + "epoch": 0.26, + "learning_rate": 9.246911624960405e-07, + "loss": 0.2464, + "step": 1760 + }, + { + "epoch": 0.27, + "learning_rate": 9.221571111815014e-07, + "loss": 0.2036, + "step": 1792 + }, + { + "epoch": 0.27, + "learning_rate": 9.196230598669623e-07, + "loss": 0.1245, + "step": 1824 + }, + { + "epoch": 0.28, + "learning_rate": 9.170890085524232e-07, + "loss": 0.1097, + "step": 1856 + }, + { + "epoch": 0.28, + "learning_rate": 9.14554957237884e-07, + "loss": 0.1844, + "step": 1888 + }, + { + "epoch": 0.29, + "learning_rate": 9.120209059233449e-07, + "loss": 0.1114, + "step": 1920 + }, + { + "epoch": 0.29, + "learning_rate": 9.094868546088058e-07, + "loss": 0.1992, + "step": 1952 + }, + { + "epoch": 0.3, + "learning_rate": 9.069528032942667e-07, + "loss": 0.1721, + "step": 1984 + }, + { + "epoch": 0.3, + "learning_rate": 9.044187519797275e-07, + "loss": 0.1473, + "step": 2016 + }, + { + "epoch": 0.3, + "learning_rate": 9.018847006651884e-07, + "loss": 0.1865, + "step": 2048 + }, + { + "epoch": 0.31, + "learning_rate": 8.993506493506493e-07, + "loss": 0.1583, + "step": 2080 + }, + { + "epoch": 0.31, + "learning_rate": 8.968165980361102e-07, + "loss": 0.1866, + "step": 2112 + }, + { + "epoch": 0.32, + "learning_rate": 8.942825467215711e-07, + "loss": 0.1617, + "step": 2144 + }, + { + "epoch": 0.32, + "learning_rate": 8.917484954070319e-07, + "loss": 0.1189, + "step": 2176 + }, + { + "epoch": 0.33, + "learning_rate": 8.892144440924928e-07, + "loss": 0.148, + "step": 2208 + }, + { + "epoch": 0.33, + "learning_rate": 8.866803927779537e-07, + "loss": 0.131, + "step": 2240 + }, + { + "epoch": 0.34, + "learning_rate": 8.841463414634146e-07, + "loss": 0.2261, + "step": 2272 + }, + { + "epoch": 0.34, + "learning_rate": 8.816122901488755e-07, + "loss": 0.1742, + "step": 2304 + }, + { + "epoch": 0.35, + "learning_rate": 8.790782388343364e-07, + "loss": 0.164, + "step": 2336 + }, + { + "epoch": 0.35, + "learning_rate": 8.765441875197972e-07, + "loss": 0.1161, + "step": 2368 + }, + { + "epoch": 0.36, + "learning_rate": 8.74010136205258e-07, + "loss": 0.1636, + "step": 2400 + }, + { + "epoch": 0.36, + "learning_rate": 8.71476084890719e-07, + "loss": 0.2416, + "step": 2432 + }, + { + "epoch": 0.37, + "learning_rate": 8.689420335761799e-07, + "loss": 0.1632, + "step": 2464 + }, + { + "epoch": 0.37, + "learning_rate": 8.664079822616408e-07, + "loss": 0.1477, + "step": 2496 + }, + { + "epoch": 0.38, + "learning_rate": 8.638739309471016e-07, + "loss": 0.2083, + "step": 2528 + }, + { + "epoch": 0.38, + "learning_rate": 8.613398796325625e-07, + "loss": 0.1599, + "step": 2560 + }, + { + "epoch": 0.39, + "learning_rate": 8.588058283180234e-07, + "loss": 0.1817, + "step": 2592 + }, + { + "epoch": 0.39, + "learning_rate": 8.562717770034843e-07, + "loss": 0.1005, + "step": 2624 + }, + { + "epoch": 0.4, + "learning_rate": 8.537377256889452e-07, + "loss": 0.168, + "step": 2656 + }, + { + "epoch": 0.4, + "learning_rate": 8.51203674374406e-07, + "loss": 0.2418, + "step": 2688 + }, + { + "epoch": 0.4, + "learning_rate": 8.486696230598669e-07, + "loss": 0.1881, + "step": 2720 + }, + { + "epoch": 0.41, + "learning_rate": 8.461355717453278e-07, + "loss": 0.1829, + "step": 2752 + }, + { + "epoch": 0.41, + "learning_rate": 8.436015204307887e-07, + "loss": 0.1073, + "step": 2784 + }, + { + "epoch": 0.42, + "learning_rate": 8.410674691162496e-07, + "loss": 0.1324, + "step": 2816 + }, + { + "epoch": 0.42, + "learning_rate": 8.385334178017105e-07, + "loss": 0.2077, + "step": 2848 + }, + { + "epoch": 0.43, + "learning_rate": 8.359993664871713e-07, + "loss": 0.2248, + "step": 2880 + }, + { + "epoch": 0.43, + "learning_rate": 8.334653151726322e-07, + "loss": 0.1337, + "step": 2912 + }, + { + "epoch": 0.44, + "learning_rate": 8.30931263858093e-07, + "loss": 0.1906, + "step": 2944 + }, + { + "epoch": 0.44, + "learning_rate": 8.28397212543554e-07, + "loss": 0.1893, + "step": 2976 + }, + { + "epoch": 0.45, + "learning_rate": 8.259423503325942e-07, + "loss": 0.2029, + "step": 3008 + }, + { + "epoch": 0.45, + "learning_rate": 8.234082990180551e-07, + "loss": 0.157, + "step": 3040 + }, + { + "epoch": 0.46, + "learning_rate": 8.208742477035159e-07, + "loss": 0.1433, + "step": 3072 + }, + { + "epoch": 0.46, + "learning_rate": 8.183401963889769e-07, + "loss": 0.1689, + "step": 3104 + }, + { + "epoch": 0.47, + "learning_rate": 8.158061450744377e-07, + "loss": 0.2012, + "step": 3136 + }, + { + "epoch": 0.47, + "learning_rate": 8.132720937598986e-07, + "loss": 0.175, + "step": 3168 + }, + { + "epoch": 0.48, + "learning_rate": 8.107380424453595e-07, + "loss": 0.1961, + "step": 3200 + }, + { + "epoch": 0.48, + "learning_rate": 8.082039911308203e-07, + "loss": 0.2547, + "step": 3232 + }, + { + "epoch": 0.49, + "learning_rate": 8.056699398162813e-07, + "loss": 0.1935, + "step": 3264 + }, + { + "epoch": 0.49, + "learning_rate": 8.031358885017421e-07, + "loss": 0.2149, + "step": 3296 + }, + { + "epoch": 0.5, + "learning_rate": 8.00601837187203e-07, + "loss": 0.1809, + "step": 3328 + }, + { + "epoch": 0.5, + "learning_rate": 7.980677858726639e-07, + "loss": 0.2072, + "step": 3360 + }, + { + "epoch": 0.5, + "learning_rate": 7.955337345581247e-07, + "loss": 0.2116, + "step": 3392 + }, + { + "epoch": 0.51, + "learning_rate": 7.929996832435857e-07, + "loss": 0.1737, + "step": 3424 + }, + { + "epoch": 0.51, + "learning_rate": 7.904656319290464e-07, + "loss": 0.2219, + "step": 3456 + }, + { + "epoch": 0.52, + "learning_rate": 7.879315806145074e-07, + "loss": 0.1849, + "step": 3488 + }, + { + "epoch": 0.52, + "learning_rate": 7.853975292999683e-07, + "loss": 0.1884, + "step": 3520 + }, + { + "epoch": 0.53, + "learning_rate": 7.828634779854292e-07, + "loss": 0.2192, + "step": 3552 + }, + { + "epoch": 0.53, + "learning_rate": 7.803294266708901e-07, + "loss": 0.1958, + "step": 3584 + }, + { + "epoch": 0.54, + "learning_rate": 7.777953753563509e-07, + "loss": 0.1433, + "step": 3616 + }, + { + "epoch": 0.54, + "learning_rate": 7.752613240418118e-07, + "loss": 0.2151, + "step": 3648 + }, + { + "epoch": 0.55, + "learning_rate": 7.727272727272727e-07, + "loss": 0.1675, + "step": 3680 + }, + { + "epoch": 0.55, + "learning_rate": 7.701932214127336e-07, + "loss": 0.1586, + "step": 3712 + }, + { + "epoch": 0.56, + "learning_rate": 7.676591700981945e-07, + "loss": 0.2881, + "step": 3744 + }, + { + "epoch": 0.56, + "learning_rate": 7.651251187836553e-07, + "loss": 0.196, + "step": 3776 + }, + { + "epoch": 0.57, + "learning_rate": 7.625910674691162e-07, + "loss": 0.1285, + "step": 3808 + }, + { + "epoch": 0.57, + "learning_rate": 7.60057016154577e-07, + "loss": 0.2262, + "step": 3840 + }, + { + "epoch": 0.58, + "learning_rate": 7.57522964840038e-07, + "loss": 0.2309, + "step": 3872 + }, + { + "epoch": 0.58, + "learning_rate": 7.549889135254989e-07, + "loss": 0.1533, + "step": 3904 + }, + { + "epoch": 0.59, + "learning_rate": 7.524548622109597e-07, + "loss": 0.1297, + "step": 3936 + }, + { + "epoch": 0.59, + "learning_rate": 7.499208108964206e-07, + "loss": 0.1808, + "step": 3968 + }, + { + "epoch": 0.6, + "learning_rate": 7.473867595818814e-07, + "loss": 0.2401, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 7.448527082673424e-07, + "loss": 0.2507, + "step": 4032 + }, + { + "epoch": 0.61, + "learning_rate": 7.423186569528033e-07, + "loss": 0.1562, + "step": 4064 + }, + { + "epoch": 0.61, + "learning_rate": 7.397846056382642e-07, + "loss": 0.1912, + "step": 4096 + }, + { + "epoch": 0.61, + "learning_rate": 7.373297434273043e-07, + "loss": 0.1703, + "step": 4128 + }, + { + "epoch": 0.62, + "learning_rate": 7.347956921127653e-07, + "loss": 0.1471, + "step": 4160 + }, + { + "epoch": 0.62, + "learning_rate": 7.322616407982262e-07, + "loss": 0.1539, + "step": 4192 + }, + { + "epoch": 0.63, + "learning_rate": 7.297275894836869e-07, + "loss": 0.1521, + "step": 4224 + }, + { + "epoch": 0.63, + "learning_rate": 7.271935381691479e-07, + "loss": 0.2623, + "step": 4256 + }, + { + "epoch": 0.64, + "learning_rate": 7.246594868546087e-07, + "loss": 0.1753, + "step": 4288 + }, + { + "epoch": 0.64, + "learning_rate": 7.221254355400697e-07, + "loss": 0.1945, + "step": 4320 + }, + { + "epoch": 0.65, + "learning_rate": 7.195913842255306e-07, + "loss": 0.2153, + "step": 4352 + }, + { + "epoch": 0.65, + "learning_rate": 7.170573329109915e-07, + "loss": 0.2841, + "step": 4384 + }, + { + "epoch": 0.66, + "learning_rate": 7.145232815964523e-07, + "loss": 0.1759, + "step": 4416 + }, + { + "epoch": 0.66, + "learning_rate": 7.119892302819131e-07, + "loss": 0.2214, + "step": 4448 + }, + { + "epoch": 0.67, + "learning_rate": 7.094551789673741e-07, + "loss": 0.188, + "step": 4480 + }, + { + "epoch": 0.67, + "learning_rate": 7.069211276528349e-07, + "loss": 0.1579, + "step": 4512 + }, + { + "epoch": 0.68, + "learning_rate": 7.043870763382959e-07, + "loss": 0.2213, + "step": 4544 + }, + { + "epoch": 0.68, + "learning_rate": 7.018530250237567e-07, + "loss": 0.2042, + "step": 4576 + }, + { + "epoch": 0.69, + "learning_rate": 6.993189737092175e-07, + "loss": 0.1852, + "step": 4608 + }, + { + "epoch": 0.69, + "learning_rate": 6.967849223946785e-07, + "loss": 0.1716, + "step": 4640 + }, + { + "epoch": 0.7, + "learning_rate": 6.942508710801393e-07, + "loss": 0.1645, + "step": 4672 + }, + { + "epoch": 0.7, + "learning_rate": 6.917168197656003e-07, + "loss": 0.1986, + "step": 4704 + }, + { + "epoch": 0.71, + "learning_rate": 6.89182768451061e-07, + "loss": 0.2531, + "step": 4736 + }, + { + "epoch": 0.71, + "learning_rate": 6.866487171365219e-07, + "loss": 0.1792, + "step": 4768 + }, + { + "epoch": 0.71, + "learning_rate": 6.841146658219829e-07, + "loss": 0.1843, + "step": 4800 + }, + { + "epoch": 0.72, + "learning_rate": 6.815806145074437e-07, + "loss": 0.2175, + "step": 4832 + }, + { + "epoch": 0.72, + "learning_rate": 6.790465631929047e-07, + "loss": 0.2083, + "step": 4864 + }, + { + "epoch": 0.73, + "learning_rate": 6.765125118783655e-07, + "loss": 0.1729, + "step": 4896 + }, + { + "epoch": 0.73, + "learning_rate": 6.739784605638263e-07, + "loss": 0.1849, + "step": 4928 + }, + { + "epoch": 0.74, + "learning_rate": 6.714444092492873e-07, + "loss": 0.2374, + "step": 4960 + }, + { + "epoch": 0.74, + "learning_rate": 6.689103579347481e-07, + "loss": 0.241, + "step": 4992 + }, + { + "epoch": 0.75, + "learning_rate": 6.663763066202091e-07, + "loss": 0.1853, + "step": 5024 + }, + { + "epoch": 0.75, + "learning_rate": 6.638422553056699e-07, + "loss": 0.1957, + "step": 5056 + }, + { + "epoch": 0.76, + "learning_rate": 6.613082039911308e-07, + "loss": 0.2052, + "step": 5088 + }, + { + "epoch": 0.76, + "learning_rate": 6.587741526765917e-07, + "loss": 0.2321, + "step": 5120 + }, + { + "epoch": 0.77, + "learning_rate": 6.562401013620525e-07, + "loss": 0.1804, + "step": 5152 + }, + { + "epoch": 0.77, + "learning_rate": 6.537060500475135e-07, + "loss": 0.1842, + "step": 5184 + }, + { + "epoch": 0.78, + "learning_rate": 6.511719987329743e-07, + "loss": 0.2388, + "step": 5216 + }, + { + "epoch": 0.78, + "learning_rate": 6.486379474184352e-07, + "loss": 0.2417, + "step": 5248 + }, + { + "epoch": 0.79, + "learning_rate": 6.46103896103896e-07, + "loss": 0.2224, + "step": 5280 + }, + { + "epoch": 0.79, + "learning_rate": 6.435698447893569e-07, + "loss": 0.2029, + "step": 5312 + }, + { + "epoch": 0.8, + "learning_rate": 6.410357934748179e-07, + "loss": 0.2807, + "step": 5344 + }, + { + "epoch": 0.8, + "learning_rate": 6.385017421602787e-07, + "loss": 0.192, + "step": 5376 + }, + { + "epoch": 0.81, + "learning_rate": 6.359676908457397e-07, + "loss": 0.1848, + "step": 5408 + }, + { + "epoch": 0.81, + "learning_rate": 6.334336395312004e-07, + "loss": 0.2143, + "step": 5440 + }, + { + "epoch": 0.81, + "learning_rate": 6.308995882166613e-07, + "loss": 0.2421, + "step": 5472 + }, + { + "epoch": 0.82, + "learning_rate": 6.283655369021223e-07, + "loss": 0.1724, + "step": 5504 + }, + { + "epoch": 0.82, + "learning_rate": 6.258314855875831e-07, + "loss": 0.1207, + "step": 5536 + }, + { + "epoch": 0.83, + "learning_rate": 6.232974342730441e-07, + "loss": 0.2259, + "step": 5568 + }, + { + "epoch": 0.83, + "learning_rate": 6.207633829585048e-07, + "loss": 0.2504, + "step": 5600 + }, + { + "epoch": 0.84, + "learning_rate": 6.182293316439658e-07, + "loss": 0.188, + "step": 5632 + }, + { + "epoch": 0.84, + "learning_rate": 6.156952803294266e-07, + "loss": 0.1893, + "step": 5664 + }, + { + "epoch": 0.85, + "learning_rate": 6.131612290148875e-07, + "loss": 0.1905, + "step": 5696 + }, + { + "epoch": 0.85, + "learning_rate": 6.106271777003485e-07, + "loss": 0.2594, + "step": 5728 + }, + { + "epoch": 0.86, + "learning_rate": 6.080931263858092e-07, + "loss": 0.3084, + "step": 5760 + }, + { + "epoch": 0.86, + "learning_rate": 6.055590750712702e-07, + "loss": 0.1925, + "step": 5792 + }, + { + "epoch": 0.87, + "learning_rate": 6.03025023756731e-07, + "loss": 0.186, + "step": 5824 + }, + { + "epoch": 0.87, + "learning_rate": 6.004909724421919e-07, + "loss": 0.2302, + "step": 5856 + }, + { + "epoch": 0.88, + "learning_rate": 5.979569211276529e-07, + "loss": 0.1371, + "step": 5888 + }, + { + "epoch": 0.88, + "learning_rate": 5.954228698131137e-07, + "loss": 0.231, + "step": 5920 + }, + { + "epoch": 0.89, + "learning_rate": 5.928888184985746e-07, + "loss": 0.2012, + "step": 5952 + }, + { + "epoch": 0.89, + "learning_rate": 5.903547671840354e-07, + "loss": 0.2006, + "step": 5984 + }, + { + "epoch": 0.9, + "learning_rate": 5.878207158694963e-07, + "loss": 0.215, + "step": 6016 + }, + { + "epoch": 0.9, + "learning_rate": 5.852866645549572e-07, + "loss": 0.1471, + "step": 6048 + }, + { + "epoch": 0.91, + "learning_rate": 5.827526132404181e-07, + "loss": 0.2364, + "step": 6080 + }, + { + "epoch": 0.91, + "learning_rate": 5.80218561925879e-07, + "loss": 0.2881, + "step": 6112 + }, + { + "epoch": 0.91, + "learning_rate": 5.776845106113398e-07, + "loss": 0.1536, + "step": 6144 + }, + { + "epoch": 0.92, + "learning_rate": 5.751504592968008e-07, + "loss": 0.2317, + "step": 6176 + }, + { + "epoch": 0.92, + "learning_rate": 5.726164079822616e-07, + "loss": 0.1952, + "step": 6208 + }, + { + "epoch": 0.93, + "learning_rate": 5.700823566677225e-07, + "loss": 0.1602, + "step": 6240 + }, + { + "epoch": 0.93, + "learning_rate": 5.675483053531834e-07, + "loss": 0.212, + "step": 6272 + }, + { + "epoch": 0.94, + "learning_rate": 5.650142540386442e-07, + "loss": 0.2401, + "step": 6304 + }, + { + "epoch": 0.94, + "learning_rate": 5.624802027241052e-07, + "loss": 0.1992, + "step": 6336 + }, + { + "epoch": 0.95, + "learning_rate": 5.59946151409566e-07, + "loss": 0.2616, + "step": 6368 + }, + { + "epoch": 0.95, + "learning_rate": 5.574121000950269e-07, + "loss": 0.146, + "step": 6400 + }, + { + "epoch": 0.96, + "learning_rate": 5.548780487804878e-07, + "loss": 0.2081, + "step": 6432 + }, + { + "epoch": 0.96, + "learning_rate": 5.523439974659486e-07, + "loss": 0.207, + "step": 6464 + }, + { + "epoch": 0.97, + "learning_rate": 5.498099461514096e-07, + "loss": 0.2631, + "step": 6496 + }, + { + "epoch": 0.97, + "learning_rate": 5.472758948368704e-07, + "loss": 0.1721, + "step": 6528 + }, + { + "epoch": 0.98, + "learning_rate": 5.447418435223313e-07, + "loss": 0.1908, + "step": 6560 + }, + { + "epoch": 0.98, + "learning_rate": 5.422869813113715e-07, + "loss": 0.2238, + "step": 6592 + }, + { + "epoch": 0.99, + "learning_rate": 5.397529299968325e-07, + "loss": 0.2524, + "step": 6624 + }, + { + "epoch": 0.99, + "learning_rate": 5.372188786822933e-07, + "loss": 0.1968, + "step": 6656 + }, + { + "epoch": 1.0, + "learning_rate": 5.346848273677542e-07, + "loss": 0.2379, + "step": 6688 + }, + { + "epoch": 1.0, + "learning_rate": 5.32150776053215e-07, + "loss": 0.246, + "step": 6720 + }, + { + "epoch": 1.01, + "learning_rate": 5.296167247386759e-07, + "loss": 0.104, + "step": 6752 + }, + { + "epoch": 1.01, + "learning_rate": 5.270826734241369e-07, + "loss": 0.102, + "step": 6784 + }, + { + "epoch": 1.01, + "learning_rate": 5.245486221095977e-07, + "loss": 0.1432, + "step": 6816 + }, + { + "epoch": 1.02, + "learning_rate": 5.220145707950586e-07, + "loss": 0.1262, + "step": 6848 + }, + { + "epoch": 1.02, + "learning_rate": 5.194805194805194e-07, + "loss": 0.1299, + "step": 6880 + }, + { + "epoch": 1.03, + "learning_rate": 5.169464681659803e-07, + "loss": 0.1319, + "step": 6912 + }, + { + "epoch": 1.03, + "learning_rate": 5.144124168514412e-07, + "loss": 0.105, + "step": 6944 + }, + { + "epoch": 1.04, + "learning_rate": 5.118783655369021e-07, + "loss": 0.1233, + "step": 6976 + }, + { + "epoch": 1.04, + "learning_rate": 5.09344314222363e-07, + "loss": 0.0922, + "step": 7008 + }, + { + "epoch": 1.05, + "learning_rate": 5.068102629078239e-07, + "loss": 0.144, + "step": 7040 + }, + { + "epoch": 1.05, + "learning_rate": 5.042762115932847e-07, + "loss": 0.1828, + "step": 7072 + }, + { + "epoch": 1.06, + "learning_rate": 5.017421602787456e-07, + "loss": 0.1097, + "step": 7104 + }, + { + "epoch": 1.06, + "learning_rate": 4.992081089642065e-07, + "loss": 0.1867, + "step": 7136 + }, + { + "epoch": 1.07, + "learning_rate": 4.966740576496675e-07, + "loss": 0.1338, + "step": 7168 + }, + { + "epoch": 1.07, + "learning_rate": 4.941400063351283e-07, + "loss": 0.1535, + "step": 7200 + }, + { + "epoch": 1.08, + "learning_rate": 4.916059550205891e-07, + "loss": 0.1861, + "step": 7232 + }, + { + "epoch": 1.08, + "learning_rate": 4.8907190370605e-07, + "loss": 0.1284, + "step": 7264 + }, + { + "epoch": 1.09, + "learning_rate": 4.865378523915109e-07, + "loss": 0.1037, + "step": 7296 + }, + { + "epoch": 1.09, + "learning_rate": 4.840038010769719e-07, + "loss": 0.1217, + "step": 7328 + }, + { + "epoch": 1.1, + "learning_rate": 4.814697497624327e-07, + "loss": 0.1469, + "step": 7360 + }, + { + "epoch": 1.1, + "learning_rate": 4.789356984478935e-07, + "loss": 0.1218, + "step": 7392 + }, + { + "epoch": 1.11, + "learning_rate": 4.764016471333545e-07, + "loss": 0.1486, + "step": 7424 + }, + { + "epoch": 1.11, + "learning_rate": 4.738675958188153e-07, + "loss": 0.0796, + "step": 7456 + }, + { + "epoch": 1.11, + "learning_rate": 4.7141273360785554e-07, + "loss": 0.1163, + "step": 7488 + }, + { + "epoch": 1.12, + "learning_rate": 4.688786822933164e-07, + "loss": 0.0821, + "step": 7520 + }, + { + "epoch": 1.12, + "learning_rate": 4.663446309787773e-07, + "loss": 0.1701, + "step": 7552 + }, + { + "epoch": 1.13, + "learning_rate": 4.638105796642382e-07, + "loss": 0.1002, + "step": 7584 + }, + { + "epoch": 1.13, + "learning_rate": 4.613557174532784e-07, + "loss": 0.0914, + "step": 7616 + }, + { + "epoch": 1.14, + "learning_rate": 4.5882166613873927e-07, + "loss": 0.1374, + "step": 7648 + }, + { + "epoch": 1.14, + "learning_rate": 4.562876148242002e-07, + "loss": 0.1142, + "step": 7680 + }, + { + "epoch": 1.15, + "learning_rate": 4.5375356350966105e-07, + "loss": 0.1351, + "step": 7712 + }, + { + "epoch": 1.15, + "learning_rate": 4.5121951219512194e-07, + "loss": 0.1513, + "step": 7744 + }, + { + "epoch": 1.16, + "learning_rate": 4.486854608805828e-07, + "loss": 0.0998, + "step": 7776 + }, + { + "epoch": 1.16, + "learning_rate": 4.461514095660437e-07, + "loss": 0.1115, + "step": 7808 + }, + { + "epoch": 1.17, + "learning_rate": 4.4361735825150457e-07, + "loss": 0.1373, + "step": 7840 + }, + { + "epoch": 1.17, + "learning_rate": 4.4108330693696546e-07, + "loss": 0.1614, + "step": 7872 + }, + { + "epoch": 1.18, + "learning_rate": 4.3854925562242635e-07, + "loss": 0.0826, + "step": 7904 + }, + { + "epoch": 1.18, + "learning_rate": 4.3601520430788724e-07, + "loss": 0.1129, + "step": 7936 + }, + { + "epoch": 1.19, + "learning_rate": 4.334811529933481e-07, + "loss": 0.0825, + "step": 7968 + }, + { + "epoch": 1.19, + "learning_rate": 4.3094710167880897e-07, + "loss": 0.1497, + "step": 8000 + }, + { + "epoch": 1.2, + "learning_rate": 4.284130503642698e-07, + "loss": 0.1575, + "step": 8032 + }, + { + "epoch": 1.2, + "learning_rate": 4.2587899904973075e-07, + "loss": 0.1377, + "step": 8064 + }, + { + "epoch": 1.21, + "learning_rate": 4.2334494773519165e-07, + "loss": 0.152, + "step": 8096 + }, + { + "epoch": 1.21, + "learning_rate": 4.208108964206525e-07, + "loss": 0.1142, + "step": 8128 + }, + { + "epoch": 1.21, + "learning_rate": 4.182768451061134e-07, + "loss": 0.1458, + "step": 8160 + }, + { + "epoch": 1.22, + "learning_rate": 4.1574279379157427e-07, + "loss": 0.1216, + "step": 8192 + }, + { + "epoch": 1.22, + "learning_rate": 4.132087424770351e-07, + "loss": 0.112, + "step": 8224 + }, + { + "epoch": 1.23, + "learning_rate": 4.1067469116249605e-07, + "loss": 0.1517, + "step": 8256 + }, + { + "epoch": 1.23, + "learning_rate": 4.081406398479569e-07, + "loss": 0.1046, + "step": 8288 + }, + { + "epoch": 1.24, + "learning_rate": 4.056065885334178e-07, + "loss": 0.1538, + "step": 8320 + }, + { + "epoch": 1.24, + "learning_rate": 4.030725372188787e-07, + "loss": 0.1734, + "step": 8352 + }, + { + "epoch": 1.25, + "learning_rate": 4.005384859043395e-07, + "loss": 0.1236, + "step": 8384 + }, + { + "epoch": 1.25, + "learning_rate": 3.980044345898004e-07, + "loss": 0.1129, + "step": 8416 + }, + { + "epoch": 1.26, + "learning_rate": 3.9547038327526135e-07, + "loss": 0.1193, + "step": 8448 + }, + { + "epoch": 1.26, + "learning_rate": 3.929363319607222e-07, + "loss": 0.1124, + "step": 8480 + }, + { + "epoch": 1.27, + "learning_rate": 3.904022806461831e-07, + "loss": 0.103, + "step": 8512 + }, + { + "epoch": 1.27, + "learning_rate": 3.878682293316439e-07, + "loss": 0.1526, + "step": 8544 + }, + { + "epoch": 1.28, + "learning_rate": 3.853341780171048e-07, + "loss": 0.1747, + "step": 8576 + }, + { + "epoch": 1.28, + "learning_rate": 3.8280012670256575e-07, + "loss": 0.0711, + "step": 8608 + }, + { + "epoch": 1.29, + "learning_rate": 3.802660753880266e-07, + "loss": 0.131, + "step": 8640 + }, + { + "epoch": 1.29, + "learning_rate": 3.777320240734875e-07, + "loss": 0.0695, + "step": 8672 + }, + { + "epoch": 1.3, + "learning_rate": 3.751979727589484e-07, + "loss": 0.1176, + "step": 8704 + }, + { + "epoch": 1.3, + "learning_rate": 3.726639214444092e-07, + "loss": 0.1141, + "step": 8736 + }, + { + "epoch": 1.31, + "learning_rate": 3.701298701298701e-07, + "loss": 0.1437, + "step": 8768 + }, + { + "epoch": 1.31, + "learning_rate": 3.6759581881533095e-07, + "loss": 0.1273, + "step": 8800 + }, + { + "epoch": 1.31, + "learning_rate": 3.650617675007919e-07, + "loss": 0.0765, + "step": 8832 + }, + { + "epoch": 1.32, + "learning_rate": 3.625277161862528e-07, + "loss": 0.1322, + "step": 8864 + }, + { + "epoch": 1.32, + "learning_rate": 3.599936648717136e-07, + "loss": 0.1643, + "step": 8896 + }, + { + "epoch": 1.33, + "learning_rate": 3.574596135571745e-07, + "loss": 0.1005, + "step": 8928 + }, + { + "epoch": 1.33, + "learning_rate": 3.549255622426354e-07, + "loss": 0.1545, + "step": 8960 + }, + { + "epoch": 1.34, + "learning_rate": 3.523915109280963e-07, + "loss": 0.0755, + "step": 8992 + }, + { + "epoch": 1.34, + "learning_rate": 3.498574596135572e-07, + "loss": 0.0917, + "step": 9024 + }, + { + "epoch": 1.35, + "learning_rate": 3.47323408299018e-07, + "loss": 0.0893, + "step": 9056 + }, + { + "epoch": 1.35, + "learning_rate": 3.447893569844789e-07, + "loss": 0.152, + "step": 9088 + }, + { + "epoch": 1.36, + "learning_rate": 3.422553056699398e-07, + "loss": 0.1154, + "step": 9120 + }, + { + "epoch": 1.36, + "learning_rate": 3.3972125435540065e-07, + "loss": 0.0817, + "step": 9152 + }, + { + "epoch": 1.37, + "learning_rate": 3.371872030408616e-07, + "loss": 0.083, + "step": 9184 + }, + { + "epoch": 1.37, + "learning_rate": 3.346531517263225e-07, + "loss": 0.1084, + "step": 9216 + }, + { + "epoch": 1.38, + "learning_rate": 3.321191004117833e-07, + "loss": 0.1514, + "step": 9248 + }, + { + "epoch": 1.38, + "learning_rate": 3.295850490972442e-07, + "loss": 0.1195, + "step": 9280 + }, + { + "epoch": 1.39, + "learning_rate": 3.2705099778270505e-07, + "loss": 0.0939, + "step": 9312 + }, + { + "epoch": 1.39, + "learning_rate": 3.2451694646816595e-07, + "loss": 0.1227, + "step": 9344 + }, + { + "epoch": 1.4, + "learning_rate": 3.219828951536269e-07, + "loss": 0.1342, + "step": 9376 + }, + { + "epoch": 1.4, + "learning_rate": 3.1944884383908773e-07, + "loss": 0.1214, + "step": 9408 + }, + { + "epoch": 1.41, + "learning_rate": 3.169147925245486e-07, + "loss": 0.1624, + "step": 9440 + }, + { + "epoch": 1.41, + "learning_rate": 3.143807412100095e-07, + "loss": 0.1261, + "step": 9472 + }, + { + "epoch": 1.41, + "learning_rate": 3.1184668989547035e-07, + "loss": 0.1674, + "step": 9504 + }, + { + "epoch": 1.42, + "learning_rate": 3.0931263858093124e-07, + "loss": 0.1613, + "step": 9536 + }, + { + "epoch": 1.42, + "learning_rate": 3.0677858726639213e-07, + "loss": 0.1519, + "step": 9568 + }, + { + "epoch": 1.43, + "learning_rate": 3.04244535951853e-07, + "loss": 0.1107, + "step": 9600 + }, + { + "epoch": 1.43, + "learning_rate": 3.017104846373139e-07, + "loss": 0.0992, + "step": 9632 + }, + { + "epoch": 1.44, + "learning_rate": 2.9917643332277476e-07, + "loss": 0.0921, + "step": 9664 + }, + { + "epoch": 1.44, + "learning_rate": 2.9664238200823565e-07, + "loss": 0.1478, + "step": 9696 + }, + { + "epoch": 1.45, + "learning_rate": 2.9410833069369654e-07, + "loss": 0.1268, + "step": 9728 + }, + { + "epoch": 1.45, + "learning_rate": 2.9157427937915743e-07, + "loss": 0.1086, + "step": 9760 + }, + { + "epoch": 1.46, + "learning_rate": 2.890402280646183e-07, + "loss": 0.1913, + "step": 9792 + }, + { + "epoch": 1.46, + "learning_rate": 2.8650617675007916e-07, + "loss": 0.1626, + "step": 9824 + }, + { + "epoch": 1.47, + "learning_rate": 2.8397212543554005e-07, + "loss": 0.1647, + "step": 9856 + }, + { + "epoch": 1.47, + "learning_rate": 2.8143807412100095e-07, + "loss": 0.1245, + "step": 9888 + }, + { + "epoch": 1.48, + "learning_rate": 2.789040228064618e-07, + "loss": 0.1326, + "step": 9920 + }, + { + "epoch": 1.48, + "learning_rate": 2.7636997149192273e-07, + "loss": 0.1095, + "step": 9952 + }, + { + "epoch": 1.49, + "learning_rate": 2.738359201773836e-07, + "loss": 0.1089, + "step": 9984 + }, + { + "epoch": 1.49, + "learning_rate": 2.7130186886284446e-07, + "loss": 0.1074, + "step": 10016 + }, + { + "epoch": 1.5, + "learning_rate": 2.6876781754830535e-07, + "loss": 0.1084, + "step": 10048 + }, + { + "epoch": 1.5, + "learning_rate": 2.662337662337662e-07, + "loss": 0.162, + "step": 10080 + }, + { + "epoch": 1.51, + "learning_rate": 2.636997149192271e-07, + "loss": 0.1419, + "step": 10112 + }, + { + "epoch": 1.51, + "learning_rate": 2.61165663604688e-07, + "loss": 0.1125, + "step": 10144 + }, + { + "epoch": 1.51, + "learning_rate": 2.5863161229014886e-07, + "loss": 0.0937, + "step": 10176 + }, + { + "epoch": 1.52, + "learning_rate": 2.5609756097560976e-07, + "loss": 0.1234, + "step": 10208 + }, + { + "epoch": 1.52, + "learning_rate": 2.5356350966107065e-07, + "loss": 0.138, + "step": 10240 + }, + { + "epoch": 1.53, + "learning_rate": 2.510294583465315e-07, + "loss": 0.1753, + "step": 10272 + }, + { + "epoch": 1.53, + "learning_rate": 2.484954070319924e-07, + "loss": 0.1185, + "step": 10304 + }, + { + "epoch": 1.54, + "learning_rate": 2.4596135571745327e-07, + "loss": 0.0995, + "step": 10336 + }, + { + "epoch": 1.54, + "learning_rate": 2.4342730440291416e-07, + "loss": 0.0922, + "step": 10368 + }, + { + "epoch": 1.55, + "learning_rate": 2.40893253088375e-07, + "loss": 0.1387, + "step": 10400 + }, + { + "epoch": 1.55, + "learning_rate": 2.3835920177383592e-07, + "loss": 0.1478, + "step": 10432 + }, + { + "epoch": 1.56, + "learning_rate": 2.3582515045929678e-07, + "loss": 0.112, + "step": 10464 + }, + { + "epoch": 1.56, + "learning_rate": 2.3329109914475768e-07, + "loss": 0.1357, + "step": 10496 + }, + { + "epoch": 1.57, + "learning_rate": 2.3075704783021854e-07, + "loss": 0.1086, + "step": 10528 + }, + { + "epoch": 1.57, + "learning_rate": 2.2822299651567943e-07, + "loss": 0.1271, + "step": 10560 + }, + { + "epoch": 1.58, + "learning_rate": 2.2568894520114032e-07, + "loss": 0.1474, + "step": 10592 + }, + { + "epoch": 1.58, + "learning_rate": 2.231548938866012e-07, + "loss": 0.1282, + "step": 10624 + }, + { + "epoch": 1.59, + "learning_rate": 2.2062084257206205e-07, + "loss": 0.1723, + "step": 10656 + }, + { + "epoch": 1.59, + "learning_rate": 2.1808679125752297e-07, + "loss": 0.161, + "step": 10688 + }, + { + "epoch": 1.6, + "learning_rate": 2.156319290465632e-07, + "loss": 0.1467, + "step": 10720 + }, + { + "epoch": 1.6, + "learning_rate": 2.1309787773202406e-07, + "loss": 0.0872, + "step": 10752 + }, + { + "epoch": 1.61, + "learning_rate": 2.1056382641748495e-07, + "loss": 0.1393, + "step": 10784 + }, + { + "epoch": 1.61, + "learning_rate": 2.0802977510294584e-07, + "loss": 0.1885, + "step": 10816 + }, + { + "epoch": 1.62, + "learning_rate": 2.054957237884067e-07, + "loss": 0.0775, + "step": 10848 + }, + { + "epoch": 1.62, + "learning_rate": 2.029616724738676e-07, + "loss": 0.1347, + "step": 10880 + }, + { + "epoch": 1.62, + "learning_rate": 2.0042762115932846e-07, + "loss": 0.086, + "step": 10912 + }, + { + "epoch": 1.63, + "learning_rate": 1.9789356984478935e-07, + "loss": 0.1257, + "step": 10944 + }, + { + "epoch": 1.63, + "learning_rate": 1.9535951853025024e-07, + "loss": 0.169, + "step": 10976 + }, + { + "epoch": 1.64, + "learning_rate": 1.928254672157111e-07, + "loss": 0.1343, + "step": 11008 + }, + { + "epoch": 1.64, + "learning_rate": 1.9029141590117197e-07, + "loss": 0.097, + "step": 11040 + }, + { + "epoch": 1.65, + "learning_rate": 1.877573645866329e-07, + "loss": 0.1061, + "step": 11072 + }, + { + "epoch": 1.65, + "learning_rate": 1.8522331327209376e-07, + "loss": 0.1503, + "step": 11104 + }, + { + "epoch": 1.66, + "learning_rate": 1.8268926195755462e-07, + "loss": 0.1025, + "step": 11136 + }, + { + "epoch": 1.66, + "learning_rate": 1.8015521064301551e-07, + "loss": 0.0772, + "step": 11168 + }, + { + "epoch": 1.67, + "learning_rate": 1.776211593284764e-07, + "loss": 0.1272, + "step": 11200 + }, + { + "epoch": 1.67, + "learning_rate": 1.7508710801393727e-07, + "loss": 0.1134, + "step": 11232 + }, + { + "epoch": 1.68, + "learning_rate": 1.7255305669939816e-07, + "loss": 0.1774, + "step": 11264 + }, + { + "epoch": 1.68, + "learning_rate": 1.7001900538485903e-07, + "loss": 0.1119, + "step": 11296 + }, + { + "epoch": 1.69, + "learning_rate": 1.6748495407031992e-07, + "loss": 0.1709, + "step": 11328 + }, + { + "epoch": 1.69, + "learning_rate": 1.649509027557808e-07, + "loss": 0.1346, + "step": 11360 + }, + { + "epoch": 1.7, + "learning_rate": 1.6241685144124168e-07, + "loss": 0.1499, + "step": 11392 + }, + { + "epoch": 1.7, + "learning_rate": 1.5988280012670254e-07, + "loss": 0.1406, + "step": 11424 + }, + { + "epoch": 1.71, + "learning_rate": 1.5734874881216346e-07, + "loss": 0.1105, + "step": 11456 + }, + { + "epoch": 1.71, + "learning_rate": 1.5481469749762433e-07, + "loss": 0.1298, + "step": 11488 + }, + { + "epoch": 1.72, + "learning_rate": 1.522806461830852e-07, + "loss": 0.1309, + "step": 11520 + }, + { + "epoch": 1.72, + "learning_rate": 1.4974659486854608e-07, + "loss": 0.1059, + "step": 11552 + }, + { + "epoch": 1.72, + "learning_rate": 1.4721254355400697e-07, + "loss": 0.1232, + "step": 11584 + }, + { + "epoch": 1.73, + "learning_rate": 1.4467849223946784e-07, + "loss": 0.1357, + "step": 11616 + }, + { + "epoch": 1.73, + "learning_rate": 1.4214444092492873e-07, + "loss": 0.0904, + "step": 11648 + }, + { + "epoch": 1.74, + "learning_rate": 1.396103896103896e-07, + "loss": 0.1196, + "step": 11680 + }, + { + "epoch": 1.74, + "learning_rate": 1.370763382958505e-07, + "loss": 0.1525, + "step": 11712 + }, + { + "epoch": 1.75, + "learning_rate": 1.3454228698131138e-07, + "loss": 0.1775, + "step": 11744 + }, + { + "epoch": 1.75, + "learning_rate": 1.3200823566677224e-07, + "loss": 0.1018, + "step": 11776 + }, + { + "epoch": 1.76, + "learning_rate": 1.294741843522331e-07, + "loss": 0.0977, + "step": 11808 + }, + { + "epoch": 1.76, + "learning_rate": 1.2694013303769403e-07, + "loss": 0.118, + "step": 11840 + }, + { + "epoch": 1.77, + "learning_rate": 1.244060817231549e-07, + "loss": 0.0621, + "step": 11872 + }, + { + "epoch": 1.77, + "learning_rate": 1.2187203040861576e-07, + "loss": 0.1363, + "step": 11904 + }, + { + "epoch": 1.78, + "learning_rate": 1.1933797909407665e-07, + "loss": 0.1073, + "step": 11936 + }, + { + "epoch": 1.78, + "learning_rate": 1.1680392777953753e-07, + "loss": 0.1046, + "step": 11968 + }, + { + "epoch": 1.79, + "learning_rate": 1.1426987646499842e-07, + "loss": 0.0813, + "step": 12000 + }, + { + "epoch": 1.79, + "learning_rate": 1.1173582515045929e-07, + "loss": 0.1154, + "step": 12032 + }, + { + "epoch": 1.8, + "learning_rate": 1.0920177383592018e-07, + "loss": 0.1207, + "step": 12064 + }, + { + "epoch": 1.8, + "learning_rate": 1.0666772252138106e-07, + "loss": 0.1177, + "step": 12096 + }, + { + "epoch": 1.81, + "learning_rate": 1.0413367120684193e-07, + "loss": 0.1019, + "step": 12128 + }, + { + "epoch": 1.81, + "learning_rate": 1.0159961989230281e-07, + "loss": 0.0776, + "step": 12160 + }, + { + "epoch": 1.82, + "learning_rate": 9.90655685777637e-08, + "loss": 0.176, + "step": 12192 + }, + { + "epoch": 1.82, + "learning_rate": 9.653151726322457e-08, + "loss": 0.1396, + "step": 12224 + }, + { + "epoch": 1.82, + "learning_rate": 9.399746594868546e-08, + "loss": 0.1733, + "step": 12256 + }, + { + "epoch": 1.83, + "learning_rate": 9.146341463414634e-08, + "loss": 0.1555, + "step": 12288 + }, + { + "epoch": 1.83, + "learning_rate": 8.892936331960722e-08, + "loss": 0.1402, + "step": 12320 + }, + { + "epoch": 1.84, + "learning_rate": 8.63953120050681e-08, + "loss": 0.1134, + "step": 12352 + }, + { + "epoch": 1.84, + "learning_rate": 8.386126069052899e-08, + "loss": 0.1745, + "step": 12384 + }, + { + "epoch": 1.85, + "learning_rate": 8.132720937598985e-08, + "loss": 0.1001, + "step": 12416 + }, + { + "epoch": 1.85, + "learning_rate": 7.879315806145075e-08, + "loss": 0.0996, + "step": 12448 + }, + { + "epoch": 1.86, + "learning_rate": 7.625910674691162e-08, + "loss": 0.0759, + "step": 12480 + }, + { + "epoch": 1.86, + "learning_rate": 7.37250554323725e-08, + "loss": 0.0885, + "step": 12512 + }, + { + "epoch": 1.87, + "learning_rate": 7.119100411783338e-08, + "loss": 0.1775, + "step": 12544 + }, + { + "epoch": 1.87, + "learning_rate": 6.865695280329427e-08, + "loss": 0.0895, + "step": 12576 + }, + { + "epoch": 1.88, + "learning_rate": 6.612290148875514e-08, + "loss": 0.1475, + "step": 12608 + }, + { + "epoch": 1.88, + "learning_rate": 6.358885017421603e-08, + "loss": 0.1242, + "step": 12640 + }, + { + "epoch": 1.89, + "learning_rate": 6.105479885967691e-08, + "loss": 0.149, + "step": 12672 + }, + { + "epoch": 1.89, + "learning_rate": 5.8520747545137786e-08, + "loss": 0.0646, + "step": 12704 + }, + { + "epoch": 1.9, + "learning_rate": 5.5986696230598664e-08, + "loss": 0.1511, + "step": 12736 + }, + { + "epoch": 1.9, + "learning_rate": 5.345264491605954e-08, + "loss": 0.1567, + "step": 12768 + }, + { + "epoch": 1.91, + "learning_rate": 5.091859360152043e-08, + "loss": 0.1383, + "step": 12800 + }, + { + "epoch": 1.91, + "learning_rate": 4.8384542286981306e-08, + "loss": 0.153, + "step": 12832 + }, + { + "epoch": 1.92, + "learning_rate": 4.5850490972442185e-08, + "loss": 0.15, + "step": 12864 + }, + { + "epoch": 1.92, + "learning_rate": 4.331643965790307e-08, + "loss": 0.1292, + "step": 12896 + }, + { + "epoch": 1.92, + "learning_rate": 4.078238834336395e-08, + "loss": 0.1298, + "step": 12928 + }, + { + "epoch": 1.93, + "learning_rate": 3.824833702882483e-08, + "loss": 0.1317, + "step": 12960 + }, + { + "epoch": 1.93, + "learning_rate": 3.571428571428571e-08, + "loss": 0.0836, + "step": 12992 + }, + { + "epoch": 1.94, + "learning_rate": 3.318023439974659e-08, + "loss": 0.0985, + "step": 13024 + }, + { + "epoch": 1.94, + "learning_rate": 3.0646183085207475e-08, + "loss": 0.0995, + "step": 13056 + }, + { + "epoch": 1.95, + "learning_rate": 2.8112131770668357e-08, + "loss": 0.0664, + "step": 13088 + }, + { + "epoch": 1.95, + "learning_rate": 2.5657269559708584e-08, + "loss": 0.1424, + "step": 13120 + }, + { + "epoch": 1.96, + "learning_rate": 2.3123218245169466e-08, + "loss": 0.1044, + "step": 13152 + }, + { + "epoch": 1.96, + "learning_rate": 2.0589166930630344e-08, + "loss": 0.1377, + "step": 13184 + }, + { + "epoch": 1.97, + "learning_rate": 1.8055115616091226e-08, + "loss": 0.1469, + "step": 13216 + }, + { + "epoch": 1.97, + "learning_rate": 1.5521064301552108e-08, + "loss": 0.0877, + "step": 13248 + }, + { + "epoch": 1.98, + "learning_rate": 1.2987012987012988e-08, + "loss": 0.139, + "step": 13280 + }, + { + "epoch": 1.98, + "learning_rate": 1.0452961672473868e-08, + "loss": 0.1598, + "step": 13312 + }, + { + "epoch": 1.99, + "learning_rate": 7.918910357934748e-09, + "loss": 0.1874, + "step": 13344 + }, + { + "epoch": 1.99, + "learning_rate": 5.384859043395628e-09, + "loss": 0.1875, + "step": 13376 + }, + { + "epoch": 2.0, + "learning_rate": 2.8508077288565093e-09, + "loss": 0.1206, + "step": 13408 + } + ], + "logging_steps": 32, + "max_steps": 13434, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 3358, + "total_flos": 5.70368663027712e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-20149/training_args.bin b/checkpoint-20149/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b2c662ef5476ddd668bfd2aa78effb1b4e6129c1 --- /dev/null +++ b/checkpoint-20149/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e4db6f6fcaf2005bb3f583cfbd6e7afdd0dfdcdb7d4db7e107d5c22865fcf47 +size 5048 diff --git a/checkpoint-6717/config.json b/checkpoint-6717/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec8ed74527c8219d428d1f813aa49a362ce97275 --- /dev/null +++ b/checkpoint-6717/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "openai/whisper-large-v2", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-6717/generation_config.json b/checkpoint-6717/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/checkpoint-6717/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/checkpoint-6717/model-00001-of-00002.safetensors b/checkpoint-6717/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1ed51241be202400255bcff765d28a0bf04aa29 --- /dev/null +++ b/checkpoint-6717/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cbea7bf3e0a471c26c40067b5e0a880f36a63f3e9024e97afdec04184851c33 +size 4992706480 diff --git a/checkpoint-6717/model-00002-of-00002.safetensors b/checkpoint-6717/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8603a3a9a2116d01f7bbdb7d1d406583f7f3424d --- /dev/null +++ b/checkpoint-6717/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49150d638e0ced917107f0af865392158918de17427afb8e34d0daa72ed7fbeb +size 1180663192 diff --git a/checkpoint-6717/model.safetensors.index.json b/checkpoint-6717/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/checkpoint-6717/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-6717/optimizer.pt b/checkpoint-6717/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..95e86bb9bfb25cbf431f3cca484687c9716d55fc --- /dev/null +++ b/checkpoint-6717/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ced3b7553c984f99429fdd37b1eb83bcda1bed0400a6c81c76635140fe4a5653 +size 3095074288 diff --git a/checkpoint-6717/preprocessor_config.json b/checkpoint-6717/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-6717/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-6717/rng_state.pth b/checkpoint-6717/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ab70f1767d4d8c0c3ef5e68bc095412607941487 --- /dev/null +++ b/checkpoint-6717/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f6d1d3d0751e2edf28d0430761a90aa69f15f2f62574160ff3315fc30791a05 +size 14244 diff --git a/checkpoint-6717/scheduler.pt b/checkpoint-6717/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8dcb15eca7dbf6261fd3cd52d76067ff4e30a033 --- /dev/null +++ b/checkpoint-6717/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b6ba11422234178c10f4354f4419d9a209941aa16700e6a46cc211162f5ce42 +size 1064 diff --git a/checkpoint-6717/trainer_state.json b/checkpoint-6717/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..21ac5dc4b1f91f3b2dbb6f861976cd8c1283d011 --- /dev/null +++ b/checkpoint-6717/trainer_state.json @@ -0,0 +1,1275 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 6717, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 7.196029776674937e-08, + "loss": 4.7214, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 1.4640198511166252e-07, + "loss": 4.5528, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 2.2580645161290322e-07, + "loss": 3.8865, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 3.052109181141439e-07, + "loss": 3.1638, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 3.8461538461538463e-07, + "loss": 2.3349, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 4.640198511166253e-07, + "loss": 1.7851, + "step": 192 + }, + { + "epoch": 0.03, + "learning_rate": 5.43424317617866e-07, + "loss": 1.4686, + "step": 224 + }, + { + "epoch": 0.04, + "learning_rate": 6.228287841191067e-07, + "loss": 1.3682, + "step": 256 + }, + { + "epoch": 0.04, + "learning_rate": 7.022332506203473e-07, + "loss": 1.1261, + "step": 288 + }, + { + "epoch": 0.05, + "learning_rate": 7.816377171215881e-07, + "loss": 0.7135, + "step": 320 + }, + { + "epoch": 0.05, + "learning_rate": 8.610421836228287e-07, + "loss": 0.3729, + "step": 352 + }, + { + "epoch": 0.06, + "learning_rate": 9.404466501240694e-07, + "loss": 0.3237, + "step": 384 + }, + { + "epoch": 0.06, + "learning_rate": 9.987329743427303e-07, + "loss": 0.2622, + "step": 416 + }, + { + "epoch": 0.07, + "learning_rate": 9.936648717136522e-07, + "loss": 0.3261, + "step": 448 + }, + { + "epoch": 0.07, + "learning_rate": 9.885967690845739e-07, + "loss": 0.3324, + "step": 480 + }, + { + "epoch": 0.08, + "learning_rate": 9.835286664554958e-07, + "loss": 0.295, + "step": 512 + }, + { + "epoch": 0.08, + "learning_rate": 9.784605638264175e-07, + "loss": 0.2994, + "step": 544 + }, + { + "epoch": 0.09, + "learning_rate": 9.733924611973391e-07, + "loss": 0.3807, + "step": 576 + }, + { + "epoch": 0.09, + "learning_rate": 9.68324358568261e-07, + "loss": 0.2923, + "step": 608 + }, + { + "epoch": 0.1, + "learning_rate": 9.632562559391827e-07, + "loss": 0.2129, + "step": 640 + }, + { + "epoch": 0.1, + "learning_rate": 9.581881533101046e-07, + "loss": 0.3763, + "step": 672 + }, + { + "epoch": 0.1, + "learning_rate": 9.531200506810263e-07, + "loss": 0.3443, + "step": 704 + }, + { + "epoch": 0.11, + "learning_rate": 9.480519480519479e-07, + "loss": 0.3744, + "step": 736 + }, + { + "epoch": 0.11, + "learning_rate": 9.429838454228698e-07, + "loss": 0.1972, + "step": 768 + }, + { + "epoch": 0.12, + "learning_rate": 9.379157427937915e-07, + "loss": 0.3089, + "step": 800 + }, + { + "epoch": 0.12, + "learning_rate": 9.328476401647133e-07, + "loss": 0.2917, + "step": 832 + }, + { + "epoch": 0.13, + "learning_rate": 9.277795375356351e-07, + "loss": 0.2499, + "step": 864 + }, + { + "epoch": 0.13, + "learning_rate": 9.227114349065568e-07, + "loss": 0.2822, + "step": 896 + }, + { + "epoch": 0.14, + "learning_rate": 9.176433322774785e-07, + "loss": 0.2675, + "step": 928 + }, + { + "epoch": 0.14, + "learning_rate": 9.125752296484004e-07, + "loss": 0.3076, + "step": 960 + }, + { + "epoch": 0.15, + "learning_rate": 9.075071270193221e-07, + "loss": 0.3385, + "step": 992 + }, + { + "epoch": 0.15, + "learning_rate": 9.024390243902439e-07, + "loss": 0.2867, + "step": 1024 + }, + { + "epoch": 0.16, + "learning_rate": 8.973709217611656e-07, + "loss": 0.2401, + "step": 1056 + }, + { + "epoch": 0.16, + "learning_rate": 8.923028191320873e-07, + "loss": 0.2954, + "step": 1088 + }, + { + "epoch": 0.17, + "learning_rate": 8.872347165030091e-07, + "loss": 0.2139, + "step": 1120 + }, + { + "epoch": 0.17, + "learning_rate": 8.821666138739309e-07, + "loss": 0.2839, + "step": 1152 + }, + { + "epoch": 0.18, + "learning_rate": 8.770985112448527e-07, + "loss": 0.2057, + "step": 1184 + }, + { + "epoch": 0.18, + "learning_rate": 8.720304086157745e-07, + "loss": 0.3533, + "step": 1216 + }, + { + "epoch": 0.19, + "learning_rate": 8.669623059866962e-07, + "loss": 0.246, + "step": 1248 + }, + { + "epoch": 0.19, + "learning_rate": 8.618942033576179e-07, + "loss": 0.2183, + "step": 1280 + }, + { + "epoch": 0.2, + "learning_rate": 8.568261007285396e-07, + "loss": 0.2467, + "step": 1312 + }, + { + "epoch": 0.2, + "learning_rate": 8.517579980994615e-07, + "loss": 0.2425, + "step": 1344 + }, + { + "epoch": 0.2, + "learning_rate": 8.466898954703833e-07, + "loss": 0.3074, + "step": 1376 + }, + { + "epoch": 0.21, + "learning_rate": 8.41621792841305e-07, + "loss": 0.2242, + "step": 1408 + }, + { + "epoch": 0.21, + "learning_rate": 8.365536902122268e-07, + "loss": 0.2959, + "step": 1440 + }, + { + "epoch": 0.22, + "learning_rate": 8.314855875831485e-07, + "loss": 0.3424, + "step": 1472 + }, + { + "epoch": 0.22, + "learning_rate": 8.264174849540702e-07, + "loss": 0.356, + "step": 1504 + }, + { + "epoch": 0.23, + "learning_rate": 8.213493823249921e-07, + "loss": 0.2976, + "step": 1536 + }, + { + "epoch": 0.23, + "learning_rate": 8.162812796959138e-07, + "loss": 0.2889, + "step": 1568 + }, + { + "epoch": 0.24, + "learning_rate": 8.112131770668356e-07, + "loss": 0.2759, + "step": 1600 + }, + { + "epoch": 0.24, + "learning_rate": 8.061450744377573e-07, + "loss": 0.2287, + "step": 1632 + }, + { + "epoch": 0.25, + "learning_rate": 8.01076971808679e-07, + "loss": 0.2485, + "step": 1664 + }, + { + "epoch": 0.25, + "learning_rate": 7.960088691796008e-07, + "loss": 0.1535, + "step": 1696 + }, + { + "epoch": 0.26, + "learning_rate": 7.909407665505227e-07, + "loss": 0.165, + "step": 1728 + }, + { + "epoch": 0.26, + "learning_rate": 7.858726639214444e-07, + "loss": 0.3974, + "step": 1760 + }, + { + "epoch": 0.27, + "learning_rate": 7.808045612923662e-07, + "loss": 0.2746, + "step": 1792 + }, + { + "epoch": 0.27, + "learning_rate": 7.757364586632878e-07, + "loss": 0.217, + "step": 1824 + }, + { + "epoch": 0.28, + "learning_rate": 7.706683560342096e-07, + "loss": 0.168, + "step": 1856 + }, + { + "epoch": 0.28, + "learning_rate": 7.656002534051315e-07, + "loss": 0.255, + "step": 1888 + }, + { + "epoch": 0.29, + "learning_rate": 7.605321507760532e-07, + "loss": 0.1905, + "step": 1920 + }, + { + "epoch": 0.29, + "learning_rate": 7.55464048146975e-07, + "loss": 0.2732, + "step": 1952 + }, + { + "epoch": 0.3, + "learning_rate": 7.503959455178968e-07, + "loss": 0.2436, + "step": 1984 + }, + { + "epoch": 0.3, + "learning_rate": 7.453278428888184e-07, + "loss": 0.2511, + "step": 2016 + }, + { + "epoch": 0.3, + "learning_rate": 7.402597402597402e-07, + "loss": 0.2719, + "step": 2048 + }, + { + "epoch": 0.31, + "learning_rate": 7.351916376306619e-07, + "loss": 0.2737, + "step": 2080 + }, + { + "epoch": 0.31, + "learning_rate": 7.301235350015838e-07, + "loss": 0.2588, + "step": 2112 + }, + { + "epoch": 0.32, + "learning_rate": 7.250554323725056e-07, + "loss": 0.2455, + "step": 2144 + }, + { + "epoch": 0.32, + "learning_rate": 7.199873297434272e-07, + "loss": 0.2213, + "step": 2176 + }, + { + "epoch": 0.33, + "learning_rate": 7.14919227114349e-07, + "loss": 0.2366, + "step": 2208 + }, + { + "epoch": 0.33, + "learning_rate": 7.098511244852708e-07, + "loss": 0.2187, + "step": 2240 + }, + { + "epoch": 0.34, + "learning_rate": 7.047830218561926e-07, + "loss": 0.3166, + "step": 2272 + }, + { + "epoch": 0.34, + "learning_rate": 6.997149192271144e-07, + "loss": 0.2254, + "step": 2304 + }, + { + "epoch": 0.35, + "learning_rate": 6.94646816598036e-07, + "loss": 0.2592, + "step": 2336 + }, + { + "epoch": 0.35, + "learning_rate": 6.895787139689578e-07, + "loss": 0.193, + "step": 2368 + }, + { + "epoch": 0.36, + "learning_rate": 6.845106113398796e-07, + "loss": 0.2309, + "step": 2400 + }, + { + "epoch": 0.36, + "learning_rate": 6.794425087108013e-07, + "loss": 0.3267, + "step": 2432 + }, + { + "epoch": 0.37, + "learning_rate": 6.743744060817232e-07, + "loss": 0.2224, + "step": 2464 + }, + { + "epoch": 0.37, + "learning_rate": 6.69306303452645e-07, + "loss": 0.2075, + "step": 2496 + }, + { + "epoch": 0.38, + "learning_rate": 6.642382008235666e-07, + "loss": 0.289, + "step": 2528 + }, + { + "epoch": 0.38, + "learning_rate": 6.591700981944884e-07, + "loss": 0.2122, + "step": 2560 + }, + { + "epoch": 0.39, + "learning_rate": 6.541019955654101e-07, + "loss": 0.2358, + "step": 2592 + }, + { + "epoch": 0.39, + "learning_rate": 6.490338929363319e-07, + "loss": 0.1704, + "step": 2624 + }, + { + "epoch": 0.4, + "learning_rate": 6.439657903072538e-07, + "loss": 0.2362, + "step": 2656 + }, + { + "epoch": 0.4, + "learning_rate": 6.388976876781755e-07, + "loss": 0.3415, + "step": 2688 + }, + { + "epoch": 0.4, + "learning_rate": 6.338295850490972e-07, + "loss": 0.2514, + "step": 2720 + }, + { + "epoch": 0.41, + "learning_rate": 6.28761482420019e-07, + "loss": 0.2282, + "step": 2752 + }, + { + "epoch": 0.41, + "learning_rate": 6.236933797909407e-07, + "loss": 0.1534, + "step": 2784 + }, + { + "epoch": 0.42, + "learning_rate": 6.186252771618625e-07, + "loss": 0.1802, + "step": 2816 + }, + { + "epoch": 0.42, + "learning_rate": 6.137155527399429e-07, + "loss": 0.2968, + "step": 2848 + }, + { + "epoch": 0.43, + "learning_rate": 6.086474501108647e-07, + "loss": 0.2895, + "step": 2880 + }, + { + "epoch": 0.43, + "learning_rate": 6.035793474817865e-07, + "loss": 0.1974, + "step": 2912 + }, + { + "epoch": 0.44, + "learning_rate": 5.985112448527082e-07, + "loss": 0.2907, + "step": 2944 + }, + { + "epoch": 0.44, + "learning_rate": 5.934431422236301e-07, + "loss": 0.2532, + "step": 2976 + }, + { + "epoch": 0.45, + "learning_rate": 5.883750395945518e-07, + "loss": 0.2646, + "step": 3008 + }, + { + "epoch": 0.45, + "learning_rate": 5.833069369654735e-07, + "loss": 0.2245, + "step": 3040 + }, + { + "epoch": 0.46, + "learning_rate": 5.782388343363953e-07, + "loss": 0.2016, + "step": 3072 + }, + { + "epoch": 0.46, + "learning_rate": 5.73170731707317e-07, + "loss": 0.2196, + "step": 3104 + }, + { + "epoch": 0.47, + "learning_rate": 5.681026290782388e-07, + "loss": 0.2708, + "step": 3136 + }, + { + "epoch": 0.47, + "learning_rate": 5.630345264491606e-07, + "loss": 0.2212, + "step": 3168 + }, + { + "epoch": 0.48, + "learning_rate": 5.579664238200823e-07, + "loss": 0.2534, + "step": 3200 + }, + { + "epoch": 0.48, + "learning_rate": 5.528983211910041e-07, + "loss": 0.3166, + "step": 3232 + }, + { + "epoch": 0.49, + "learning_rate": 5.478302185619259e-07, + "loss": 0.258, + "step": 3264 + }, + { + "epoch": 0.49, + "learning_rate": 5.427621159328476e-07, + "loss": 0.2732, + "step": 3296 + }, + { + "epoch": 0.5, + "learning_rate": 5.376940133037694e-07, + "loss": 0.238, + "step": 3328 + }, + { + "epoch": 0.5, + "learning_rate": 5.326259106746911e-07, + "loss": 0.274, + "step": 3360 + }, + { + "epoch": 0.5, + "learning_rate": 5.275578080456129e-07, + "loss": 0.2804, + "step": 3392 + }, + { + "epoch": 0.51, + "learning_rate": 5.224897054165347e-07, + "loss": 0.2292, + "step": 3424 + }, + { + "epoch": 0.51, + "learning_rate": 5.174216027874564e-07, + "loss": 0.2854, + "step": 3456 + }, + { + "epoch": 0.52, + "learning_rate": 5.123535001583782e-07, + "loss": 0.2529, + "step": 3488 + }, + { + "epoch": 0.52, + "learning_rate": 5.072853975292999e-07, + "loss": 0.2417, + "step": 3520 + }, + { + "epoch": 0.53, + "learning_rate": 5.022172949002217e-07, + "loss": 0.2774, + "step": 3552 + }, + { + "epoch": 0.53, + "learning_rate": 4.971491922711435e-07, + "loss": 0.249, + "step": 3584 + }, + { + "epoch": 0.54, + "learning_rate": 4.920810896420652e-07, + "loss": 0.2097, + "step": 3616 + }, + { + "epoch": 0.54, + "learning_rate": 4.87012987012987e-07, + "loss": 0.2589, + "step": 3648 + }, + { + "epoch": 0.55, + "learning_rate": 4.819448843839088e-07, + "loss": 0.2084, + "step": 3680 + }, + { + "epoch": 0.55, + "learning_rate": 4.768767817548305e-07, + "loss": 0.2122, + "step": 3712 + }, + { + "epoch": 0.56, + "learning_rate": 4.7180867912575227e-07, + "loss": 0.3507, + "step": 3744 + }, + { + "epoch": 0.56, + "learning_rate": 4.6674057649667405e-07, + "loss": 0.2697, + "step": 3776 + }, + { + "epoch": 0.57, + "learning_rate": 4.616724738675958e-07, + "loss": 0.1677, + "step": 3808 + }, + { + "epoch": 0.57, + "learning_rate": 4.5660437123851757e-07, + "loss": 0.2808, + "step": 3840 + }, + { + "epoch": 0.58, + "learning_rate": 4.5153626860943935e-07, + "loss": 0.2708, + "step": 3872 + }, + { + "epoch": 0.58, + "learning_rate": 4.464681659803611e-07, + "loss": 0.2201, + "step": 3904 + }, + { + "epoch": 0.59, + "learning_rate": 4.414000633512828e-07, + "loss": 0.168, + "step": 3936 + }, + { + "epoch": 0.59, + "learning_rate": 4.3633196072220465e-07, + "loss": 0.2336, + "step": 3968 + }, + { + "epoch": 0.6, + "learning_rate": 4.312638580931264e-07, + "loss": 0.3067, + "step": 4000 + }, + { + "epoch": 0.6, + "learning_rate": 4.261957554640481e-07, + "loss": 0.3077, + "step": 4032 + }, + { + "epoch": 0.61, + "learning_rate": 4.211276528349699e-07, + "loss": 0.1911, + "step": 4064 + }, + { + "epoch": 0.61, + "learning_rate": 4.160595502058917e-07, + "loss": 0.227, + "step": 4096 + }, + { + "epoch": 0.61, + "learning_rate": 4.109914475768134e-07, + "loss": 0.2105, + "step": 4128 + }, + { + "epoch": 0.62, + "learning_rate": 4.059233449477352e-07, + "loss": 0.1843, + "step": 4160 + }, + { + "epoch": 0.62, + "learning_rate": 4.008552423186569e-07, + "loss": 0.2107, + "step": 4192 + }, + { + "epoch": 0.63, + "learning_rate": 3.957871396895787e-07, + "loss": 0.204, + "step": 4224 + }, + { + "epoch": 0.63, + "learning_rate": 3.907190370605005e-07, + "loss": 0.33, + "step": 4256 + }, + { + "epoch": 0.64, + "learning_rate": 3.856509344314222e-07, + "loss": 0.2357, + "step": 4288 + }, + { + "epoch": 0.64, + "learning_rate": 3.8058283180234395e-07, + "loss": 0.2259, + "step": 4320 + }, + { + "epoch": 0.65, + "learning_rate": 3.755147291732658e-07, + "loss": 0.2652, + "step": 4352 + }, + { + "epoch": 0.65, + "learning_rate": 3.704466265441875e-07, + "loss": 0.3144, + "step": 4384 + }, + { + "epoch": 0.66, + "learning_rate": 3.6537852391510925e-07, + "loss": 0.2105, + "step": 4416 + }, + { + "epoch": 0.66, + "learning_rate": 3.6031042128603103e-07, + "loss": 0.2584, + "step": 4448 + }, + { + "epoch": 0.67, + "learning_rate": 3.552423186569528e-07, + "loss": 0.2144, + "step": 4480 + }, + { + "epoch": 0.67, + "learning_rate": 3.5017421602787454e-07, + "loss": 0.1907, + "step": 4512 + }, + { + "epoch": 0.68, + "learning_rate": 3.451061133987963e-07, + "loss": 0.2485, + "step": 4544 + }, + { + "epoch": 0.68, + "learning_rate": 3.4003801076971806e-07, + "loss": 0.25, + "step": 4576 + }, + { + "epoch": 0.69, + "learning_rate": 3.3496990814063984e-07, + "loss": 0.2195, + "step": 4608 + }, + { + "epoch": 0.69, + "learning_rate": 3.299018055115616e-07, + "loss": 0.2037, + "step": 4640 + }, + { + "epoch": 0.7, + "learning_rate": 3.2483370288248335e-07, + "loss": 0.1899, + "step": 4672 + }, + { + "epoch": 0.7, + "learning_rate": 3.197656002534051e-07, + "loss": 0.2469, + "step": 4704 + }, + { + "epoch": 0.71, + "learning_rate": 3.146974976243269e-07, + "loss": 0.3028, + "step": 4736 + }, + { + "epoch": 0.71, + "learning_rate": 3.0962939499524865e-07, + "loss": 0.2007, + "step": 4768 + }, + { + "epoch": 0.71, + "learning_rate": 3.045612923661704e-07, + "loss": 0.2162, + "step": 4800 + }, + { + "epoch": 0.72, + "learning_rate": 2.9949318973709216e-07, + "loss": 0.2541, + "step": 4832 + }, + { + "epoch": 0.72, + "learning_rate": 2.9442508710801395e-07, + "loss": 0.2467, + "step": 4864 + }, + { + "epoch": 0.73, + "learning_rate": 2.893569844789357e-07, + "loss": 0.1974, + "step": 4896 + }, + { + "epoch": 0.73, + "learning_rate": 2.8428888184985746e-07, + "loss": 0.2263, + "step": 4928 + }, + { + "epoch": 0.74, + "learning_rate": 2.792207792207792e-07, + "loss": 0.2616, + "step": 4960 + }, + { + "epoch": 0.74, + "learning_rate": 2.74152676591701e-07, + "loss": 0.2685, + "step": 4992 + }, + { + "epoch": 0.75, + "learning_rate": 2.6908457396262276e-07, + "loss": 0.2122, + "step": 5024 + }, + { + "epoch": 0.75, + "learning_rate": 2.640164713335445e-07, + "loss": 0.2345, + "step": 5056 + }, + { + "epoch": 0.76, + "learning_rate": 2.589483687044662e-07, + "loss": 0.2311, + "step": 5088 + }, + { + "epoch": 0.76, + "learning_rate": 2.5388026607538806e-07, + "loss": 0.2501, + "step": 5120 + }, + { + "epoch": 0.77, + "learning_rate": 2.488121634463098e-07, + "loss": 0.2036, + "step": 5152 + }, + { + "epoch": 0.77, + "learning_rate": 2.437440608172315e-07, + "loss": 0.212, + "step": 5184 + }, + { + "epoch": 0.78, + "learning_rate": 2.386759581881533e-07, + "loss": 0.2717, + "step": 5216 + }, + { + "epoch": 0.78, + "learning_rate": 2.3360785555907506e-07, + "loss": 0.2736, + "step": 5248 + }, + { + "epoch": 0.79, + "learning_rate": 2.2853975292999684e-07, + "loss": 0.2362, + "step": 5280 + }, + { + "epoch": 0.79, + "learning_rate": 2.2347165030091857e-07, + "loss": 0.2285, + "step": 5312 + }, + { + "epoch": 0.8, + "learning_rate": 2.1840354767184035e-07, + "loss": 0.2892, + "step": 5344 + }, + { + "epoch": 0.8, + "learning_rate": 2.133354450427621e-07, + "loss": 0.2337, + "step": 5376 + }, + { + "epoch": 0.81, + "learning_rate": 2.0826734241368387e-07, + "loss": 0.2117, + "step": 5408 + }, + { + "epoch": 0.81, + "learning_rate": 2.0319923978460563e-07, + "loss": 0.2257, + "step": 5440 + }, + { + "epoch": 0.81, + "learning_rate": 1.981311371555274e-07, + "loss": 0.2819, + "step": 5472 + }, + { + "epoch": 0.82, + "learning_rate": 1.9306303452644914e-07, + "loss": 0.1814, + "step": 5504 + }, + { + "epoch": 0.82, + "learning_rate": 1.8799493189737092e-07, + "loss": 0.1349, + "step": 5536 + }, + { + "epoch": 0.83, + "learning_rate": 1.8292682926829268e-07, + "loss": 0.2373, + "step": 5568 + }, + { + "epoch": 0.83, + "learning_rate": 1.7785872663921444e-07, + "loss": 0.28, + "step": 5600 + }, + { + "epoch": 0.84, + "learning_rate": 1.727906240101362e-07, + "loss": 0.2024, + "step": 5632 + }, + { + "epoch": 0.84, + "learning_rate": 1.6772252138105798e-07, + "loss": 0.2169, + "step": 5664 + }, + { + "epoch": 0.85, + "learning_rate": 1.628127969591384e-07, + "loss": 0.2039, + "step": 5696 + }, + { + "epoch": 0.85, + "learning_rate": 1.577446943300602e-07, + "loss": 0.2612, + "step": 5728 + }, + { + "epoch": 0.86, + "learning_rate": 1.5267659170098192e-07, + "loss": 0.3311, + "step": 5760 + }, + { + "epoch": 0.86, + "learning_rate": 1.476084890719037e-07, + "loss": 0.2063, + "step": 5792 + }, + { + "epoch": 0.87, + "learning_rate": 1.4254038644282546e-07, + "loss": 0.1902, + "step": 5824 + }, + { + "epoch": 0.87, + "learning_rate": 1.3747228381374722e-07, + "loss": 0.2569, + "step": 5856 + }, + { + "epoch": 0.88, + "learning_rate": 1.3240418118466898e-07, + "loss": 0.1476, + "step": 5888 + }, + { + "epoch": 0.88, + "learning_rate": 1.2733607855559076e-07, + "loss": 0.2325, + "step": 5920 + }, + { + "epoch": 0.89, + "learning_rate": 1.222679759265125e-07, + "loss": 0.219, + "step": 5952 + }, + { + "epoch": 0.89, + "learning_rate": 1.1719987329743426e-07, + "loss": 0.2189, + "step": 5984 + }, + { + "epoch": 0.9, + "learning_rate": 1.1213177066835602e-07, + "loss": 0.2202, + "step": 6016 + }, + { + "epoch": 0.9, + "learning_rate": 1.0706366803927779e-07, + "loss": 0.161, + "step": 6048 + }, + { + "epoch": 0.91, + "learning_rate": 1.0199556541019955e-07, + "loss": 0.2635, + "step": 6080 + }, + { + "epoch": 0.91, + "learning_rate": 9.69274627811213e-08, + "loss": 0.2975, + "step": 6112 + }, + { + "epoch": 0.91, + "learning_rate": 9.185936015204307e-08, + "loss": 0.1722, + "step": 6144 + }, + { + "epoch": 0.92, + "learning_rate": 8.679125752296483e-08, + "loss": 0.25, + "step": 6176 + }, + { + "epoch": 0.92, + "learning_rate": 8.172315489388659e-08, + "loss": 0.2154, + "step": 6208 + }, + { + "epoch": 0.93, + "learning_rate": 7.665505226480836e-08, + "loss": 0.1769, + "step": 6240 + }, + { + "epoch": 0.93, + "learning_rate": 7.158694963573011e-08, + "loss": 0.226, + "step": 6272 + }, + { + "epoch": 0.94, + "learning_rate": 6.651884700665187e-08, + "loss": 0.2441, + "step": 6304 + }, + { + "epoch": 0.94, + "learning_rate": 6.145074437757364e-08, + "loss": 0.2062, + "step": 6336 + }, + { + "epoch": 0.95, + "learning_rate": 5.6382641748495405e-08, + "loss": 0.2777, + "step": 6368 + }, + { + "epoch": 0.95, + "learning_rate": 5.131453911941717e-08, + "loss": 0.1502, + "step": 6400 + }, + { + "epoch": 0.96, + "learning_rate": 4.624643649033893e-08, + "loss": 0.2044, + "step": 6432 + }, + { + "epoch": 0.96, + "learning_rate": 4.117833386126069e-08, + "loss": 0.2089, + "step": 6464 + }, + { + "epoch": 0.97, + "learning_rate": 3.611023123218245e-08, + "loss": 0.2773, + "step": 6496 + }, + { + "epoch": 0.97, + "learning_rate": 3.1042128603104216e-08, + "loss": 0.1716, + "step": 6528 + }, + { + "epoch": 0.98, + "learning_rate": 2.5974025974025976e-08, + "loss": 0.1976, + "step": 6560 + }, + { + "epoch": 0.98, + "learning_rate": 2.0905923344947736e-08, + "loss": 0.2176, + "step": 6592 + }, + { + "epoch": 0.99, + "learning_rate": 1.5837820715869496e-08, + "loss": 0.2487, + "step": 6624 + }, + { + "epoch": 0.99, + "learning_rate": 1.0769718086791257e-08, + "loss": 0.1841, + "step": 6656 + }, + { + "epoch": 1.0, + "learning_rate": 5.7016154577130185e-09, + "loss": 0.2355, + "step": 6688 + } + ], + "logging_steps": 32, + "max_steps": 6717, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 200, + "total_flos": 2.85226794934272e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-6717/training_args.bin b/checkpoint-6717/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e53d20e4b258f902224d5fc52a7bc802862aa91d --- /dev/null +++ b/checkpoint-6717/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da0284d0bb5f66a95e93a398f925b63585e9f3b3ccdf040165b16e3c61043b1c +size 4984 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dbe4666ca61b271d1e7307d3cdd7e387a877b181 --- /dev/null +++ b/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "models/openai/whisper-large-v2/finetune/jacob_filter/checkpoint-6717", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/model-00001-of-00002.safetensors b/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cc1a2a5890f79fa9ae022bebac3e71c361fa2759 --- /dev/null +++ b/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d5a1cc6905c607c21d266bbbad40340a367dc8cb00c24e0aed61a5c7a6d4225 +size 4992706480 diff --git a/model-00002-of-00002.safetensors b/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f615266326f992079ba82436e22f8f731c72563 --- /dev/null +++ b/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c64f8b10dd7899ce9313f0a7a81a18faffd70ec749c31b4a0428eb4881d13c18 +size 1180663192 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b2c662ef5476ddd668bfd2aa78effb1b4e6129c1 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e4db6f6fcaf2005bb3f583cfbd6e7afdd0dfdcdb7d4db7e107d5c22865fcf47 +size 5048