markus.pernpointner committed on
Commit
59d2d00
·
1 Parent(s): d0d07f3

initial commit of Pharia4608 embedding control model

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. config.yml +227 -0
  2. model_state_layer_0_EmbeddingInput.pt +3 -0
  3. model_state_layer_10_TransformerLayer.pt +3 -0
  4. model_state_layer_10_TransformerLayer_adapter_embed.pt +3 -0
  5. model_state_layer_11_TransformerLayer.pt +3 -0
  6. model_state_layer_11_TransformerLayer_adapter_embed.pt +3 -0
  7. model_state_layer_12_TransformerLayer.pt +3 -0
  8. model_state_layer_12_TransformerLayer_adapter_embed.pt +3 -0
  9. model_state_layer_13_TransformerLayer.pt +3 -0
  10. model_state_layer_13_TransformerLayer_adapter_embed.pt +3 -0
  11. model_state_layer_14_TransformerLayer.pt +3 -0
  12. model_state_layer_14_TransformerLayer_adapter_embed.pt +3 -0
  13. model_state_layer_15_TransformerLayer.pt +3 -0
  14. model_state_layer_15_TransformerLayer_adapter_embed.pt +3 -0
  15. model_state_layer_16_TransformerLayer.pt +3 -0
  16. model_state_layer_16_TransformerLayer_adapter_embed.pt +3 -0
  17. model_state_layer_17_TransformerLayer.pt +3 -0
  18. model_state_layer_17_TransformerLayer_adapter_embed.pt +3 -0
  19. model_state_layer_18_TransformerLayer.pt +3 -0
  20. model_state_layer_18_TransformerLayer_adapter_embed.pt +3 -0
  21. model_state_layer_19_TransformerLayer.pt +3 -0
  22. model_state_layer_19_TransformerLayer_adapter_embed.pt +3 -0
  23. model_state_layer_1_TransformerLayer.pt +3 -0
  24. model_state_layer_1_TransformerLayer_adapter_embed.pt +3 -0
  25. model_state_layer_20_TransformerLayer.pt +3 -0
  26. model_state_layer_20_TransformerLayer_adapter_embed.pt +3 -0
  27. model_state_layer_21_TransformerLayer.pt +3 -0
  28. model_state_layer_21_TransformerLayer_adapter_embed.pt +3 -0
  29. model_state_layer_22_TransformerLayer.pt +3 -0
  30. model_state_layer_22_TransformerLayer_adapter_embed.pt +3 -0
  31. model_state_layer_23_TransformerLayer.pt +3 -0
  32. model_state_layer_23_TransformerLayer_adapter_embed.pt +3 -0
  33. model_state_layer_24_TransformerLayer.pt +3 -0
  34. model_state_layer_24_TransformerLayer_adapter_embed.pt +3 -0
  35. model_state_layer_25_TransformerLayer.pt +3 -0
  36. model_state_layer_25_TransformerLayer_adapter_embed.pt +3 -0
  37. model_state_layer_26_TransformerLayer.pt +3 -0
  38. model_state_layer_26_TransformerLayer_adapter_embed.pt +3 -0
  39. model_state_layer_27_TransformerLayer.pt +3 -0
  40. model_state_layer_27_TransformerLayer_adapter_embed.pt +3 -0
  41. model_state_layer_28_LayerNormWrapper.pt +3 -0
  42. model_state_layer_2_TransformerLayer.pt +3 -0
  43. model_state_layer_2_TransformerLayer_adapter_embed.pt +3 -0
  44. model_state_layer_3_TransformerLayer.pt +3 -0
  45. model_state_layer_3_TransformerLayer_adapter_embed.pt +3 -0
  46. model_state_layer_4_TransformerLayer.pt +3 -0
  47. model_state_layer_4_TransformerLayer_adapter_embed.pt +3 -0
  48. model_state_layer_5_TransformerLayer.pt +3 -0
  49. model_state_layer_5_TransformerLayer_adapter_embed.pt +3 -0
  50. model_state_layer_6_TransformerLayer.pt +3 -0
config.yml ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": ".unknown.",
3
+ "runner": {
4
+ "runner_type": "pdsh",
5
+ "hostsfile": null,
6
+ "hosts": null,
7
+ "master_port": 29500,
8
+ "master_addr": null,
9
+ "script": "src/scaling/transformer/train.py",
10
+ "default_gpu_count": 8,
11
+ "docker_config": {
12
+ "docker_container": null,
13
+ "docker_sudo": false,
14
+ "docker_mounts": [
15
+ [
16
+ "/mnt/",
17
+ "/mnt/"
18
+ ]
19
+ ]
20
+ },
21
+ "use_determined": true
22
+ },
23
+ "logger": {
24
+ "log_level": "info",
25
+ "log_dir": null,
26
+ "metrics_ranks": null,
27
+ "use_wandb": true,
28
+ "wandb_ranks": null,
29
+ "wandb_host": "https://api.wandb.ai",
30
+ "wandb_team": "aleph-alpha",
31
+ "wandb_project": "ng-semantic-embedding",
32
+ "wandb_group": "7b_medi_from_pretrained-2024-09-17-08-51-16",
33
+ "wandb_api_key": "<REDACTED>",
34
+ "use_tensorboard": false,
35
+ "tensorboard_ranks": null,
36
+ "determined_metrics_ranks": null
37
+ },
38
+ "topology": {
39
+ "global_rank": 0,
40
+ "world_size": 256,
41
+ "local_slot": 0,
42
+ "model_parallel_size": 1,
43
+ "pipe_parallel_size": 1,
44
+ "data_parallel_size": 256,
45
+ "global_batch_size": 2048,
46
+ "micro_batch_size": 8,
47
+ "gradient_accumulation_steps": 1,
48
+ "pipe_partition_method": "balanced",
49
+ "pipe_partition_overwrite": null,
50
+ "activation_checkpointing_type": "every_layer",
51
+ "sequence_parallel": false
52
+ },
53
+ "optimizer": {
54
+ "method": "adamw",
55
+ "beta1": 0.9,
56
+ "beta2": 0.95,
57
+ "eps": 1e-15,
58
+ "gradient_clipping": 0.0,
59
+ "allreduce_bucket_size": 500000000,
60
+ "loss_scaler": {
61
+ "enable": false,
62
+ "initial_scale": 4294967296.0,
63
+ "window": 1000,
64
+ "hysteresis": 2.0,
65
+ "consecutive_hysteresis": false,
66
+ "min_scale": 1.0,
67
+ "factor": 2.0
68
+ },
69
+ "zero": true,
70
+ "zero_save_static": false,
71
+ "debug_log": false
72
+ },
73
+ "learning_rate_scheduler": {
74
+ "learning_rate": 2e-05,
75
+ "learning_rate_minimum": 0.0,
76
+ "learning_rate_decay_style": "linear",
77
+ "learning_rate_decay_iters": 938,
78
+ "learning_rate_warmup_steps": 50
79
+ },
80
+ "embedding_learning_rate_scheduler": {
81
+ "learning_rate": 0.0,
82
+ "learning_rate_minimum": 0.0,
83
+ "learning_rate_decay_style": "cosine",
84
+ "learning_rate_decay_iters": 0,
85
+ "learning_rate_warmup_steps": 0
86
+ },
87
+ "training": {
88
+ "weight_decay": 0.0001,
89
+ "finetune": true,
90
+ "finetunable_parameters": [
91
+ "embed"
92
+ ],
93
+ "parameters_exclude": [],
94
+ "use_separate_lr_on_embeddings": false,
95
+ "use_deterministic_torch_algorithms": false,
96
+ "loss_function_config": {
97
+ "loss_type": "contrastive_loss",
98
+ "number_of_hard_negatives": 1,
99
+ "use_instructions": true,
100
+ "query_side_only": false,
101
+ "scale": 50,
102
+ "log_verbose_metrics": true
103
+ }
104
+ },
105
+ "trainer": {
106
+ "save_dir": "checkpoints",
107
+ "save_interval": 938,
108
+ "load_dir": "/scratch/samuel/Pharia-1-LLM-7B-control",
109
+ "train_iterations": 938,
110
+ "assert_checkpoint_loaded": true,
111
+ "load_optimizer_states": false,
112
+ "delete_past_optimizer_states": true,
113
+ "load_context": false,
114
+ "allowed_missing_keys_in_checkpoint": [
115
+ "embed"
116
+ ],
117
+ "allowed_unexpected_keys_in_checkpoint": null,
118
+ "ignore_keys_in_checkpoint": null,
119
+ "merge_lora_after_loading_checkpoint": false,
120
+ "seed": 42,
121
+ "dataloader_num_workers": 0,
122
+ "dataloader_pin_memory": true,
123
+ "dataloader_prefetch_factor": null,
124
+ "eval_iterations": 1,
125
+ "eval_interval": null,
126
+ "separate_file_for_parameters": [
127
+ "adapter_embed"
128
+ ]
129
+ },
130
+ "profiler": {
131
+ "profile_steps": 0,
132
+ "profile_start_at_step": 0,
133
+ "profiler_output": null
134
+ },
135
+ "transformer_architecture": {
136
+ "vocab_size": 128000,
137
+ "vocab_file": "/scratch/samuel/Pharia-1-LLM-7B-control/vocab.json",
138
+ "hidden_size": 4608,
139
+ "num_layers": 27,
140
+ "num_attention_heads": 36,
141
+ "num_local_attention_heads": 0,
142
+ "local_attention_window_size": null,
143
+ "rotary_embedding_base": 1000000,
144
+ "rotary_percentage": 1.0,
145
+ "sequence_length": 2048,
146
+ "norm_type": "layernorm",
147
+ "relative_position_embedding_type": "rotary_complex",
148
+ "mlp_type": "default",
149
+ "mlp_factor": 4.0,
150
+ "attention_bias": true,
151
+ "attention_qkv_in_one": false,
152
+ "attention_num_kv_heads": 4,
153
+ "attention_use_matmul": false,
154
+ "mlp_bias": true,
155
+ "key_query_norm": false,
156
+ "weight_tying": false,
157
+ "masked_softmax": {
158
+ "kernel": "flash_attention",
159
+ "softmax_in_fp32": true,
160
+ "scale": 1.0,
161
+ "deterministic_flash_attn_bwd": false
162
+ },
163
+ "layernorm": {
164
+ "optimization_type": "torch",
165
+ "layernorm_epsilon": 1e-05
166
+ },
167
+ "precision": "bfloat16",
168
+ "dropout_embedding": 0.0,
169
+ "dropout_attention_probs": 0.0,
170
+ "dropout_after_attention": 0.0,
171
+ "dropout_after_mlp": 0.0,
172
+ "bitfit_bias_config": null,
173
+ "finetunable_token_ids": [],
174
+ "image_encoder": false,
175
+ "dropout_image_encoder": 0.0,
176
+ "softprompt_config": null,
177
+ "adapter_config": {
178
+ "name": "embed",
179
+ "attention_downsampling_factor": 0.25,
180
+ "mlp_downsampling_factor": 0.25,
181
+ "init_std": 1e-05,
182
+ "version": ".unknown."
183
+ },
184
+ "lora_config": null,
185
+ "embedding_head_config": {
186
+ "name": "pooling_only",
187
+ "proj_layers": null,
188
+ "pooling": "weighted_mean"
189
+ },
190
+ "lm_head": false,
191
+ "causal": true
192
+ },
193
+ "data": {
194
+ "legacy_dataset": false,
195
+ "load_mmap_index_to_memory": false,
196
+ "use_mmap": false,
197
+ "load_data_item_mmap_index_to_memory": false,
198
+ "finetuning_dataset": false,
199
+ "finetuning_chat_dataset": false,
200
+ "finetuning_dataset_memory_map": false,
201
+ "embedding_dataset": true,
202
+ "embedding_dataset_memory_map": false,
203
+ "data_prefixes": [
204
+ "/scratch/samuel/all_source_data_templated/allnli_eng.jsonl",
205
+ "/scratch/samuel/all_source_data_templated/hotpot_with_bm25_negatives.jsonl",
206
+ "/scratch/samuel/all_source_data_templated/medi_1.jsonl",
207
+ "/scratch/samuel/all_source_data_templated/msmarco.jsonl"
208
+ ],
209
+ "validation_data_prefixes": null,
210
+ "blended_dataset": {
211
+ "weight_by_num_documents": true,
212
+ "weighted_sampler_alpha": 1.0,
213
+ "weights": null,
214
+ "weight_examples_proportional": false,
215
+ "ep_maximum": null,
216
+ "ep_temperature": 1.0,
217
+ "minimum_dataset_size": 0,
218
+ "cache_directory": "/scratch/samuel/blended_dataset_cache",
219
+ "shuffle_dataset_indices": false,
220
+ "load_dataset_indices_to_memory": false
221
+ },
222
+ "only_full_sequences": false,
223
+ "allow_incomplete_sequences_every_n": 0
224
+ },
225
+ "determined_experiment_id": 2765,
226
+ "determined_trial_id": 81283
227
+ }
model_state_layer_0_EmbeddingInput.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63f7803c3b4d22df9ef5c826e0df060f140138e5f25b9be47431ad2663802ff0
3
+ size 1179649443
model_state_layer_10_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef63496e306c9f3e0112742300dd47b51c31fd2226fd3d7aa5c2adc1a5ffb265
3
+ size 434219902
model_state_layer_10_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:676a207e49d6c3a2ec44a9e46f2556b14aff9338fba4e264ed2e74a5f2cd0e6e
3
+ size 42469872
model_state_layer_11_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38ae21f20ea4e1492bdf2369f7ef91dda55015f81ab4eab9604a9362086d12cb
3
+ size 434219902
model_state_layer_11_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cecb2254f42e26b348186f40932e45e15619b614726a2b0260c7c9ccc153020
3
+ size 42469872
model_state_layer_12_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd902b46a5a353687a2f31e754164e3db18fe72c769a69ed13284bbff224f455
3
+ size 434219902
model_state_layer_12_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9792a7ebe8ce63557a6d899cdf41de63ada82de5bde9c1af8eb2949704a83094
3
+ size 42469872
model_state_layer_13_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:664c1b719da7b9218debdc2e9794798ca9d0f22568b138996e182229d79c3444
3
+ size 434219902
model_state_layer_13_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8a6450411a45420b87e825d62d1733c5a5ecfbcd7a0a0a19842cfed1eb98172
3
+ size 42469872
model_state_layer_14_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ef77d20a502e6c8d8908cbd7cb222d04ccea69002e78042a57ac11a8a0b4362
3
+ size 434219902
model_state_layer_14_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c084fd0149e3b81d783bc54a83026b0b9bf34b69db846af5981a57f37f3e738e
3
+ size 42469872
model_state_layer_15_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7de81299e97471d03af358c730a40238867eb6032c93c4cebcba714f2db5d47
3
+ size 434219902
model_state_layer_15_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:685c7a8933ff4764e0fe8df4cbd42569c956faa08c00da4530da692c065a5159
3
+ size 42469872
model_state_layer_16_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e866357f6d06e90c4a4785e7b0cc2056b50361904168d54fb6fc314ef9f4639b
3
+ size 434219902
model_state_layer_16_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9de4b8a1852dfbbfb30e80a42ec141062ee1f9cef355074ef2f6b5305c30a715
3
+ size 42469872
model_state_layer_17_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03b4c380ed92cd055a067919127a7365bcd66f18fa8dfa8d38653018ac0f597c
3
+ size 434219902
model_state_layer_17_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44c33bda7e7ced7993a85681a940aff2a79678c37c7ee02721dca5f7a1388b6a
3
+ size 42469872
model_state_layer_18_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c34494e568a417b9adae97acb4d7c76012a9903a3e3d596e68d0020f1af6d2b0
3
+ size 434219902
model_state_layer_18_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a208197337f96311529638ce0f8ecda2728e034e13d5a61fd1337292b6bac799
3
+ size 42469872
model_state_layer_19_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2807c758304a165e6140b8942fa9eab390814f81c26fd327628822ce004ebfe7
3
+ size 434219902
model_state_layer_19_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3effc3fdeb41663fd7d38516cbd2b025edc6210e8eb945523ead26a81c8618cf
3
+ size 42469872
model_state_layer_1_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c2679a5ca251090f3a75c87b7add5f42e975a213ec8aaee90bde27da688a3d1
3
+ size 434219882
model_state_layer_1_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d6d706fec5794b603bd590ba493d4c23fe1e6858917211027112f725d650bf4
3
+ size 42469864
model_state_layer_20_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:197bec6a16feca3ac18653521b975673a56a071bf59ae3a9bfce737648bda775
3
+ size 434219902
model_state_layer_20_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:834b9eda7c39ce0afee590f0e7c1a034d47bb0a0c8b827eac414f1f8fe12496b
3
+ size 42469872
model_state_layer_21_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31eb0f91b2b326f7b869a2dc5dae07be2a6831efb72d3193d1d6d457a2d6fcff
3
+ size 434219902
model_state_layer_21_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b0d557feef0473fa638bf2a02808d6887cc1b3fba759addcf4eecd07ca52956
3
+ size 42469872
model_state_layer_22_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8deac3ee07d0285db53ce915d9315491287708edec05a9f16e17e3b0c2395ceb
3
+ size 434219902
model_state_layer_22_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01c5dd99e10c4235d9caca5b13b649bb912de5bfaf4783ce07513b54b5c3b0b8
3
+ size 42469872
model_state_layer_23_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28a4de46c226702e34dbf4d5cd8ead1d366f7cd94b5c5a0f9481dc0f5d502214
3
+ size 434219902
model_state_layer_23_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1c041f2f5985fcf81de47980dca2a231269254757771c3fd703b6b8479d0623
3
+ size 42469872
model_state_layer_24_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:290bde2e41ce42a45bef1ae6977cbc4f8add6ca43f1493fedeac5d17b7497248
3
+ size 434219902
model_state_layer_24_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d0f786469b37c31f5e321fe6b20032147c4fe4860a50844e096dabc2f66c5bb
3
+ size 42469872
model_state_layer_25_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45968daef6d99223322950b1251b22b8462b8d95fd355edb67421feaef60fc49
3
+ size 434219902
model_state_layer_25_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:631d05d00de00be24e252a7d448c6d028d9dab8ffbd4de365708feac348b4ac0
3
+ size 42469872
model_state_layer_26_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47274e293886f267b6ef434f55b4087c3400132b40e33ea732762fdcf544648d
3
+ size 434219902
model_state_layer_26_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2e7c0b257e634dc8e2140981bd688dd059a43926d68426f43c80d3b3a05f92d
3
+ size 42469872
model_state_layer_27_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f25827475fe28d36d12002c77f4e337f01af1d38524d74163c804f387ebcad4
3
+ size 434219902
model_state_layer_27_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06e95fb59125bcf66ac9d1c257889094fa27ac8f64802ab94020b031cd05b83c
3
+ size 42469872
model_state_layer_28_LayerNormWrapper.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cd1372d2ae21d66d644fb98394d0868cc9aeff84ccc20619a9bbf95eeebcd62
3
+ size 20172
model_state_layer_2_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15decebd9fe0f57b1d69766411ab106d37f354b7dc1b6b280d48baca79626756
3
+ size 434219882
model_state_layer_2_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2ad19f1d9e94cf44d1015372f7b060bc1f96bc3aee2ef8c82933051e8cd48c1
3
+ size 42469864
model_state_layer_3_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c9465e9dcb05d29f2c9e320c53e486825e05341a6c5097fcefa86ca1384f495
3
+ size 434219882
model_state_layer_3_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d046231c7dd30aa6f3cf077183f9c806f26f4d00683c08e4021681db492541e
3
+ size 42469864
model_state_layer_4_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f8449ff2999f09a5ae6a621104c5d8929bb9f6969315cf4210bfe327dd0bc72
3
+ size 434219882
model_state_layer_4_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd805b365e2e0394fac691cde5f39a6dfee441f9e3e0c167a3194bfc8bfd775a
3
+ size 42469864
model_state_layer_5_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4b77dab79cedd475c6335c22188eefd71bcb07cf39a11d8f80deb7b678f2ac
3
+ size 434219882
model_state_layer_5_TransformerLayer_adapter_embed.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07ae55c82c22d0e1c80796c921607a40770741c522c34adf533d1790599ad718
3
+ size 42469864
model_state_layer_6_TransformerLayer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca257c7e6088e46d529b4b8be5c185d1767f3f5916db5e0e799942af25c9c1b0
3
+ size 434219882