htlou committed on
Commit
d68e7b9
·
verified ·
1 Parent(s): 2e64a5e

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. README.md +79 -0
  2. added_tokens.json +4 -0
  3. all_results.json +20 -0
  4. checkpoint-100/added_tokens.json +4 -0
  5. checkpoint-100/config.json +68 -0
  6. checkpoint-100/generation_config.json +6 -0
  7. checkpoint-100/model-00001-of-00004.safetensors +3 -0
  8. checkpoint-100/model-00002-of-00004.safetensors +3 -0
  9. checkpoint-100/model-00003-of-00004.safetensors +3 -0
  10. checkpoint-100/model-00004-of-00004.safetensors +3 -0
  11. checkpoint-100/model.safetensors.index.json +694 -0
  12. checkpoint-100/preprocessor_config.json +52 -0
  13. checkpoint-100/special_tokens_map.json +30 -0
  14. checkpoint-100/tokenizer.json +0 -0
  15. checkpoint-100/tokenizer.model +3 -0
  16. checkpoint-100/tokenizer_config.json +70 -0
  17. checkpoint-100/trainer_state.json +365 -0
  18. checkpoint-100/training_args.bin +3 -0
  19. checkpoint-200/added_tokens.json +4 -0
  20. checkpoint-200/config.json +68 -0
  21. checkpoint-200/generation_config.json +6 -0
  22. checkpoint-200/model-00001-of-00004.safetensors +3 -0
  23. checkpoint-200/model-00002-of-00004.safetensors +3 -0
  24. checkpoint-200/model-00003-of-00004.safetensors +3 -0
  25. checkpoint-200/model-00004-of-00004.safetensors +3 -0
  26. checkpoint-200/model.safetensors.index.json +694 -0
  27. checkpoint-200/preprocessor_config.json +52 -0
  28. checkpoint-200/special_tokens_map.json +30 -0
  29. checkpoint-200/tokenizer.json +0 -0
  30. checkpoint-200/tokenizer.model +3 -0
  31. checkpoint-200/tokenizer_config.json +70 -0
  32. checkpoint-200/trainer_state.json +697 -0
  33. checkpoint-200/training_args.bin +3 -0
  34. checkpoint-300/added_tokens.json +4 -0
  35. checkpoint-300/config.json +68 -0
  36. checkpoint-300/generation_config.json +6 -0
  37. checkpoint-300/model-00001-of-00004.safetensors +3 -0
  38. checkpoint-300/model-00002-of-00004.safetensors +3 -0
  39. checkpoint-300/model-00003-of-00004.safetensors +3 -0
  40. checkpoint-300/model-00004-of-00004.safetensors +3 -0
  41. checkpoint-300/model.safetensors.index.json +694 -0
  42. checkpoint-300/preprocessor_config.json +52 -0
  43. checkpoint-300/special_tokens_map.json +30 -0
  44. checkpoint-300/tokenizer.json +0 -0
  45. checkpoint-300/tokenizer.model +3 -0
  46. checkpoint-300/tokenizer_config.json +70 -0
  47. checkpoint-300/trainer_state.json +1029 -0
  48. checkpoint-300/training_args.bin +3 -0
  49. checkpoint-321/added_tokens.json +4 -0
  50. checkpoint-321/config.json +68 -0
README.md ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: other
4
+ base_model: llava-hf/llava-v1.6-mistral-7b-hf
5
+ tags:
6
+ - llama-factory
7
+ - full
8
+ - generated_from_trainer
9
+ model-index:
10
+ - name: AA_preference_cocour_new_step10_0_80
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # AA_preference_cocour_new_step10_0_80
18
+
19
+ This model is a fine-tuned version of [llava-hf/llava-v1.6-mistral-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf) on the AA_preference_cocour_new_step10_0_80 dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.5250
22
+ - Rewards/chosen: 0.5658
23
+ - Rewards/rejected: -2.2697
24
+ - Rewards/accuracies: 0.8307
25
+ - Rewards/margins: 2.8355
26
+ - Logps/rejected: -237.9764
27
+ - Logps/chosen: -250.5795
28
+ - Logits/rejected: -2.1447
29
+ - Logits/chosen: -2.1680
30
+
31
+ ## Model description
32
+
33
+ More information needed
34
+
35
+ ## Intended uses & limitations
36
+
37
+ More information needed
38
+
39
+ ## Training and evaluation data
40
+
41
+ More information needed
42
+
43
+ ## Training procedure
44
+
45
+ ### Training hyperparameters
46
+
47
+ The following hyperparameters were used during training:
48
+ - learning_rate: 1e-06
49
+ - train_batch_size: 8
50
+ - eval_batch_size: 8
51
+ - seed: 42
52
+ - distributed_type: multi-GPU
53
+ - num_devices: 8
54
+ - gradient_accumulation_steps: 4
55
+ - total_train_batch_size: 256
56
+ - total_eval_batch_size: 64
57
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
58
+ - lr_scheduler_type: cosine
59
+ - lr_scheduler_warmup_steps: 10
60
+ - num_epochs: 3.0
61
+
62
+ ### Training results
63
+
64
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
65
+ |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
66
+ | 0.572 | 0.4673 | 50 | 0.5720 | 1.0708 | -0.2185 | 0.7578 | 1.2893 | -217.4643 | -245.5292 | -2.5291 | -2.5309 |
67
+ | 0.4997 | 0.9346 | 100 | 0.5102 | 0.8653 | -0.9135 | 0.7865 | 1.7788 | -224.4143 | -247.5850 | -2.1776 | -2.2008 |
68
+ | 0.2873 | 1.4019 | 150 | 0.5675 | 1.0559 | -1.2279 | 0.7891 | 2.2838 | -227.5579 | -245.6786 | -2.2632 | -2.2750 |
69
+ | 0.2853 | 1.8692 | 200 | 0.5163 | 0.7188 | -1.7114 | 0.8203 | 2.4302 | -232.3931 | -249.0491 | -2.1251 | -2.1478 |
70
+ | 0.1541 | 2.3364 | 250 | 0.5271 | 0.5977 | -2.1434 | 0.8177 | 2.7411 | -236.7135 | -250.2604 | -2.2153 | -2.2352 |
71
+ | 0.1566 | 2.8037 | 300 | 0.5242 | 0.5568 | -2.2821 | 0.8307 | 2.8389 | -238.1007 | -250.6694 | -2.1442 | -2.1674 |
72
+
73
+
74
+ ### Framework versions
75
+
76
+ - Transformers 4.45.2
77
+ - Pytorch 2.4.0+cu121
78
+ - Datasets 2.21.0
79
+ - Tokenizers 0.20.3
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<image>": 32000,
3
+ "<pad>": 32001
4
+ }
all_results.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_logits/chosen": -2.1679632663726807,
4
+ "eval_logits/rejected": -2.144676923751831,
5
+ "eval_logps/chosen": -250.57945251464844,
6
+ "eval_logps/rejected": -237.97642517089844,
7
+ "eval_loss": 0.5249601006507874,
8
+ "eval_rewards/accuracies": 0.8307291865348816,
9
+ "eval_rewards/chosen": 0.5658118724822998,
10
+ "eval_rewards/margins": 2.835527181625366,
11
+ "eval_rewards/rejected": -2.2697153091430664,
12
+ "eval_runtime": 201.1633,
13
+ "eval_samples_per_second": 15.112,
14
+ "eval_steps_per_second": 0.239,
15
+ "total_flos": 3785055088410624.0,
16
+ "train_loss": 0.33236435687059185,
17
+ "train_runtime": 12185.597,
18
+ "train_samples_per_second": 6.734,
19
+ "train_steps_per_second": 0.026
20
+ }
checkpoint-100/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<image>": 32000,
3
+ "<pad>": 32001
4
+ }
checkpoint-100/config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/align-anything/hantao/models/llava-v1.6-mistral-7b-hf",
3
+ "architectures": [
4
+ "LlavaNextForConditionalGeneration"
5
+ ],
6
+ "hidden_size": 4096,
7
+ "ignore_index": -100,
8
+ "image_grid_pinpoints": [
9
+ [
10
+ 336,
11
+ 672
12
+ ],
13
+ [
14
+ 672,
15
+ 336
16
+ ],
17
+ [
18
+ 672,
19
+ 672
20
+ ],
21
+ [
22
+ 1008,
23
+ 336
24
+ ],
25
+ [
26
+ 336,
27
+ 1008
28
+ ]
29
+ ],
30
+ "image_seq_length": 576,
31
+ "image_token_index": 32000,
32
+ "model_type": "llava_next",
33
+ "projector_hidden_act": "gelu",
34
+ "text_config": {
35
+ "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
36
+ "architectures": [
37
+ "MistralForCausalLM"
38
+ ],
39
+ "intermediate_size": 14336,
40
+ "max_position_embeddings": 32768,
41
+ "model_type": "mistral",
42
+ "num_key_value_heads": 8,
43
+ "rms_norm_eps": 1e-05,
44
+ "rope_theta": 1000000.0,
45
+ "sliding_window": null,
46
+ "torch_dtype": "bfloat16",
47
+ "vocab_size": 32064
48
+ },
49
+ "tie_word_embeddings": false,
50
+ "torch_dtype": "bfloat16",
51
+ "transformers_version": "4.45.2",
52
+ "use_cache": false,
53
+ "use_image_newline_parameter": true,
54
+ "vision_config": {
55
+ "hidden_size": 1024,
56
+ "image_size": 336,
57
+ "intermediate_size": 4096,
58
+ "model_type": "clip_vision_model",
59
+ "num_attention_heads": 16,
60
+ "num_hidden_layers": 24,
61
+ "patch_size": 14,
62
+ "projection_dim": 768,
63
+ "vocab_size": 32000
64
+ },
65
+ "vision_feature_layer": -2,
66
+ "vision_feature_select_strategy": "default",
67
+ "vocab_size": 32064
68
+ }
checkpoint-100/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.45.2"
6
+ }
checkpoint-100/model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:153ca54787a97273f617d9b49c98e6183f964c24cba17900008907011b9b7fee
3
+ size 4921618624
checkpoint-100/model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17f2f38f0b5a34cfce09b3141e958742adf4546e2374109164e0f48994b63441
3
+ size 4915917672
checkpoint-100/model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02d39f66d134f95c6ef52c3d8d23ed59c83778660bdb59065f1bb6e74e9bc86d
3
+ size 4915917680
checkpoint-100/model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b182a6e197084f76431fafdb3b00cd02af81a79435301834a5230ecdf7a01fcf
3
+ size 380134008
checkpoint-100/model.safetensors.index.json ADDED
@@ -0,0 +1,694 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 15133495296
4
+ },
5
+ "weight_map": {
6
+ "image_newline": "model-00001-of-00004.safetensors",
7
+ "language_model.lm_head.weight": "model-00004-of-00004.safetensors",
8
+ "language_model.model.embed_tokens.weight": "model-00001-of-00004.safetensors",
9
+ "language_model.model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
10
+ "language_model.model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
11
+ "language_model.model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
12
+ "language_model.model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
13
+ "language_model.model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
14
+ "language_model.model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
15
+ "language_model.model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
16
+ "language_model.model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
17
+ "language_model.model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
18
+ "language_model.model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
19
+ "language_model.model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
20
+ "language_model.model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
21
+ "language_model.model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
22
+ "language_model.model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
23
+ "language_model.model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
24
+ "language_model.model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
25
+ "language_model.model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
26
+ "language_model.model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
27
+ "language_model.model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
28
+ "language_model.model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
29
+ "language_model.model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
30
+ "language_model.model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
31
+ "language_model.model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
32
+ "language_model.model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
33
+ "language_model.model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
34
+ "language_model.model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
35
+ "language_model.model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
36
+ "language_model.model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
37
+ "language_model.model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
38
+ "language_model.model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
39
+ "language_model.model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
40
+ "language_model.model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
41
+ "language_model.model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
42
+ "language_model.model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
43
+ "language_model.model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
44
+ "language_model.model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
45
+ "language_model.model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
46
+ "language_model.model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
47
+ "language_model.model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
48
+ "language_model.model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
49
+ "language_model.model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
50
+ "language_model.model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
51
+ "language_model.model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
52
+ "language_model.model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
53
+ "language_model.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
54
+ "language_model.model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
55
+ "language_model.model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
56
+ "language_model.model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
57
+ "language_model.model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
58
+ "language_model.model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
59
+ "language_model.model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
60
+ "language_model.model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
61
+ "language_model.model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
62
+ "language_model.model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
63
+ "language_model.model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
64
+ "language_model.model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
65
+ "language_model.model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
66
+ "language_model.model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
67
+ "language_model.model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
68
+ "language_model.model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
69
+ "language_model.model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
70
+ "language_model.model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
71
+ "language_model.model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
72
+ "language_model.model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
73
+ "language_model.model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
74
+ "language_model.model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
75
+ "language_model.model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
76
+ "language_model.model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
77
+ "language_model.model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
78
+ "language_model.model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
79
+ "language_model.model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
80
+ "language_model.model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
81
+ "language_model.model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
82
+ "language_model.model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
83
+ "language_model.model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
84
+ "language_model.model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
85
+ "language_model.model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
86
+ "language_model.model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
87
+ "language_model.model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
88
+ "language_model.model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
89
+ "language_model.model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
90
+ "language_model.model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
91
+ "language_model.model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
92
+ "language_model.model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
93
+ "language_model.model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
94
+ "language_model.model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
95
+ "language_model.model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
96
+ "language_model.model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
97
+ "language_model.model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
98
+ "language_model.model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
99
+ "language_model.model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
100
+ "language_model.model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
101
+ "language_model.model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
102
+ "language_model.model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
103
+ "language_model.model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
104
+ "language_model.model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
105
+ "language_model.model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
106
+ "language_model.model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
107
+ "language_model.model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
108
+ "language_model.model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
109
+ "language_model.model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
110
+ "language_model.model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
111
+ "language_model.model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
112
+ "language_model.model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
113
+ "language_model.model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
114
+ "language_model.model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
115
+ "language_model.model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
116
+ "language_model.model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
117
+ "language_model.model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
118
+ "language_model.model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
119
+ "language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
120
+ "language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
121
+ "language_model.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
122
+ "language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
123
+ "language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
124
+ "language_model.model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
125
+ "language_model.model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
126
+ "language_model.model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
127
+ "language_model.model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
128
+ "language_model.model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
129
+ "language_model.model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
130
+ "language_model.model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
131
+ "language_model.model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
132
+ "language_model.model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
133
+ "language_model.model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
134
+ "language_model.model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
135
+ "language_model.model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
136
+ "language_model.model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
137
+ "language_model.model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
138
+ "language_model.model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
139
+ "language_model.model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
140
+ "language_model.model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
141
+ "language_model.model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
142
+ "language_model.model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
143
+ "language_model.model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
144
+ "language_model.model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
145
+ "language_model.model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
146
+ "language_model.model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
147
+ "language_model.model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
148
+ "language_model.model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
149
+ "language_model.model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
150
+ "language_model.model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
151
+ "language_model.model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
152
+ "language_model.model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
153
+ "language_model.model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
154
+ "language_model.model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
155
+ "language_model.model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
156
+ "language_model.model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
157
+ "language_model.model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
158
+ "language_model.model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
159
+ "language_model.model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
160
+ "language_model.model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
161
+ "language_model.model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
162
+ "language_model.model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
163
+ "language_model.model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
164
+ "language_model.model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
165
+ "language_model.model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
166
+ "language_model.model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
167
+ "language_model.model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
168
+ "language_model.model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
169
+ "language_model.model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
170
+ "language_model.model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
171
+ "language_model.model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
172
+ "language_model.model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
173
+ "language_model.model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
174
+ "language_model.model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
175
+ "language_model.model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
176
+ "language_model.model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
177
+ "language_model.model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
178
+ "language_model.model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
179
+ "language_model.model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
180
+ "language_model.model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
181
+ "language_model.model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
182
+ "language_model.model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
183
+ "language_model.model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
184
+ "language_model.model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
185
+ "language_model.model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
186
+ "language_model.model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
187
+ "language_model.model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
188
+ "language_model.model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
189
+ "language_model.model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
190
+ "language_model.model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
191
+ "language_model.model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
192
+ "language_model.model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
193
+ "language_model.model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
194
+ "language_model.model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
195
+ "language_model.model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
196
+ "language_model.model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
197
+ "language_model.model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
198
+ "language_model.model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
199
+ "language_model.model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
200
+ "language_model.model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
201
+ "language_model.model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
202
+ "language_model.model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
203
+ "language_model.model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
204
+ "language_model.model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
205
+ "language_model.model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
206
+ "language_model.model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
207
+ "language_model.model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
208
+ "language_model.model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
209
+ "language_model.model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
210
+ "language_model.model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
211
+ "language_model.model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
212
+ "language_model.model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
213
+ "language_model.model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
214
+ "language_model.model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
215
+ "language_model.model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
216
+ "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
217
+ "language_model.model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
218
+ "language_model.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
219
+ "language_model.model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
220
+ "language_model.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
221
+ "language_model.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
222
+ "language_model.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
223
+ "language_model.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
224
+ "language_model.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
225
+ "language_model.model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
226
+ "language_model.model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
227
+ "language_model.model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
228
+ "language_model.model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
229
+ "language_model.model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
230
+ "language_model.model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
231
+ "language_model.model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
232
+ "language_model.model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
233
+ "language_model.model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
234
+ "language_model.model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
235
+ "language_model.model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
236
+ "language_model.model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
237
+ "language_model.model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
238
+ "language_model.model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
239
+ "language_model.model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
240
+ "language_model.model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
241
+ "language_model.model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
242
+ "language_model.model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
243
+ "language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
244
+ "language_model.model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
245
+ "language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
246
+ "language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
247
+ "language_model.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
248
+ "language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
249
+ "language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
250
+ "language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
251
+ "language_model.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
252
+ "language_model.model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
253
+ "language_model.model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
254
+ "language_model.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
255
+ "language_model.model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
256
+ "language_model.model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
257
+ "language_model.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
258
+ "language_model.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
259
+ "language_model.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
260
+ "language_model.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
261
+ "language_model.model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
262
+ "language_model.model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
263
+ "language_model.model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
264
+ "language_model.model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
265
+ "language_model.model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
266
+ "language_model.model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
267
+ "language_model.model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
268
+ "language_model.model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
269
+ "language_model.model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
270
+ "language_model.model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
271
+ "language_model.model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
272
+ "language_model.model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
273
+ "language_model.model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
274
+ "language_model.model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
275
+ "language_model.model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
276
+ "language_model.model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
277
+ "language_model.model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
278
+ "language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
279
+ "language_model.model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
280
+ "language_model.model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
281
+ "language_model.model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
282
+ "language_model.model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
283
+ "language_model.model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
284
+ "language_model.model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
285
+ "language_model.model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
286
+ "language_model.model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
287
+ "language_model.model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
288
+ "language_model.model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
289
+ "language_model.model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
290
+ "language_model.model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
291
+ "language_model.model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
292
+ "language_model.model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
293
+ "language_model.model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
294
+ "language_model.model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
295
+ "language_model.model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
296
+ "language_model.model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
297
+ "language_model.model.norm.weight": "model-00004-of-00004.safetensors",
298
+ "multi_modal_projector.linear_1.bias": "model-00001-of-00004.safetensors",
299
+ "multi_modal_projector.linear_1.weight": "model-00001-of-00004.safetensors",
300
+ "multi_modal_projector.linear_2.bias": "model-00001-of-00004.safetensors",
301
+ "multi_modal_projector.linear_2.weight": "model-00001-of-00004.safetensors",
302
+ "vision_tower.vision_model.embeddings.class_embedding": "model-00001-of-00004.safetensors",
303
+ "vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00001-of-00004.safetensors",
304
+ "vision_tower.vision_model.embeddings.position_embedding.weight": "model-00001-of-00004.safetensors",
305
+ "vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00001-of-00004.safetensors",
306
+ "vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00001-of-00004.safetensors",
307
+ "vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00001-of-00004.safetensors",
308
+ "vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00001-of-00004.safetensors",
309
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00001-of-00004.safetensors",
310
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00001-of-00004.safetensors",
311
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00001-of-00004.safetensors",
312
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00001-of-00004.safetensors",
313
+ "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
314
+ "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
315
+ "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
316
+ "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
317
+ "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
318
+ "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
319
+ "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
320
+ "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
321
+ "vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00001-of-00004.safetensors",
322
+ "vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00001-of-00004.safetensors",
323
+ "vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00001-of-00004.safetensors",
324
+ "vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00001-of-00004.safetensors",
325
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00001-of-00004.safetensors",
326
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00001-of-00004.safetensors",
327
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00001-of-00004.safetensors",
328
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00001-of-00004.safetensors",
329
+ "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
330
+ "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
331
+ "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
332
+ "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
333
+ "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
334
+ "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
335
+ "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
336
+ "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
337
+ "vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00001-of-00004.safetensors",
338
+ "vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00001-of-00004.safetensors",
339
+ "vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00001-of-00004.safetensors",
340
+ "vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00001-of-00004.safetensors",
341
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00001-of-00004.safetensors",
342
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00001-of-00004.safetensors",
343
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00001-of-00004.safetensors",
344
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00001-of-00004.safetensors",
345
+ "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
346
+ "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
347
+ "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
348
+ "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
349
+ "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
350
+ "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
351
+ "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
352
+ "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
353
+ "vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00001-of-00004.safetensors",
354
+ "vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00001-of-00004.safetensors",
355
+ "vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00001-of-00004.safetensors",
356
+ "vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00001-of-00004.safetensors",
357
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00001-of-00004.safetensors",
358
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00001-of-00004.safetensors",
359
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00001-of-00004.safetensors",
360
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00001-of-00004.safetensors",
361
+ "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
362
+ "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
363
+ "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
364
+ "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
365
+ "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
366
+ "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
367
+ "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
368
+ "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
369
+ "vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00001-of-00004.safetensors",
370
+ "vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00001-of-00004.safetensors",
371
+ "vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00001-of-00004.safetensors",
372
+ "vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00001-of-00004.safetensors",
373
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00001-of-00004.safetensors",
374
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00001-of-00004.safetensors",
375
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00001-of-00004.safetensors",
376
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00001-of-00004.safetensors",
377
+ "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
378
+ "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
379
+ "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
380
+ "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
381
+ "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
382
+ "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
383
+ "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
384
+ "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
385
+ "vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00001-of-00004.safetensors",
386
+ "vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00001-of-00004.safetensors",
387
+ "vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00001-of-00004.safetensors",
388
+ "vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00001-of-00004.safetensors",
389
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00001-of-00004.safetensors",
390
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00001-of-00004.safetensors",
391
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00001-of-00004.safetensors",
392
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00001-of-00004.safetensors",
393
+ "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
394
+ "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
395
+ "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
396
+ "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
397
+ "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
398
+ "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
399
+ "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
400
+ "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
401
+ "vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00001-of-00004.safetensors",
402
+ "vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00001-of-00004.safetensors",
403
+ "vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00001-of-00004.safetensors",
404
+ "vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00001-of-00004.safetensors",
405
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00001-of-00004.safetensors",
406
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00001-of-00004.safetensors",
407
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00001-of-00004.safetensors",
408
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00001-of-00004.safetensors",
409
+ "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
410
+ "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
411
+ "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
412
+ "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
413
+ "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
414
+ "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
415
+ "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
416
+ "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
417
+ "vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00001-of-00004.safetensors",
418
+ "vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00001-of-00004.safetensors",
419
+ "vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00001-of-00004.safetensors",
420
+ "vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00001-of-00004.safetensors",
421
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00001-of-00004.safetensors",
422
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00001-of-00004.safetensors",
423
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00001-of-00004.safetensors",
424
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00001-of-00004.safetensors",
425
+ "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
426
+ "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
427
+ "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
428
+ "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
429
+ "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
430
+ "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
431
+ "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
432
+ "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
433
+ "vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00001-of-00004.safetensors",
434
+ "vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00001-of-00004.safetensors",
435
+ "vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00001-of-00004.safetensors",
436
+ "vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00001-of-00004.safetensors",
437
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00001-of-00004.safetensors",
438
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00001-of-00004.safetensors",
439
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00001-of-00004.safetensors",
440
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00001-of-00004.safetensors",
441
+ "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
442
+ "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
443
+ "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
444
+ "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
445
+ "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
446
+ "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
447
+ "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
448
+ "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
449
+ "vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00001-of-00004.safetensors",
450
+ "vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00001-of-00004.safetensors",
451
+ "vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00001-of-00004.safetensors",
452
+ "vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00001-of-00004.safetensors",
453
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00001-of-00004.safetensors",
454
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00001-of-00004.safetensors",
455
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00001-of-00004.safetensors",
456
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00001-of-00004.safetensors",
457
+ "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
458
+ "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
459
+ "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
460
+ "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
461
+ "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
462
+ "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
463
+ "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
464
+ "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
465
+ "vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00001-of-00004.safetensors",
466
+ "vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00001-of-00004.safetensors",
467
+ "vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00001-of-00004.safetensors",
468
+ "vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00001-of-00004.safetensors",
469
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00001-of-00004.safetensors",
470
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00001-of-00004.safetensors",
471
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00001-of-00004.safetensors",
472
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00001-of-00004.safetensors",
473
+ "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
474
+ "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
475
+ "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
476
+ "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
477
+ "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
478
+ "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
479
+ "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
480
+ "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
481
+ "vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00001-of-00004.safetensors",
482
+ "vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00001-of-00004.safetensors",
483
+ "vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00001-of-00004.safetensors",
484
+ "vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00001-of-00004.safetensors",
485
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00001-of-00004.safetensors",
486
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00001-of-00004.safetensors",
487
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00001-of-00004.safetensors",
488
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00001-of-00004.safetensors",
489
+ "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
490
+ "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
491
+ "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
492
+ "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
493
+ "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
494
+ "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
495
+ "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
496
+ "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
497
+ "vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00001-of-00004.safetensors",
498
+ "vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00001-of-00004.safetensors",
499
+ "vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00001-of-00004.safetensors",
500
+ "vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00001-of-00004.safetensors",
501
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00001-of-00004.safetensors",
502
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00001-of-00004.safetensors",
503
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00001-of-00004.safetensors",
504
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00001-of-00004.safetensors",
505
+ "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
506
+ "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
507
+ "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
508
+ "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
509
+ "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
510
+ "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
511
+ "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
512
+ "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
513
+ "vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00001-of-00004.safetensors",
514
+ "vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00001-of-00004.safetensors",
515
+ "vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00001-of-00004.safetensors",
516
+ "vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00001-of-00004.safetensors",
517
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00001-of-00004.safetensors",
518
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00001-of-00004.safetensors",
519
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00001-of-00004.safetensors",
520
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00001-of-00004.safetensors",
521
+ "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
522
+ "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
523
+ "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
524
+ "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
525
+ "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
526
+ "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
527
+ "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
528
+ "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
529
+ "vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00001-of-00004.safetensors",
530
+ "vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00001-of-00004.safetensors",
531
+ "vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00001-of-00004.safetensors",
532
+ "vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00001-of-00004.safetensors",
533
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00001-of-00004.safetensors",
534
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00001-of-00004.safetensors",
535
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00001-of-00004.safetensors",
536
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00001-of-00004.safetensors",
537
+ "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
538
+ "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
539
+ "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
540
+ "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
541
+ "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
542
+ "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
543
+ "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
544
+ "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
545
+ "vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00001-of-00004.safetensors",
546
+ "vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00001-of-00004.safetensors",
547
+ "vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00001-of-00004.safetensors",
548
+ "vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00001-of-00004.safetensors",
549
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00001-of-00004.safetensors",
550
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00001-of-00004.safetensors",
551
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00001-of-00004.safetensors",
552
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00001-of-00004.safetensors",
553
+ "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
554
+ "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
555
+ "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
556
+ "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
557
+ "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
558
+ "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
559
+ "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
560
+ "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
561
+ "vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00001-of-00004.safetensors",
562
+ "vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00001-of-00004.safetensors",
563
+ "vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00001-of-00004.safetensors",
564
+ "vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00001-of-00004.safetensors",
565
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00001-of-00004.safetensors",
566
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00001-of-00004.safetensors",
567
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00001-of-00004.safetensors",
568
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00001-of-00004.safetensors",
569
+ "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
570
+ "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
571
+ "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
572
+ "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
573
+ "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
574
+ "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
575
+ "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
576
+ "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
577
+ "vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00001-of-00004.safetensors",
578
+ "vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00001-of-00004.safetensors",
579
+ "vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00001-of-00004.safetensors",
580
+ "vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00001-of-00004.safetensors",
581
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00001-of-00004.safetensors",
582
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00001-of-00004.safetensors",
583
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00001-of-00004.safetensors",
584
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00001-of-00004.safetensors",
585
+ "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
586
+ "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
587
+ "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
588
+ "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
589
+ "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
590
+ "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
591
+ "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
592
+ "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
593
+ "vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00001-of-00004.safetensors",
594
+ "vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00001-of-00004.safetensors",
595
+ "vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00001-of-00004.safetensors",
596
+ "vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00001-of-00004.safetensors",
597
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00001-of-00004.safetensors",
598
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00001-of-00004.safetensors",
599
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00001-of-00004.safetensors",
600
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00001-of-00004.safetensors",
601
+ "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
602
+ "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
603
+ "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
604
+ "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
605
+ "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
606
+ "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
607
+ "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
608
+ "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
609
+ "vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00001-of-00004.safetensors",
610
+ "vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00001-of-00004.safetensors",
611
+ "vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00001-of-00004.safetensors",
612
+ "vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00001-of-00004.safetensors",
613
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00001-of-00004.safetensors",
614
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00001-of-00004.safetensors",
615
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00001-of-00004.safetensors",
616
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00001-of-00004.safetensors",
617
+ "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
618
+ "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
619
+ "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
620
+ "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
621
+ "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
622
+ "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
623
+ "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
624
+ "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
625
+ "vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00001-of-00004.safetensors",
626
+ "vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00001-of-00004.safetensors",
627
+ "vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00001-of-00004.safetensors",
628
+ "vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00001-of-00004.safetensors",
629
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00001-of-00004.safetensors",
630
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00001-of-00004.safetensors",
631
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00001-of-00004.safetensors",
632
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00001-of-00004.safetensors",
633
+ "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
634
+ "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
635
+ "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
636
+ "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
637
+ "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
638
+ "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
639
+ "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
640
+ "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
641
+ "vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00001-of-00004.safetensors",
642
+ "vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00001-of-00004.safetensors",
643
+ "vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00001-of-00004.safetensors",
644
+ "vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00001-of-00004.safetensors",
645
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00001-of-00004.safetensors",
646
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00001-of-00004.safetensors",
647
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00001-of-00004.safetensors",
648
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00001-of-00004.safetensors",
649
+ "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
650
+ "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
651
+ "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
652
+ "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
653
+ "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
654
+ "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
655
+ "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
656
+ "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
657
+ "vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00001-of-00004.safetensors",
658
+ "vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00001-of-00004.safetensors",
659
+ "vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00001-of-00004.safetensors",
660
+ "vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00001-of-00004.safetensors",
661
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00001-of-00004.safetensors",
662
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00001-of-00004.safetensors",
663
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00001-of-00004.safetensors",
664
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00001-of-00004.safetensors",
665
+ "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
666
+ "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
667
+ "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
668
+ "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
669
+ "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
670
+ "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
671
+ "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
672
+ "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
673
+ "vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00001-of-00004.safetensors",
674
+ "vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00001-of-00004.safetensors",
675
+ "vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00001-of-00004.safetensors",
676
+ "vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00001-of-00004.safetensors",
677
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00001-of-00004.safetensors",
678
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00001-of-00004.safetensors",
679
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00001-of-00004.safetensors",
680
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00001-of-00004.safetensors",
681
+ "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
682
+ "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
683
+ "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
684
+ "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
685
+ "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
686
+ "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
687
+ "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
688
+ "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
689
+ "vision_tower.vision_model.post_layernorm.bias": "model-00001-of-00004.safetensors",
690
+ "vision_tower.vision_model.post_layernorm.weight": "model-00001-of-00004.safetensors",
691
+ "vision_tower.vision_model.pre_layrnorm.bias": "model-00001-of-00004.safetensors",
692
+ "vision_tower.vision_model.pre_layrnorm.weight": "model-00001-of-00004.safetensors"
693
+ }
694
+ }
checkpoint-100/preprocessor_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "aspect_ratio_setting": "anyres",
3
+ "crop_size": {
4
+ "height": 336,
5
+ "width": 336
6
+ },
7
+ "do_center_crop": true,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_pad": true,
11
+ "do_rescale": true,
12
+ "do_resize": true,
13
+ "image_grid_pinpoints": [
14
+ [
15
+ 336,
16
+ 672
17
+ ],
18
+ [
19
+ 672,
20
+ 336
21
+ ],
22
+ [
23
+ 672,
24
+ 672
25
+ ],
26
+ [
27
+ 1008,
28
+ 336
29
+ ],
30
+ [
31
+ 336,
32
+ 1008
33
+ ]
34
+ ],
35
+ "image_mean": [
36
+ 0.48145466,
37
+ 0.4578275,
38
+ 0.40821073
39
+ ],
40
+ "image_processor_type": "LlavaNextImageProcessor",
41
+ "image_std": [
42
+ 0.26862954,
43
+ 0.26130258,
44
+ 0.27577711
45
+ ],
46
+ "processor_class": "LlavaNextProcessor",
47
+ "resample": 3,
48
+ "rescale_factor": 0.00392156862745098,
49
+ "size": {
50
+ "shortest_edge": 336
51
+ }
52
+ }
checkpoint-100/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
checkpoint-100/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-100/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
checkpoint-100/tokenizer_config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "32000": {
31
+ "content": "<image>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "32001": {
39
+ "content": "<pad>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": true
45
+ }
46
+ },
47
+ "additional_special_tokens": [],
48
+ "bos_token": "<s>",
49
+ "chat_template": "{{ '<s>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '`[INST] `' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
50
+ "clean_up_tokenization_spaces": false,
51
+ "eos_token": "</s>",
52
+ "extra_special_tokens": {
53
+ "image_token": "<image>"
54
+ },
55
+ "image_token": "<image>",
56
+ "legacy": true,
57
+ "max_length": null,
58
+ "model_max_length": 1000000000000000019884624838656,
59
+ "pad_to_multiple_of": null,
60
+ "pad_token": "<pad>",
61
+ "pad_token_type_id": 0,
62
+ "padding_side": "right",
63
+ "processor_class": "LlavaNextProcessor",
64
+ "sp_model_kwargs": {},
65
+ "spaces_between_special_tokens": false,
66
+ "split_special_tokens": false,
67
+ "tokenizer_class": "LlamaTokenizer",
68
+ "unk_token": "<unk>",
69
+ "use_default_system_prompt": false
70
+ }
checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9345794392523364,
5
+ "eval_steps": 50,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04672897196261682,
13
+ "grad_norm": 55.02521133123827,
14
+ "learning_rate": 5e-07,
15
+ "logits/chosen": -2.7216238975524902,
16
+ "logits/rejected": -2.7209055423736572,
17
+ "logps/chosen": -268.4510192871094,
18
+ "logps/rejected": -203.9590606689453,
19
+ "loss": 0.6914,
20
+ "rewards/accuracies": 0.33125001192092896,
21
+ "rewards/chosen": 0.017813727259635925,
22
+ "rewards/margins": 0.009322145953774452,
23
+ "rewards/rejected": 0.008491581305861473,
24
+ "step": 5
25
+ },
26
+ {
27
+ "epoch": 0.09345794392523364,
28
+ "grad_norm": 50.12554517439661,
29
+ "learning_rate": 1e-06,
30
+ "logits/chosen": -2.660832166671753,
31
+ "logits/rejected": -2.6669700145721436,
32
+ "logps/chosen": -256.93609619140625,
33
+ "logps/rejected": -211.64938354492188,
34
+ "loss": 0.6467,
35
+ "rewards/accuracies": 0.6812499761581421,
36
+ "rewards/chosen": 0.4671781659126282,
37
+ "rewards/margins": 0.18480566143989563,
38
+ "rewards/rejected": 0.28237253427505493,
39
+ "step": 10
40
+ },
41
+ {
42
+ "epoch": 0.14018691588785046,
43
+ "grad_norm": 48.457281147169425,
44
+ "learning_rate": 9.993623730611148e-07,
45
+ "logits/chosen": -2.494657278060913,
46
+ "logits/rejected": -2.5133018493652344,
47
+ "logps/chosen": -232.6891632080078,
48
+ "logps/rejected": -212.8677215576172,
49
+ "loss": 0.6204,
50
+ "rewards/accuracies": 0.6625000238418579,
51
+ "rewards/chosen": 1.2435152530670166,
52
+ "rewards/margins": 0.6126660108566284,
53
+ "rewards/rejected": 0.6308490037918091,
54
+ "step": 15
55
+ },
56
+ {
57
+ "epoch": 0.18691588785046728,
58
+ "grad_norm": 42.23668651632898,
59
+ "learning_rate": 9.97451118516912e-07,
60
+ "logits/chosen": -2.3121209144592285,
61
+ "logits/rejected": -2.302377462387085,
62
+ "logps/chosen": -234.3399658203125,
63
+ "logps/rejected": -191.3181610107422,
64
+ "loss": 0.6223,
65
+ "rewards/accuracies": 0.706250011920929,
66
+ "rewards/chosen": 1.3283790349960327,
67
+ "rewards/margins": 0.9071598052978516,
68
+ "rewards/rejected": 0.4212193489074707,
69
+ "step": 20
70
+ },
71
+ {
72
+ "epoch": 0.2336448598130841,
73
+ "grad_norm": 49.68505743626908,
74
+ "learning_rate": 9.94271111036929e-07,
75
+ "logits/chosen": -2.2619333267211914,
76
+ "logits/rejected": -2.2323482036590576,
77
+ "logps/chosen": -230.17385864257812,
78
+ "logps/rejected": -205.64108276367188,
79
+ "loss": 0.616,
80
+ "rewards/accuracies": 0.762499988079071,
81
+ "rewards/chosen": 1.3353136777877808,
82
+ "rewards/margins": 1.208017110824585,
83
+ "rewards/rejected": 0.127296581864357,
84
+ "step": 25
85
+ },
86
+ {
87
+ "epoch": 0.2803738317757009,
88
+ "grad_norm": 39.50573463077347,
89
+ "learning_rate": 9.898304612549066e-07,
90
+ "logits/chosen": -2.308243989944458,
91
+ "logits/rejected": -2.2968266010284424,
92
+ "logps/chosen": -241.26632690429688,
93
+ "logps/rejected": -207.3319854736328,
94
+ "loss": 0.5998,
95
+ "rewards/accuracies": 0.731249988079071,
96
+ "rewards/chosen": 0.9092999696731567,
97
+ "rewards/margins": 0.9556086659431458,
98
+ "rewards/rejected": -0.046308644115924835,
99
+ "step": 30
100
+ },
101
+ {
102
+ "epoch": 0.32710280373831774,
103
+ "grad_norm": 42.78736900246308,
104
+ "learning_rate": 9.841404950825536e-07,
105
+ "logits/chosen": -2.3728604316711426,
106
+ "logits/rejected": -2.3580102920532227,
107
+ "logps/chosen": -256.8548583984375,
108
+ "logps/rejected": -205.154052734375,
109
+ "loss": 0.5996,
110
+ "rewards/accuracies": 0.768750011920929,
111
+ "rewards/chosen": 1.0152027606964111,
112
+ "rewards/margins": 1.2137099504470825,
113
+ "rewards/rejected": -0.1985071450471878,
114
+ "step": 35
115
+ },
116
+ {
117
+ "epoch": 0.37383177570093457,
118
+ "grad_norm": 41.73908597429494,
119
+ "learning_rate": 9.77215724822721e-07,
120
+ "logits/chosen": -2.4492850303649902,
121
+ "logits/rejected": -2.4539356231689453,
122
+ "logps/chosen": -243.1707763671875,
123
+ "logps/rejected": -213.95166015625,
124
+ "loss": 0.6098,
125
+ "rewards/accuracies": 0.71875,
126
+ "rewards/chosen": 1.0353302955627441,
127
+ "rewards/margins": 1.2659428119659424,
128
+ "rewards/rejected": -0.2306123673915863,
129
+ "step": 40
130
+ },
131
+ {
132
+ "epoch": 0.4205607476635514,
133
+ "grad_norm": 41.530064757148224,
134
+ "learning_rate": 9.69073812155662e-07,
135
+ "logits/chosen": -2.5637125968933105,
136
+ "logits/rejected": -2.5535428524017334,
137
+ "logps/chosen": -244.7168731689453,
138
+ "logps/rejected": -205.80923461914062,
139
+ "loss": 0.5974,
140
+ "rewards/accuracies": 0.71875,
141
+ "rewards/chosen": 0.8133939504623413,
142
+ "rewards/margins": 0.9837163686752319,
143
+ "rewards/rejected": -0.17032238841056824,
144
+ "step": 45
145
+ },
146
+ {
147
+ "epoch": 0.4672897196261682,
148
+ "grad_norm": 38.26706141308248,
149
+ "learning_rate": 9.597355230927788e-07,
150
+ "logits/chosen": -2.5823917388916016,
151
+ "logits/rejected": -2.562842607498169,
152
+ "logps/chosen": -240.04067993164062,
153
+ "logps/rejected": -209.23428344726562,
154
+ "loss": 0.572,
155
+ "rewards/accuracies": 0.800000011920929,
156
+ "rewards/chosen": 0.9298027753829956,
157
+ "rewards/margins": 1.2456680536270142,
158
+ "rewards/rejected": -0.3158652186393738,
159
+ "step": 50
160
+ },
161
+ {
162
+ "epoch": 0.4672897196261682,
163
+ "eval_logits/chosen": -2.530949115753174,
164
+ "eval_logits/rejected": -2.529101610183716,
165
+ "eval_logps/chosen": -245.5291748046875,
166
+ "eval_logps/rejected": -217.46429443359375,
167
+ "eval_loss": 0.5720326900482178,
168
+ "eval_rewards/accuracies": 0.7578125,
169
+ "eval_rewards/chosen": 1.0708366632461548,
170
+ "eval_rewards/margins": 1.28933846950531,
171
+ "eval_rewards/rejected": -0.2185017466545105,
172
+ "eval_runtime": 202.2601,
173
+ "eval_samples_per_second": 15.03,
174
+ "eval_steps_per_second": 0.237,
175
+ "step": 50
176
+ },
177
+ {
178
+ "epoch": 0.514018691588785,
179
+ "grad_norm": 40.54073508413725,
180
+ "learning_rate": 9.4922467501275e-07,
181
+ "logits/chosen": -2.495945930480957,
182
+ "logits/rejected": -2.487422466278076,
183
+ "logps/chosen": -250.51620483398438,
184
+ "logps/rejected": -228.90200805664062,
185
+ "loss": 0.5176,
186
+ "rewards/accuracies": 0.7875000238418579,
187
+ "rewards/chosen": 1.0155770778656006,
188
+ "rewards/margins": 1.9236654043197632,
189
+ "rewards/rejected": -0.9080885648727417,
190
+ "step": 55
191
+ },
192
+ {
193
+ "epoch": 0.5607476635514018,
194
+ "grad_norm": 38.23797310786567,
195
+ "learning_rate": 9.375680759151206e-07,
196
+ "logits/chosen": -2.474236249923706,
197
+ "logits/rejected": -2.4737977981567383,
198
+ "logps/chosen": -255.09298706054688,
199
+ "logps/rejected": -200.73593139648438,
200
+ "loss": 0.5654,
201
+ "rewards/accuracies": 0.768750011920929,
202
+ "rewards/chosen": 1.0740002393722534,
203
+ "rewards/margins": 1.5434155464172363,
204
+ "rewards/rejected": -0.4694152772426605,
205
+ "step": 60
206
+ },
207
+ {
208
+ "epoch": 0.6074766355140186,
209
+ "grad_norm": 42.648181943788025,
210
+ "learning_rate": 9.247954560462927e-07,
211
+ "logits/chosen": -2.505916118621826,
212
+ "logits/rejected": -2.506608724594116,
213
+ "logps/chosen": -255.432861328125,
214
+ "logps/rejected": -205.4224090576172,
215
+ "loss": 0.5628,
216
+ "rewards/accuracies": 0.8062499761581421,
217
+ "rewards/chosen": 1.1411590576171875,
218
+ "rewards/margins": 1.7762504816055298,
219
+ "rewards/rejected": -0.6350914239883423,
220
+ "step": 65
221
+ },
222
+ {
223
+ "epoch": 0.6542056074766355,
224
+ "grad_norm": 38.81572593341751,
225
+ "learning_rate": 9.109393920723001e-07,
226
+ "logits/chosen": -2.4328043460845947,
227
+ "logits/rejected": -2.4342734813690186,
228
+ "logps/chosen": -233.8389129638672,
229
+ "logps/rejected": -212.91085815429688,
230
+ "loss": 0.5378,
231
+ "rewards/accuracies": 0.7124999761581421,
232
+ "rewards/chosen": 0.6836588978767395,
233
+ "rewards/margins": 1.282029390335083,
234
+ "rewards/rejected": -0.5983705520629883,
235
+ "step": 70
236
+ },
237
+ {
238
+ "epoch": 0.7009345794392523,
239
+ "grad_norm": 32.317612654080975,
240
+ "learning_rate": 8.960352239917699e-07,
241
+ "logits/chosen": -2.450084924697876,
242
+ "logits/rejected": -2.401425361633301,
243
+ "logps/chosen": -240.6315460205078,
244
+ "logps/rejected": -227.21084594726562,
245
+ "loss": 0.5154,
246
+ "rewards/accuracies": 0.78125,
247
+ "rewards/chosen": 0.6731350421905518,
248
+ "rewards/margins": 1.5562646389007568,
249
+ "rewards/rejected": -0.8831297755241394,
250
+ "step": 75
251
+ },
252
+ {
253
+ "epoch": 0.7476635514018691,
254
+ "grad_norm": 30.17721204804764,
255
+ "learning_rate": 8.801209650009814e-07,
256
+ "logits/chosen": -2.4172046184539795,
257
+ "logits/rejected": -2.400567054748535,
258
+ "logps/chosen": -245.4665985107422,
259
+ "logps/rejected": -214.18515014648438,
260
+ "loss": 0.5206,
261
+ "rewards/accuracies": 0.8187500238418579,
262
+ "rewards/chosen": 0.9583255052566528,
263
+ "rewards/margins": 1.7562158107757568,
264
+ "rewards/rejected": -0.7978904843330383,
265
+ "step": 80
266
+ },
267
+ {
268
+ "epoch": 0.794392523364486,
269
+ "grad_norm": 32.24535114623233,
270
+ "learning_rate": 8.632372045409141e-07,
271
+ "logits/chosen": -2.320589065551758,
272
+ "logits/rejected": -2.3311946392059326,
273
+ "logps/chosen": -245.598388671875,
274
+ "logps/rejected": -234.7646026611328,
275
+ "loss": 0.5472,
276
+ "rewards/accuracies": 0.78125,
277
+ "rewards/chosen": 1.1685658693313599,
278
+ "rewards/margins": 1.754003882408142,
279
+ "rewards/rejected": -0.5854381322860718,
280
+ "step": 85
281
+ },
282
+ {
283
+ "epoch": 0.8411214953271028,
284
+ "grad_norm": 34.061291659967246,
285
+ "learning_rate": 8.454270047735642e-07,
286
+ "logits/chosen": -2.329784870147705,
287
+ "logits/rejected": -2.304997682571411,
288
+ "logps/chosen": -238.0483856201172,
289
+ "logps/rejected": -195.24313354492188,
290
+ "loss": 0.5291,
291
+ "rewards/accuracies": 0.8125,
292
+ "rewards/chosen": 0.6335947513580322,
293
+ "rewards/margins": 1.5654070377349854,
294
+ "rewards/rejected": -0.9318124055862427,
295
+ "step": 90
296
+ },
297
+ {
298
+ "epoch": 0.8878504672897196,
299
+ "grad_norm": 31.735542564732725,
300
+ "learning_rate": 8.267357907515661e-07,
301
+ "logits/chosen": -2.298316478729248,
302
+ "logits/rejected": -2.2975010871887207,
303
+ "logps/chosen": -246.3526153564453,
304
+ "logps/rejected": -206.03524780273438,
305
+ "loss": 0.5418,
306
+ "rewards/accuracies": 0.7562500238418579,
307
+ "rewards/chosen": 0.5418449640274048,
308
+ "rewards/margins": 1.539952039718628,
309
+ "rewards/rejected": -0.9981070756912231,
310
+ "step": 95
311
+ },
312
+ {
313
+ "epoch": 0.9345794392523364,
314
+ "grad_norm": 32.65915062987667,
315
+ "learning_rate": 8.072112345612433e-07,
316
+ "logits/chosen": -2.2663910388946533,
317
+ "logits/rejected": -2.218681812286377,
318
+ "logps/chosen": -246.5704803466797,
319
+ "logps/rejected": -218.6560821533203,
320
+ "loss": 0.4997,
321
+ "rewards/accuracies": 0.8125,
322
+ "rewards/chosen": 0.8927062153816223,
323
+ "rewards/margins": 2.236736297607422,
324
+ "rewards/rejected": -1.3440301418304443,
325
+ "step": 100
326
+ },
327
+ {
328
+ "epoch": 0.9345794392523364,
329
+ "eval_logits/chosen": -2.2007782459259033,
330
+ "eval_logits/rejected": -2.177567720413208,
331
+ "eval_logps/chosen": -247.5850067138672,
332
+ "eval_logps/rejected": -224.4142608642578,
333
+ "eval_loss": 0.5101521015167236,
334
+ "eval_rewards/accuracies": 0.7864583134651184,
335
+ "eval_rewards/chosen": 0.8652558326721191,
336
+ "eval_rewards/margins": 1.7787574529647827,
337
+ "eval_rewards/rejected": -0.9135015606880188,
338
+ "eval_runtime": 201.8599,
339
+ "eval_samples_per_second": 15.06,
340
+ "eval_steps_per_second": 0.238,
341
+ "step": 100
342
+ }
343
+ ],
344
+ "logging_steps": 5,
345
+ "max_steps": 321,
346
+ "num_input_tokens_seen": 0,
347
+ "num_train_epochs": 3,
348
+ "save_steps": 100,
349
+ "stateful_callbacks": {
350
+ "TrainerControl": {
351
+ "args": {
352
+ "should_epoch_stop": false,
353
+ "should_evaluate": false,
354
+ "should_log": false,
355
+ "should_save": true,
356
+ "should_training_stop": false
357
+ },
358
+ "attributes": {}
359
+ }
360
+ },
361
+ "total_flos": 1178822762299392.0,
362
+ "train_batch_size": 8,
363
+ "trial_name": null,
364
+ "trial_params": null
365
+ }
checkpoint-100/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f18434785ed5c1cf7b24cfe9bc32bfda4c423eb14a3664f74540e373b8660d0e
3
+ size 7096
checkpoint-200/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<image>": 32000,
3
+ "<pad>": 32001
4
+ }
checkpoint-200/config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/align-anything/hantao/models/llava-v1.6-mistral-7b-hf",
3
+ "architectures": [
4
+ "LlavaNextForConditionalGeneration"
5
+ ],
6
+ "hidden_size": 4096,
7
+ "ignore_index": -100,
8
+ "image_grid_pinpoints": [
9
+ [
10
+ 336,
11
+ 672
12
+ ],
13
+ [
14
+ 672,
15
+ 336
16
+ ],
17
+ [
18
+ 672,
19
+ 672
20
+ ],
21
+ [
22
+ 1008,
23
+ 336
24
+ ],
25
+ [
26
+ 336,
27
+ 1008
28
+ ]
29
+ ],
30
+ "image_seq_length": 576,
31
+ "image_token_index": 32000,
32
+ "model_type": "llava_next",
33
+ "projector_hidden_act": "gelu",
34
+ "text_config": {
35
+ "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
36
+ "architectures": [
37
+ "MistralForCausalLM"
38
+ ],
39
+ "intermediate_size": 14336,
40
+ "max_position_embeddings": 32768,
41
+ "model_type": "mistral",
42
+ "num_key_value_heads": 8,
43
+ "rms_norm_eps": 1e-05,
44
+ "rope_theta": 1000000.0,
45
+ "sliding_window": null,
46
+ "torch_dtype": "bfloat16",
47
+ "vocab_size": 32064
48
+ },
49
+ "tie_word_embeddings": false,
50
+ "torch_dtype": "bfloat16",
51
+ "transformers_version": "4.45.2",
52
+ "use_cache": false,
53
+ "use_image_newline_parameter": true,
54
+ "vision_config": {
55
+ "hidden_size": 1024,
56
+ "image_size": 336,
57
+ "intermediate_size": 4096,
58
+ "model_type": "clip_vision_model",
59
+ "num_attention_heads": 16,
60
+ "num_hidden_layers": 24,
61
+ "patch_size": 14,
62
+ "projection_dim": 768,
63
+ "vocab_size": 32000
64
+ },
65
+ "vision_feature_layer": -2,
66
+ "vision_feature_select_strategy": "default",
67
+ "vocab_size": 32064
68
+ }
checkpoint-200/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.45.2"
6
+ }
checkpoint-200/model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52e9d2718fea7b17054962db5f9b2158c82e94db2356b5b3f8615c14e6c0d68c
3
+ size 4921618624
checkpoint-200/model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c2955ee8f2a2cfb5e5870be12b5d79e984454e4a7ab80b81cb06536dffd2ff3
3
+ size 4915917672
checkpoint-200/model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a18b10c1dd5b3d2cd4df5df8dd1b01d3698e2a46d808690ff4ff93d960c6bf2a
3
+ size 4915917680
checkpoint-200/model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d18ec63ae11edae236265752d6362562f2d5609f3b4722ed67d7db62fdef1a6
3
+ size 380134008
checkpoint-200/model.safetensors.index.json ADDED
@@ -0,0 +1,694 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 15133495296
4
+ },
5
+ "weight_map": {
6
+ "image_newline": "model-00001-of-00004.safetensors",
7
+ "language_model.lm_head.weight": "model-00004-of-00004.safetensors",
8
+ "language_model.model.embed_tokens.weight": "model-00001-of-00004.safetensors",
9
+ "language_model.model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
10
+ "language_model.model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
11
+ "language_model.model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
12
+ "language_model.model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
13
+ "language_model.model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
14
+ "language_model.model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
15
+ "language_model.model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
16
+ "language_model.model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
17
+ "language_model.model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
18
+ "language_model.model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
19
+ "language_model.model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
20
+ "language_model.model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
21
+ "language_model.model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
22
+ "language_model.model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
23
+ "language_model.model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
24
+ "language_model.model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
25
+ "language_model.model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
26
+ "language_model.model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
27
+ "language_model.model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
28
+ "language_model.model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
29
+ "language_model.model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
30
+ "language_model.model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
31
+ "language_model.model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
32
+ "language_model.model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
33
+ "language_model.model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
34
+ "language_model.model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
35
+ "language_model.model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
36
+ "language_model.model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
37
+ "language_model.model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
38
+ "language_model.model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
39
+ "language_model.model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
40
+ "language_model.model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
41
+ "language_model.model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
42
+ "language_model.model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
43
+ "language_model.model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
44
+ "language_model.model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
45
+ "language_model.model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
46
+ "language_model.model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
47
+ "language_model.model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
48
+ "language_model.model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
49
+ "language_model.model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
50
+ "language_model.model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
51
+ "language_model.model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
52
+ "language_model.model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
53
+ "language_model.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
54
+ "language_model.model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
55
+ "language_model.model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
56
+ "language_model.model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
57
+ "language_model.model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
58
+ "language_model.model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
59
+ "language_model.model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
60
+ "language_model.model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
61
+ "language_model.model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
62
+ "language_model.model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
63
+ "language_model.model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
64
+ "language_model.model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
65
+ "language_model.model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
66
+ "language_model.model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
67
+ "language_model.model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
68
+ "language_model.model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
69
+ "language_model.model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
70
+ "language_model.model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
71
+ "language_model.model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
72
+ "language_model.model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
73
+ "language_model.model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
74
+ "language_model.model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
75
+ "language_model.model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
76
+ "language_model.model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
77
+ "language_model.model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
78
+ "language_model.model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
79
+ "language_model.model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
80
+ "language_model.model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
81
+ "language_model.model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
82
+ "language_model.model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
83
+ "language_model.model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
84
+ "language_model.model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
85
+ "language_model.model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
86
+ "language_model.model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
87
+ "language_model.model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
88
+ "language_model.model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
89
+ "language_model.model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
90
+ "language_model.model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
91
+ "language_model.model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
92
+ "language_model.model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
93
+ "language_model.model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
94
+ "language_model.model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
95
+ "language_model.model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
96
+ "language_model.model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
97
+ "language_model.model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
98
+ "language_model.model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
99
+ "language_model.model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
100
+ "language_model.model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
101
+ "language_model.model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
102
+ "language_model.model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
103
+ "language_model.model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
104
+ "language_model.model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
105
+ "language_model.model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
106
+ "language_model.model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
107
+ "language_model.model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
108
+ "language_model.model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
109
+ "language_model.model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
110
+ "language_model.model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
111
+ "language_model.model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
112
+ "language_model.model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
113
+ "language_model.model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
114
+ "language_model.model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
115
+ "language_model.model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
116
+ "language_model.model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
117
+ "language_model.model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
118
+ "language_model.model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
119
+ "language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
120
+ "language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
121
+ "language_model.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
122
+ "language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
123
+ "language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
124
+ "language_model.model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
125
+ "language_model.model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
126
+ "language_model.model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
127
+ "language_model.model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
128
+ "language_model.model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
129
+ "language_model.model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
130
+ "language_model.model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
131
+ "language_model.model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
132
+ "language_model.model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
133
+ "language_model.model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
134
+ "language_model.model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
135
+ "language_model.model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
136
+ "language_model.model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
137
+ "language_model.model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
138
+ "language_model.model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
139
+ "language_model.model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
140
+ "language_model.model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
141
+ "language_model.model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
142
+ "language_model.model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
143
+ "language_model.model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
144
+ "language_model.model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
145
+ "language_model.model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
146
+ "language_model.model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
147
+ "language_model.model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
148
+ "language_model.model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
149
+ "language_model.model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
150
+ "language_model.model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
151
+ "language_model.model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
152
+ "language_model.model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
153
+ "language_model.model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
154
+ "language_model.model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
155
+ "language_model.model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
156
+ "language_model.model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
157
+ "language_model.model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
158
+ "language_model.model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
159
+ "language_model.model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
160
+ "language_model.model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
161
+ "language_model.model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
162
+ "language_model.model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
163
+ "language_model.model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
164
+ "language_model.model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
165
+ "language_model.model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
166
+ "language_model.model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
167
+ "language_model.model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
168
+ "language_model.model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
169
+ "language_model.model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
170
+ "language_model.model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
171
+ "language_model.model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
172
+ "language_model.model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
173
+ "language_model.model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
174
+ "language_model.model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
175
+ "language_model.model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
176
+ "language_model.model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
177
+ "language_model.model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
178
+ "language_model.model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
179
+ "language_model.model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
180
+ "language_model.model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
181
+ "language_model.model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
182
+ "language_model.model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
183
+ "language_model.model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
184
+ "language_model.model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
185
+ "language_model.model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
186
+ "language_model.model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
187
+ "language_model.model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
188
+ "language_model.model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
189
+ "language_model.model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
190
+ "language_model.model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
191
+ "language_model.model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
192
+ "language_model.model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
193
+ "language_model.model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
194
+ "language_model.model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
195
+ "language_model.model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
196
+ "language_model.model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
197
+ "language_model.model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
198
+ "language_model.model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
199
+ "language_model.model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
200
+ "language_model.model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
201
+ "language_model.model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
202
+ "language_model.model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
203
+ "language_model.model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
204
+ "language_model.model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
205
+ "language_model.model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
206
+ "language_model.model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
207
+ "language_model.model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
208
+ "language_model.model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
209
+ "language_model.model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
210
+ "language_model.model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
211
+ "language_model.model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
212
+ "language_model.model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
213
+ "language_model.model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
214
+ "language_model.model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
215
+ "language_model.model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
216
+ "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
217
+ "language_model.model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
218
+ "language_model.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
219
+ "language_model.model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
220
+ "language_model.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
221
+ "language_model.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
222
+ "language_model.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
223
+ "language_model.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
224
+ "language_model.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
225
+ "language_model.model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
226
+ "language_model.model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
227
+ "language_model.model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
228
+ "language_model.model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
229
+ "language_model.model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
230
+ "language_model.model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
231
+ "language_model.model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
232
+ "language_model.model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
233
+ "language_model.model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
234
+ "language_model.model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
235
+ "language_model.model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
236
+ "language_model.model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
237
+ "language_model.model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
238
+ "language_model.model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
239
+ "language_model.model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
240
+ "language_model.model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
241
+ "language_model.model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
242
+ "language_model.model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
243
+ "language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
244
+ "language_model.model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
245
+ "language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
246
+ "language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
247
+ "language_model.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
248
+ "language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
249
+ "language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
250
+ "language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
251
+ "language_model.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
252
+ "language_model.model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
253
+ "language_model.model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
254
+ "language_model.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
255
+ "language_model.model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
256
+ "language_model.model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
257
+ "language_model.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
258
+ "language_model.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
259
+ "language_model.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
260
+ "language_model.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
261
+ "language_model.model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
262
+ "language_model.model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
263
+ "language_model.model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
264
+ "language_model.model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
265
+ "language_model.model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
266
+ "language_model.model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
267
+ "language_model.model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
268
+ "language_model.model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
269
+ "language_model.model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
270
+ "language_model.model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
271
+ "language_model.model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
272
+ "language_model.model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
273
+ "language_model.model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
274
+ "language_model.model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
275
+ "language_model.model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
276
+ "language_model.model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
277
+ "language_model.model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
278
+ "language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
279
+ "language_model.model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
280
+ "language_model.model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
281
+ "language_model.model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
282
+ "language_model.model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
283
+ "language_model.model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
284
+ "language_model.model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
285
+ "language_model.model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
286
+ "language_model.model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
287
+ "language_model.model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
288
+ "language_model.model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
289
+ "language_model.model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
290
+ "language_model.model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
291
+ "language_model.model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
292
+ "language_model.model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
293
+ "language_model.model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
294
+ "language_model.model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
295
+ "language_model.model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
296
+ "language_model.model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
297
+ "language_model.model.norm.weight": "model-00004-of-00004.safetensors",
298
+ "multi_modal_projector.linear_1.bias": "model-00001-of-00004.safetensors",
299
+ "multi_modal_projector.linear_1.weight": "model-00001-of-00004.safetensors",
300
+ "multi_modal_projector.linear_2.bias": "model-00001-of-00004.safetensors",
301
+ "multi_modal_projector.linear_2.weight": "model-00001-of-00004.safetensors",
302
+ "vision_tower.vision_model.embeddings.class_embedding": "model-00001-of-00004.safetensors",
303
+ "vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00001-of-00004.safetensors",
304
+ "vision_tower.vision_model.embeddings.position_embedding.weight": "model-00001-of-00004.safetensors",
305
+ "vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00001-of-00004.safetensors",
306
+ "vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00001-of-00004.safetensors",
307
+ "vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00001-of-00004.safetensors",
308
+ "vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00001-of-00004.safetensors",
309
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00001-of-00004.safetensors",
310
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00001-of-00004.safetensors",
311
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00001-of-00004.safetensors",
312
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00001-of-00004.safetensors",
313
+ "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
314
+ "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
315
+ "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
316
+ "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
317
+ "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
318
+ "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
319
+ "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
320
+ "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
321
+ "vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00001-of-00004.safetensors",
322
+ "vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00001-of-00004.safetensors",
323
+ "vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00001-of-00004.safetensors",
324
+ "vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00001-of-00004.safetensors",
325
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00001-of-00004.safetensors",
326
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00001-of-00004.safetensors",
327
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00001-of-00004.safetensors",
328
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00001-of-00004.safetensors",
329
+ "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
330
+ "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
331
+ "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
332
+ "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
333
+ "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
334
+ "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
335
+ "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
336
+ "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
337
+ "vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00001-of-00004.safetensors",
338
+ "vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00001-of-00004.safetensors",
339
+ "vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00001-of-00004.safetensors",
340
+ "vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00001-of-00004.safetensors",
341
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00001-of-00004.safetensors",
342
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00001-of-00004.safetensors",
343
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00001-of-00004.safetensors",
344
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00001-of-00004.safetensors",
345
+ "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
346
+ "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
347
+ "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
348
+ "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
349
+ "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
350
+ "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
351
+ "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
352
+ "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
353
+ "vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00001-of-00004.safetensors",
354
+ "vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00001-of-00004.safetensors",
355
+ "vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00001-of-00004.safetensors",
356
+ "vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00001-of-00004.safetensors",
357
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00001-of-00004.safetensors",
358
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00001-of-00004.safetensors",
359
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00001-of-00004.safetensors",
360
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00001-of-00004.safetensors",
361
+ "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
362
+ "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
363
+ "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
364
+ "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
365
+ "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
366
+ "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
367
+ "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
368
+ "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
369
+ "vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00001-of-00004.safetensors",
370
+ "vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00001-of-00004.safetensors",
371
+ "vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00001-of-00004.safetensors",
372
+ "vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00001-of-00004.safetensors",
373
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00001-of-00004.safetensors",
374
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00001-of-00004.safetensors",
375
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00001-of-00004.safetensors",
376
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00001-of-00004.safetensors",
377
+ "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
378
+ "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
379
+ "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
380
+ "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
381
+ "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
382
+ "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
383
+ "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
384
+ "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
385
+ "vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00001-of-00004.safetensors",
386
+ "vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00001-of-00004.safetensors",
387
+ "vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00001-of-00004.safetensors",
388
+ "vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00001-of-00004.safetensors",
389
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00001-of-00004.safetensors",
390
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00001-of-00004.safetensors",
391
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00001-of-00004.safetensors",
392
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00001-of-00004.safetensors",
393
+ "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
394
+ "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
395
+ "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
396
+ "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
397
+ "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
398
+ "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
399
+ "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
400
+ "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
401
+ "vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00001-of-00004.safetensors",
402
+ "vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00001-of-00004.safetensors",
403
+ "vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00001-of-00004.safetensors",
404
+ "vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00001-of-00004.safetensors",
405
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00001-of-00004.safetensors",
406
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00001-of-00004.safetensors",
407
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00001-of-00004.safetensors",
408
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00001-of-00004.safetensors",
409
+ "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
410
+ "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
411
+ "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
412
+ "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
413
+ "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
414
+ "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
415
+ "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
416
+ "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
417
+ "vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00001-of-00004.safetensors",
418
+ "vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00001-of-00004.safetensors",
419
+ "vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00001-of-00004.safetensors",
420
+ "vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00001-of-00004.safetensors",
421
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00001-of-00004.safetensors",
422
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00001-of-00004.safetensors",
423
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00001-of-00004.safetensors",
424
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00001-of-00004.safetensors",
425
+ "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
426
+ "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
427
+ "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
428
+ "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
429
+ "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
430
+ "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
431
+ "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
432
+ "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
433
+ "vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00001-of-00004.safetensors",
434
+ "vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00001-of-00004.safetensors",
435
+ "vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00001-of-00004.safetensors",
436
+ "vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00001-of-00004.safetensors",
437
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00001-of-00004.safetensors",
438
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00001-of-00004.safetensors",
439
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00001-of-00004.safetensors",
440
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00001-of-00004.safetensors",
441
+ "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
442
+ "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
443
+ "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
444
+ "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
445
+ "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
446
+ "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
447
+ "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
448
+ "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
449
+ "vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00001-of-00004.safetensors",
450
+ "vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00001-of-00004.safetensors",
451
+ "vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00001-of-00004.safetensors",
452
+ "vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00001-of-00004.safetensors",
453
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00001-of-00004.safetensors",
454
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00001-of-00004.safetensors",
455
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00001-of-00004.safetensors",
456
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00001-of-00004.safetensors",
457
+ "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
458
+ "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
459
+ "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
460
+ "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
461
+ "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
462
+ "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
463
+ "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
464
+ "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
465
+ "vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00001-of-00004.safetensors",
466
+ "vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00001-of-00004.safetensors",
467
+ "vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00001-of-00004.safetensors",
468
+ "vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00001-of-00004.safetensors",
469
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00001-of-00004.safetensors",
470
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00001-of-00004.safetensors",
471
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00001-of-00004.safetensors",
472
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00001-of-00004.safetensors",
473
+ "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
474
+ "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
475
+ "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
476
+ "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
477
+ "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
478
+ "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
479
+ "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
480
+ "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
481
+ "vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00001-of-00004.safetensors",
482
+ "vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00001-of-00004.safetensors",
483
+ "vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00001-of-00004.safetensors",
484
+ "vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00001-of-00004.safetensors",
485
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00001-of-00004.safetensors",
486
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00001-of-00004.safetensors",
487
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00001-of-00004.safetensors",
488
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00001-of-00004.safetensors",
489
+ "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
490
+ "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
491
+ "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
492
+ "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
493
+ "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
494
+ "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
495
+ "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
496
+ "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
497
+ "vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00001-of-00004.safetensors",
498
+ "vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00001-of-00004.safetensors",
499
+ "vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00001-of-00004.safetensors",
500
+ "vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00001-of-00004.safetensors",
501
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00001-of-00004.safetensors",
502
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00001-of-00004.safetensors",
503
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00001-of-00004.safetensors",
504
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00001-of-00004.safetensors",
505
+ "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
506
+ "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
507
+ "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
508
+ "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
509
+ "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
510
+ "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
511
+ "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
512
+ "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
513
+ "vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00001-of-00004.safetensors",
514
+ "vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00001-of-00004.safetensors",
515
+ "vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00001-of-00004.safetensors",
516
+ "vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00001-of-00004.safetensors",
517
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00001-of-00004.safetensors",
518
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00001-of-00004.safetensors",
519
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00001-of-00004.safetensors",
520
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00001-of-00004.safetensors",
521
+ "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
522
+ "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
523
+ "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
524
+ "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
525
+ "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
526
+ "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
527
+ "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
528
+ "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
529
+ "vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00001-of-00004.safetensors",
530
+ "vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00001-of-00004.safetensors",
531
+ "vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00001-of-00004.safetensors",
532
+ "vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00001-of-00004.safetensors",
533
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00001-of-00004.safetensors",
534
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00001-of-00004.safetensors",
535
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00001-of-00004.safetensors",
536
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00001-of-00004.safetensors",
537
+ "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
538
+ "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
539
+ "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
540
+ "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
541
+ "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
542
+ "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
543
+ "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
544
+ "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
545
+ "vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00001-of-00004.safetensors",
546
+ "vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00001-of-00004.safetensors",
547
+ "vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00001-of-00004.safetensors",
548
+ "vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00001-of-00004.safetensors",
549
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00001-of-00004.safetensors",
550
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00001-of-00004.safetensors",
551
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00001-of-00004.safetensors",
552
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00001-of-00004.safetensors",
553
+ "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
554
+ "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
555
+ "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
556
+ "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
557
+ "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
558
+ "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
559
+ "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
560
+ "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
561
+ "vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00001-of-00004.safetensors",
562
+ "vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00001-of-00004.safetensors",
563
+ "vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00001-of-00004.safetensors",
564
+ "vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00001-of-00004.safetensors",
565
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00001-of-00004.safetensors",
566
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00001-of-00004.safetensors",
567
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00001-of-00004.safetensors",
568
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00001-of-00004.safetensors",
569
+ "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
570
+ "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
571
+ "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
572
+ "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
573
+ "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
574
+ "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
575
+ "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
576
+ "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
577
+ "vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00001-of-00004.safetensors",
578
+ "vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00001-of-00004.safetensors",
579
+ "vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00001-of-00004.safetensors",
580
+ "vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00001-of-00004.safetensors",
581
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00001-of-00004.safetensors",
582
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00001-of-00004.safetensors",
583
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00001-of-00004.safetensors",
584
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00001-of-00004.safetensors",
585
+ "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
586
+ "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
587
+ "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
588
+ "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
589
+ "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
590
+ "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
591
+ "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
592
+ "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
593
+ "vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00001-of-00004.safetensors",
594
+ "vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00001-of-00004.safetensors",
595
+ "vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00001-of-00004.safetensors",
596
+ "vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00001-of-00004.safetensors",
597
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00001-of-00004.safetensors",
598
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00001-of-00004.safetensors",
599
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00001-of-00004.safetensors",
600
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00001-of-00004.safetensors",
601
+ "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
602
+ "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
603
+ "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
604
+ "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
605
+ "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
606
+ "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
607
+ "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
608
+ "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
609
+ "vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00001-of-00004.safetensors",
610
+ "vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00001-of-00004.safetensors",
611
+ "vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00001-of-00004.safetensors",
612
+ "vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00001-of-00004.safetensors",
613
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00001-of-00004.safetensors",
614
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00001-of-00004.safetensors",
615
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00001-of-00004.safetensors",
616
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00001-of-00004.safetensors",
617
+ "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
618
+ "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
619
+ "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
620
+ "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
621
+ "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
622
+ "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
623
+ "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
624
+ "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
625
+ "vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00001-of-00004.safetensors",
626
+ "vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00001-of-00004.safetensors",
627
+ "vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00001-of-00004.safetensors",
628
+ "vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00001-of-00004.safetensors",
629
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00001-of-00004.safetensors",
630
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00001-of-00004.safetensors",
631
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00001-of-00004.safetensors",
632
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00001-of-00004.safetensors",
633
+ "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
634
+ "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
635
+ "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
636
+ "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
637
+ "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
638
+ "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
639
+ "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
640
+ "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
641
+ "vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00001-of-00004.safetensors",
642
+ "vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00001-of-00004.safetensors",
643
+ "vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00001-of-00004.safetensors",
644
+ "vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00001-of-00004.safetensors",
645
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00001-of-00004.safetensors",
646
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00001-of-00004.safetensors",
647
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00001-of-00004.safetensors",
648
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00001-of-00004.safetensors",
649
+ "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
650
+ "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
651
+ "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
652
+ "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
653
+ "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
654
+ "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
655
+ "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
656
+ "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
657
+ "vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00001-of-00004.safetensors",
658
+ "vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00001-of-00004.safetensors",
659
+ "vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00001-of-00004.safetensors",
660
+ "vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00001-of-00004.safetensors",
661
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00001-of-00004.safetensors",
662
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00001-of-00004.safetensors",
663
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00001-of-00004.safetensors",
664
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00001-of-00004.safetensors",
665
+ "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
666
+ "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
667
+ "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
668
+ "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
669
+ "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
670
+ "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
671
+ "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
672
+ "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
673
+ "vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00001-of-00004.safetensors",
674
+ "vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00001-of-00004.safetensors",
675
+ "vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00001-of-00004.safetensors",
676
+ "vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00001-of-00004.safetensors",
677
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00001-of-00004.safetensors",
678
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00001-of-00004.safetensors",
679
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00001-of-00004.safetensors",
680
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00001-of-00004.safetensors",
681
+ "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
682
+ "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
683
+ "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
684
+ "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
685
+ "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
686
+ "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
687
+ "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
688
+ "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
689
+ "vision_tower.vision_model.post_layernorm.bias": "model-00001-of-00004.safetensors",
690
+ "vision_tower.vision_model.post_layernorm.weight": "model-00001-of-00004.safetensors",
691
+ "vision_tower.vision_model.pre_layrnorm.bias": "model-00001-of-00004.safetensors",
692
+ "vision_tower.vision_model.pre_layrnorm.weight": "model-00001-of-00004.safetensors"
693
+ }
694
+ }
checkpoint-200/preprocessor_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "aspect_ratio_setting": "anyres",
3
+ "crop_size": {
4
+ "height": 336,
5
+ "width": 336
6
+ },
7
+ "do_center_crop": true,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_pad": true,
11
+ "do_rescale": true,
12
+ "do_resize": true,
13
+ "image_grid_pinpoints": [
14
+ [
15
+ 336,
16
+ 672
17
+ ],
18
+ [
19
+ 672,
20
+ 336
21
+ ],
22
+ [
23
+ 672,
24
+ 672
25
+ ],
26
+ [
27
+ 1008,
28
+ 336
29
+ ],
30
+ [
31
+ 336,
32
+ 1008
33
+ ]
34
+ ],
35
+ "image_mean": [
36
+ 0.48145466,
37
+ 0.4578275,
38
+ 0.40821073
39
+ ],
40
+ "image_processor_type": "LlavaNextImageProcessor",
41
+ "image_std": [
42
+ 0.26862954,
43
+ 0.26130258,
44
+ 0.27577711
45
+ ],
46
+ "processor_class": "LlavaNextProcessor",
47
+ "resample": 3,
48
+ "rescale_factor": 0.00392156862745098,
49
+ "size": {
50
+ "shortest_edge": 336
51
+ }
52
+ }
checkpoint-200/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
checkpoint-200/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-200/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
checkpoint-200/tokenizer_config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "32000": {
31
+ "content": "<image>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "32001": {
39
+ "content": "<pad>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": true
45
+ }
46
+ },
47
+ "additional_special_tokens": [],
48
+ "bos_token": "<s>",
49
+ "chat_template": "{{ '<s>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '`[INST] `' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
50
+ "clean_up_tokenization_spaces": false,
51
+ "eos_token": "</s>",
52
+ "extra_special_tokens": {
53
+ "image_token": "<image>"
54
+ },
55
+ "image_token": "<image>",
56
+ "legacy": true,
57
+ "max_length": null,
58
+ "model_max_length": 1000000000000000019884624838656,
59
+ "pad_to_multiple_of": null,
60
+ "pad_token": "<pad>",
61
+ "pad_token_type_id": 0,
62
+ "padding_side": "right",
63
+ "processor_class": "LlavaNextProcessor",
64
+ "sp_model_kwargs": {},
65
+ "spaces_between_special_tokens": false,
66
+ "split_special_tokens": false,
67
+ "tokenizer_class": "LlamaTokenizer",
68
+ "unk_token": "<unk>",
69
+ "use_default_system_prompt": false
70
+ }
checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,697 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.8691588785046729,
5
+ "eval_steps": 50,
6
+ "global_step": 200,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04672897196261682,
13
+ "grad_norm": 55.02521133123827,
14
+ "learning_rate": 5e-07,
15
+ "logits/chosen": -2.7216238975524902,
16
+ "logits/rejected": -2.7209055423736572,
17
+ "logps/chosen": -268.4510192871094,
18
+ "logps/rejected": -203.9590606689453,
19
+ "loss": 0.6914,
20
+ "rewards/accuracies": 0.33125001192092896,
21
+ "rewards/chosen": 0.017813727259635925,
22
+ "rewards/margins": 0.009322145953774452,
23
+ "rewards/rejected": 0.008491581305861473,
24
+ "step": 5
25
+ },
26
+ {
27
+ "epoch": 0.09345794392523364,
28
+ "grad_norm": 50.12554517439661,
29
+ "learning_rate": 1e-06,
30
+ "logits/chosen": -2.660832166671753,
31
+ "logits/rejected": -2.6669700145721436,
32
+ "logps/chosen": -256.93609619140625,
33
+ "logps/rejected": -211.64938354492188,
34
+ "loss": 0.6467,
35
+ "rewards/accuracies": 0.6812499761581421,
36
+ "rewards/chosen": 0.4671781659126282,
37
+ "rewards/margins": 0.18480566143989563,
38
+ "rewards/rejected": 0.28237253427505493,
39
+ "step": 10
40
+ },
41
+ {
42
+ "epoch": 0.14018691588785046,
43
+ "grad_norm": 48.457281147169425,
44
+ "learning_rate": 9.993623730611148e-07,
45
+ "logits/chosen": -2.494657278060913,
46
+ "logits/rejected": -2.5133018493652344,
47
+ "logps/chosen": -232.6891632080078,
48
+ "logps/rejected": -212.8677215576172,
49
+ "loss": 0.6204,
50
+ "rewards/accuracies": 0.6625000238418579,
51
+ "rewards/chosen": 1.2435152530670166,
52
+ "rewards/margins": 0.6126660108566284,
53
+ "rewards/rejected": 0.6308490037918091,
54
+ "step": 15
55
+ },
56
+ {
57
+ "epoch": 0.18691588785046728,
58
+ "grad_norm": 42.23668651632898,
59
+ "learning_rate": 9.97451118516912e-07,
60
+ "logits/chosen": -2.3121209144592285,
61
+ "logits/rejected": -2.302377462387085,
62
+ "logps/chosen": -234.3399658203125,
63
+ "logps/rejected": -191.3181610107422,
64
+ "loss": 0.6223,
65
+ "rewards/accuracies": 0.706250011920929,
66
+ "rewards/chosen": 1.3283790349960327,
67
+ "rewards/margins": 0.9071598052978516,
68
+ "rewards/rejected": 0.4212193489074707,
69
+ "step": 20
70
+ },
71
+ {
72
+ "epoch": 0.2336448598130841,
73
+ "grad_norm": 49.68505743626908,
74
+ "learning_rate": 9.94271111036929e-07,
75
+ "logits/chosen": -2.2619333267211914,
76
+ "logits/rejected": -2.2323482036590576,
77
+ "logps/chosen": -230.17385864257812,
78
+ "logps/rejected": -205.64108276367188,
79
+ "loss": 0.616,
80
+ "rewards/accuracies": 0.762499988079071,
81
+ "rewards/chosen": 1.3353136777877808,
82
+ "rewards/margins": 1.208017110824585,
83
+ "rewards/rejected": 0.127296581864357,
84
+ "step": 25
85
+ },
86
+ {
87
+ "epoch": 0.2803738317757009,
88
+ "grad_norm": 39.50573463077347,
89
+ "learning_rate": 9.898304612549066e-07,
90
+ "logits/chosen": -2.308243989944458,
91
+ "logits/rejected": -2.2968266010284424,
92
+ "logps/chosen": -241.26632690429688,
93
+ "logps/rejected": -207.3319854736328,
94
+ "loss": 0.5998,
95
+ "rewards/accuracies": 0.731249988079071,
96
+ "rewards/chosen": 0.9092999696731567,
97
+ "rewards/margins": 0.9556086659431458,
98
+ "rewards/rejected": -0.046308644115924835,
99
+ "step": 30
100
+ },
101
+ {
102
+ "epoch": 0.32710280373831774,
103
+ "grad_norm": 42.78736900246308,
104
+ "learning_rate": 9.841404950825536e-07,
105
+ "logits/chosen": -2.3728604316711426,
106
+ "logits/rejected": -2.3580102920532227,
107
+ "logps/chosen": -256.8548583984375,
108
+ "logps/rejected": -205.154052734375,
109
+ "loss": 0.5996,
110
+ "rewards/accuracies": 0.768750011920929,
111
+ "rewards/chosen": 1.0152027606964111,
112
+ "rewards/margins": 1.2137099504470825,
113
+ "rewards/rejected": -0.1985071450471878,
114
+ "step": 35
115
+ },
116
+ {
117
+ "epoch": 0.37383177570093457,
118
+ "grad_norm": 41.73908597429494,
119
+ "learning_rate": 9.77215724822721e-07,
120
+ "logits/chosen": -2.4492850303649902,
121
+ "logits/rejected": -2.4539356231689453,
122
+ "logps/chosen": -243.1707763671875,
123
+ "logps/rejected": -213.95166015625,
124
+ "loss": 0.6098,
125
+ "rewards/accuracies": 0.71875,
126
+ "rewards/chosen": 1.0353302955627441,
127
+ "rewards/margins": 1.2659428119659424,
128
+ "rewards/rejected": -0.2306123673915863,
129
+ "step": 40
130
+ },
131
+ {
132
+ "epoch": 0.4205607476635514,
133
+ "grad_norm": 41.530064757148224,
134
+ "learning_rate": 9.69073812155662e-07,
135
+ "logits/chosen": -2.5637125968933105,
136
+ "logits/rejected": -2.5535428524017334,
137
+ "logps/chosen": -244.7168731689453,
138
+ "logps/rejected": -205.80923461914062,
139
+ "loss": 0.5974,
140
+ "rewards/accuracies": 0.71875,
141
+ "rewards/chosen": 0.8133939504623413,
142
+ "rewards/margins": 0.9837163686752319,
143
+ "rewards/rejected": -0.17032238841056824,
144
+ "step": 45
145
+ },
146
+ {
147
+ "epoch": 0.4672897196261682,
148
+ "grad_norm": 38.26706141308248,
149
+ "learning_rate": 9.597355230927788e-07,
150
+ "logits/chosen": -2.5823917388916016,
151
+ "logits/rejected": -2.562842607498169,
152
+ "logps/chosen": -240.04067993164062,
153
+ "logps/rejected": -209.23428344726562,
154
+ "loss": 0.572,
155
+ "rewards/accuracies": 0.800000011920929,
156
+ "rewards/chosen": 0.9298027753829956,
157
+ "rewards/margins": 1.2456680536270142,
158
+ "rewards/rejected": -0.3158652186393738,
159
+ "step": 50
160
+ },
161
+ {
162
+ "epoch": 0.4672897196261682,
163
+ "eval_logits/chosen": -2.530949115753174,
164
+ "eval_logits/rejected": -2.529101610183716,
165
+ "eval_logps/chosen": -245.5291748046875,
166
+ "eval_logps/rejected": -217.46429443359375,
167
+ "eval_loss": 0.5720326900482178,
168
+ "eval_rewards/accuracies": 0.7578125,
169
+ "eval_rewards/chosen": 1.0708366632461548,
170
+ "eval_rewards/margins": 1.28933846950531,
171
+ "eval_rewards/rejected": -0.2185017466545105,
172
+ "eval_runtime": 202.2601,
173
+ "eval_samples_per_second": 15.03,
174
+ "eval_steps_per_second": 0.237,
175
+ "step": 50
176
+ },
177
+ {
178
+ "epoch": 0.514018691588785,
179
+ "grad_norm": 40.54073508413725,
180
+ "learning_rate": 9.4922467501275e-07,
181
+ "logits/chosen": -2.495945930480957,
182
+ "logits/rejected": -2.487422466278076,
183
+ "logps/chosen": -250.51620483398438,
184
+ "logps/rejected": -228.90200805664062,
185
+ "loss": 0.5176,
186
+ "rewards/accuracies": 0.7875000238418579,
187
+ "rewards/chosen": 1.0155770778656006,
188
+ "rewards/margins": 1.9236654043197632,
189
+ "rewards/rejected": -0.9080885648727417,
190
+ "step": 55
191
+ },
192
+ {
193
+ "epoch": 0.5607476635514018,
194
+ "grad_norm": 38.23797310786567,
195
+ "learning_rate": 9.375680759151206e-07,
196
+ "logits/chosen": -2.474236249923706,
197
+ "logits/rejected": -2.4737977981567383,
198
+ "logps/chosen": -255.09298706054688,
199
+ "logps/rejected": -200.73593139648438,
200
+ "loss": 0.5654,
201
+ "rewards/accuracies": 0.768750011920929,
202
+ "rewards/chosen": 1.0740002393722534,
203
+ "rewards/margins": 1.5434155464172363,
204
+ "rewards/rejected": -0.4694152772426605,
205
+ "step": 60
206
+ },
207
+ {
208
+ "epoch": 0.6074766355140186,
209
+ "grad_norm": 42.648181943788025,
210
+ "learning_rate": 9.247954560462927e-07,
211
+ "logits/chosen": -2.505916118621826,
212
+ "logits/rejected": -2.506608724594116,
213
+ "logps/chosen": -255.432861328125,
214
+ "logps/rejected": -205.4224090576172,
215
+ "loss": 0.5628,
216
+ "rewards/accuracies": 0.8062499761581421,
217
+ "rewards/chosen": 1.1411590576171875,
218
+ "rewards/margins": 1.7762504816055298,
219
+ "rewards/rejected": -0.6350914239883423,
220
+ "step": 65
221
+ },
222
+ {
223
+ "epoch": 0.6542056074766355,
224
+ "grad_norm": 38.81572593341751,
225
+ "learning_rate": 9.109393920723001e-07,
226
+ "logits/chosen": -2.4328043460845947,
227
+ "logits/rejected": -2.4342734813690186,
228
+ "logps/chosen": -233.8389129638672,
229
+ "logps/rejected": -212.91085815429688,
230
+ "loss": 0.5378,
231
+ "rewards/accuracies": 0.7124999761581421,
232
+ "rewards/chosen": 0.6836588978767395,
233
+ "rewards/margins": 1.282029390335083,
234
+ "rewards/rejected": -0.5983705520629883,
235
+ "step": 70
236
+ },
237
+ {
238
+ "epoch": 0.7009345794392523,
239
+ "grad_norm": 32.317612654080975,
240
+ "learning_rate": 8.960352239917699e-07,
241
+ "logits/chosen": -2.450084924697876,
242
+ "logits/rejected": -2.401425361633301,
243
+ "logps/chosen": -240.6315460205078,
244
+ "logps/rejected": -227.21084594726562,
245
+ "loss": 0.5154,
246
+ "rewards/accuracies": 0.78125,
247
+ "rewards/chosen": 0.6731350421905518,
248
+ "rewards/margins": 1.5562646389007568,
249
+ "rewards/rejected": -0.8831297755241394,
250
+ "step": 75
251
+ },
252
+ {
253
+ "epoch": 0.7476635514018691,
254
+ "grad_norm": 30.17721204804764,
255
+ "learning_rate": 8.801209650009814e-07,
256
+ "logits/chosen": -2.4172046184539795,
257
+ "logits/rejected": -2.400567054748535,
258
+ "logps/chosen": -245.4665985107422,
259
+ "logps/rejected": -214.18515014648438,
260
+ "loss": 0.5206,
261
+ "rewards/accuracies": 0.8187500238418579,
262
+ "rewards/chosen": 0.9583255052566528,
263
+ "rewards/margins": 1.7562158107757568,
264
+ "rewards/rejected": -0.7978904843330383,
265
+ "step": 80
266
+ },
267
+ {
268
+ "epoch": 0.794392523364486,
269
+ "grad_norm": 32.24535114623233,
270
+ "learning_rate": 8.632372045409141e-07,
271
+ "logits/chosen": -2.320589065551758,
272
+ "logits/rejected": -2.3311946392059326,
273
+ "logps/chosen": -245.598388671875,
274
+ "logps/rejected": -234.7646026611328,
275
+ "loss": 0.5472,
276
+ "rewards/accuracies": 0.78125,
277
+ "rewards/chosen": 1.1685658693313599,
278
+ "rewards/margins": 1.754003882408142,
279
+ "rewards/rejected": -0.5854381322860718,
280
+ "step": 85
281
+ },
282
+ {
283
+ "epoch": 0.8411214953271028,
284
+ "grad_norm": 34.061291659967246,
285
+ "learning_rate": 8.454270047735642e-07,
286
+ "logits/chosen": -2.329784870147705,
287
+ "logits/rejected": -2.304997682571411,
288
+ "logps/chosen": -238.0483856201172,
289
+ "logps/rejected": -195.24313354492188,
290
+ "loss": 0.5291,
291
+ "rewards/accuracies": 0.8125,
292
+ "rewards/chosen": 0.6335947513580322,
293
+ "rewards/margins": 1.5654070377349854,
294
+ "rewards/rejected": -0.9318124055862427,
295
+ "step": 90
296
+ },
297
+ {
298
+ "epoch": 0.8878504672897196,
299
+ "grad_norm": 31.735542564732725,
300
+ "learning_rate": 8.267357907515661e-07,
301
+ "logits/chosen": -2.298316478729248,
302
+ "logits/rejected": -2.2975010871887207,
303
+ "logps/chosen": -246.3526153564453,
304
+ "logps/rejected": -206.03524780273438,
305
+ "loss": 0.5418,
306
+ "rewards/accuracies": 0.7562500238418579,
307
+ "rewards/chosen": 0.5418449640274048,
308
+ "rewards/margins": 1.539952039718628,
309
+ "rewards/rejected": -0.9981070756912231,
310
+ "step": 95
311
+ },
312
+ {
313
+ "epoch": 0.9345794392523364,
314
+ "grad_norm": 32.65915062987667,
315
+ "learning_rate": 8.072112345612433e-07,
316
+ "logits/chosen": -2.2663910388946533,
317
+ "logits/rejected": -2.218681812286377,
318
+ "logps/chosen": -246.5704803466797,
319
+ "logps/rejected": -218.6560821533203,
320
+ "loss": 0.4997,
321
+ "rewards/accuracies": 0.8125,
322
+ "rewards/chosen": 0.8927062153816223,
323
+ "rewards/margins": 2.236736297607422,
324
+ "rewards/rejected": -1.3440301418304443,
325
+ "step": 100
326
+ },
327
+ {
328
+ "epoch": 0.9345794392523364,
329
+ "eval_logits/chosen": -2.2007782459259033,
330
+ "eval_logits/rejected": -2.177567720413208,
331
+ "eval_logps/chosen": -247.5850067138672,
332
+ "eval_logps/rejected": -224.4142608642578,
333
+ "eval_loss": 0.5101521015167236,
334
+ "eval_rewards/accuracies": 0.7864583134651184,
335
+ "eval_rewards/chosen": 0.8652558326721191,
336
+ "eval_rewards/margins": 1.7787574529647827,
337
+ "eval_rewards/rejected": -0.9135015606880188,
338
+ "eval_runtime": 201.8599,
339
+ "eval_samples_per_second": 15.06,
340
+ "eval_steps_per_second": 0.238,
341
+ "step": 100
342
+ },
343
+ {
344
+ "epoch": 0.9813084112149533,
345
+ "grad_norm": 28.6005139133492,
346
+ "learning_rate": 7.869031337345827e-07,
347
+ "logits/chosen": -2.1810142993927,
348
+ "logits/rejected": -2.1466403007507324,
349
+ "logps/chosen": -273.2081298828125,
350
+ "logps/rejected": -224.4601593017578,
351
+ "loss": 0.474,
352
+ "rewards/accuracies": 0.8125,
353
+ "rewards/chosen": 0.8940876126289368,
354
+ "rewards/margins": 2.1252331733703613,
355
+ "rewards/rejected": -1.2311456203460693,
356
+ "step": 105
357
+ },
358
+ {
359
+ "epoch": 1.02803738317757,
360
+ "grad_norm": 17.790000678929353,
361
+ "learning_rate": 7.658632842402432e-07,
362
+ "logits/chosen": -2.1617965698242188,
363
+ "logits/rejected": -2.118025064468384,
364
+ "logps/chosen": -248.0548858642578,
365
+ "logps/rejected": -209.18603515625,
366
+ "loss": 0.3432,
367
+ "rewards/accuracies": 0.8687499761581421,
368
+ "rewards/chosen": 0.46495524048805237,
369
+ "rewards/margins": 2.547100067138672,
370
+ "rewards/rejected": -2.0821449756622314,
371
+ "step": 110
372
+ },
373
+ {
374
+ "epoch": 1.074766355140187,
375
+ "grad_norm": 18.641161706325903,
376
+ "learning_rate": 7.441453483775353e-07,
377
+ "logits/chosen": -2.2370879650115967,
378
+ "logits/rejected": -2.1639482975006104,
379
+ "logps/chosen": -252.3055419921875,
380
+ "logps/rejected": -231.727294921875,
381
+ "loss": 0.2131,
382
+ "rewards/accuracies": 0.8999999761581421,
383
+ "rewards/chosen": 1.1295344829559326,
384
+ "rewards/margins": 2.7920079231262207,
385
+ "rewards/rejected": -1.6624739170074463,
386
+ "step": 115
387
+ },
388
+ {
389
+ "epoch": 1.1214953271028036,
390
+ "grad_norm": 22.27642795647513,
391
+ "learning_rate": 7.218047179103112e-07,
392
+ "logits/chosen": -2.2182936668395996,
393
+ "logits/rejected": -2.2140285968780518,
394
+ "logps/chosen": -248.0435028076172,
395
+ "logps/rejected": -233.4251251220703,
396
+ "loss": 0.2493,
397
+ "rewards/accuracies": 0.90625,
398
+ "rewards/chosen": 1.1873703002929688,
399
+ "rewards/margins": 3.075556755065918,
400
+ "rewards/rejected": -1.8881866931915283,
401
+ "step": 120
402
+ },
403
+ {
404
+ "epoch": 1.1682242990654206,
405
+ "grad_norm": 24.129663206875104,
406
+ "learning_rate": 6.988983727898413e-07,
407
+ "logits/chosen": -2.2516915798187256,
408
+ "logits/rejected": -2.2213852405548096,
409
+ "logps/chosen": -239.94125366210938,
410
+ "logps/rejected": -213.9071807861328,
411
+ "loss": 0.2434,
412
+ "rewards/accuracies": 0.875,
413
+ "rewards/chosen": 1.149621844291687,
414
+ "rewards/margins": 2.956752300262451,
415
+ "rewards/rejected": -1.8071304559707642,
416
+ "step": 125
417
+ },
418
+ {
419
+ "epoch": 1.2149532710280373,
420
+ "grad_norm": 19.098258762584823,
421
+ "learning_rate": 6.754847358270066e-07,
422
+ "logits/chosen": -2.268832206726074,
423
+ "logits/rejected": -2.2357370853424072,
424
+ "logps/chosen": -252.8026885986328,
425
+ "logps/rejected": -218.2414093017578,
426
+ "loss": 0.2656,
427
+ "rewards/accuracies": 0.9125000238418579,
428
+ "rewards/chosen": 1.428450345993042,
429
+ "rewards/margins": 3.458483934402466,
430
+ "rewards/rejected": -2.030033826828003,
431
+ "step": 130
432
+ },
433
+ {
434
+ "epoch": 1.2616822429906542,
435
+ "grad_norm": 16.69096458544893,
436
+ "learning_rate": 6.516235236844661e-07,
437
+ "logits/chosen": -2.2503199577331543,
438
+ "logits/rejected": -2.223175048828125,
439
+ "logps/chosen": -251.1974639892578,
440
+ "logps/rejected": -224.8771209716797,
441
+ "loss": 0.229,
442
+ "rewards/accuracies": 0.9125000238418579,
443
+ "rewards/chosen": 1.4385788440704346,
444
+ "rewards/margins": 3.5852439403533936,
445
+ "rewards/rejected": -2.146665573120117,
446
+ "step": 135
447
+ },
448
+ {
449
+ "epoch": 1.308411214953271,
450
+ "grad_norm": 24.370271103680654,
451
+ "learning_rate": 6.273755945688457e-07,
452
+ "logits/chosen": -2.30786395072937,
453
+ "logits/rejected": -2.2820160388946533,
454
+ "logps/chosen": -247.8080596923828,
455
+ "logps/rejected": -242.51596069335938,
456
+ "loss": 0.2745,
457
+ "rewards/accuracies": 0.893750011920929,
458
+ "rewards/chosen": 1.6472270488739014,
459
+ "rewards/margins": 3.4453282356262207,
460
+ "rewards/rejected": -1.7981010675430298,
461
+ "step": 140
462
+ },
463
+ {
464
+ "epoch": 1.355140186915888,
465
+ "grad_norm": 21.195332919733744,
466
+ "learning_rate": 6.02802793011411e-07,
467
+ "logits/chosen": -2.304081916809082,
468
+ "logits/rejected": -2.2989423274993896,
469
+ "logps/chosen": -221.5400848388672,
470
+ "logps/rejected": -236.2010955810547,
471
+ "loss": 0.2682,
472
+ "rewards/accuracies": 0.9125000238418579,
473
+ "rewards/chosen": 1.4084885120391846,
474
+ "rewards/margins": 3.7067673206329346,
475
+ "rewards/rejected": -2.29827880859375,
476
+ "step": 145
477
+ },
478
+ {
479
+ "epoch": 1.4018691588785046,
480
+ "grad_norm": 22.224652369004666,
481
+ "learning_rate": 5.779677921331093e-07,
482
+ "logits/chosen": -2.2803831100463867,
483
+ "logits/rejected": -2.2711875438690186,
484
+ "logps/chosen": -247.6720733642578,
485
+ "logps/rejected": -217.12509155273438,
486
+ "loss": 0.2873,
487
+ "rewards/accuracies": 0.893750011920929,
488
+ "rewards/chosen": 1.7340141534805298,
489
+ "rewards/margins": 3.378777265548706,
490
+ "rewards/rejected": -1.6447633504867554,
491
+ "step": 150
492
+ },
493
+ {
494
+ "epoch": 1.4018691588785046,
495
+ "eval_logits/chosen": -2.275045394897461,
496
+ "eval_logits/rejected": -2.263206720352173,
497
+ "eval_logps/chosen": -245.67860412597656,
498
+ "eval_logps/rejected": -227.55787658691406,
499
+ "eval_loss": 0.5675327181816101,
500
+ "eval_rewards/accuracies": 0.7890625,
501
+ "eval_rewards/chosen": 1.055895209312439,
502
+ "eval_rewards/margins": 2.2837564945220947,
503
+ "eval_rewards/rejected": -1.2278612852096558,
504
+ "eval_runtime": 202.0364,
505
+ "eval_samples_per_second": 15.047,
506
+ "eval_steps_per_second": 0.238,
507
+ "step": 150
508
+ },
509
+ {
510
+ "epoch": 1.4485981308411215,
511
+ "grad_norm": 24.024418676682174,
512
+ "learning_rate": 5.529339337962897e-07,
513
+ "logits/chosen": -2.26741361618042,
514
+ "logits/rejected": -2.2619667053222656,
515
+ "logps/chosen": -228.74258422851562,
516
+ "logps/rejected": -199.869873046875,
517
+ "loss": 0.3185,
518
+ "rewards/accuracies": 0.9312499761581421,
519
+ "rewards/chosen": 1.8829383850097656,
520
+ "rewards/margins": 3.547306776046753,
521
+ "rewards/rejected": -1.6643686294555664,
522
+ "step": 155
523
+ },
524
+ {
525
+ "epoch": 1.4953271028037383,
526
+ "grad_norm": 22.765211926437665,
527
+ "learning_rate": 5.277650670507915e-07,
528
+ "logits/chosen": -2.2662367820739746,
529
+ "logits/rejected": -2.241522789001465,
530
+ "logps/chosen": -241.50253295898438,
531
+ "logps/rejected": -211.10791015625,
532
+ "loss": 0.2582,
533
+ "rewards/accuracies": 0.9375,
534
+ "rewards/chosen": 1.677080512046814,
535
+ "rewards/margins": 3.3089568614959717,
536
+ "rewards/rejected": -1.6318763494491577,
537
+ "step": 160
538
+ },
539
+ {
540
+ "epoch": 1.542056074766355,
541
+ "grad_norm": 20.952241807232628,
542
+ "learning_rate": 5.025253852864471e-07,
543
+ "logits/chosen": -2.2016148567199707,
544
+ "logits/rejected": -2.2075283527374268,
545
+ "logps/chosen": -247.7741241455078,
546
+ "logps/rejected": -224.11892700195312,
547
+ "loss": 0.2816,
548
+ "rewards/accuracies": 0.8999999761581421,
549
+ "rewards/chosen": 1.5324174165725708,
550
+ "rewards/margins": 3.230978488922119,
551
+ "rewards/rejected": -1.6985607147216797,
552
+ "step": 165
553
+ },
554
+ {
555
+ "epoch": 1.588785046728972,
556
+ "grad_norm": 23.145444455236966,
557
+ "learning_rate": 4.77279262507344e-07,
558
+ "logits/chosen": -2.2021024227142334,
559
+ "logits/rejected": -2.1827890872955322,
560
+ "logps/chosen": -243.9816131591797,
561
+ "logps/rejected": -249.20703125,
562
+ "loss": 0.2787,
563
+ "rewards/accuracies": 0.918749988079071,
564
+ "rewards/chosen": 1.669757604598999,
565
+ "rewards/margins": 3.6465446949005127,
566
+ "rewards/rejected": -1.9767868518829346,
567
+ "step": 170
568
+ },
569
+ {
570
+ "epoch": 1.6355140186915889,
571
+ "grad_norm": 26.563830659774606,
572
+ "learning_rate": 4.5209108914542714e-07,
573
+ "logits/chosen": -2.1774230003356934,
574
+ "logits/rejected": -2.1725821495056152,
575
+ "logps/chosen": -224.1102294921875,
576
+ "logps/rejected": -234.7071075439453,
577
+ "loss": 0.3123,
578
+ "rewards/accuracies": 0.8812500238418579,
579
+ "rewards/chosen": 1.2634233236312866,
580
+ "rewards/margins": 3.5007872581481934,
581
+ "rewards/rejected": -2.2373640537261963,
582
+ "step": 175
583
+ },
584
+ {
585
+ "epoch": 1.6822429906542056,
586
+ "grad_norm": 25.254447136991615,
587
+ "learning_rate": 4.2702510783220475e-07,
588
+ "logits/chosen": -2.168032169342041,
589
+ "logits/rejected": -2.1451544761657715,
590
+ "logps/chosen": -228.2743682861328,
591
+ "logps/rejected": -211.07705688476562,
592
+ "loss": 0.2948,
593
+ "rewards/accuracies": 0.918749988079071,
594
+ "rewards/chosen": 1.1032750606536865,
595
+ "rewards/margins": 3.2025279998779297,
596
+ "rewards/rejected": -2.0992531776428223,
597
+ "step": 180
598
+ },
599
+ {
600
+ "epoch": 1.7289719626168223,
601
+ "grad_norm": 20.89817797522474,
602
+ "learning_rate": 4.0214524954741586e-07,
603
+ "logits/chosen": -2.1714885234832764,
604
+ "logits/rejected": -2.148820400238037,
605
+ "logps/chosen": -250.5221405029297,
606
+ "logps/rejected": -223.18399047851562,
607
+ "loss": 0.2802,
608
+ "rewards/accuracies": 0.90625,
609
+ "rewards/chosen": 1.3431367874145508,
610
+ "rewards/margins": 3.7224392890930176,
611
+ "rewards/rejected": -2.3793022632598877,
612
+ "step": 185
613
+ },
614
+ {
615
+ "epoch": 1.7757009345794392,
616
+ "grad_norm": 17.107545008827852,
617
+ "learning_rate": 3.7751497056257305e-07,
618
+ "logits/chosen": -2.1603405475616455,
619
+ "logits/rejected": -2.145948648452759,
620
+ "logps/chosen": -234.90872192382812,
621
+ "logps/rejected": -240.06298828125,
622
+ "loss": 0.269,
623
+ "rewards/accuracies": 0.925000011920929,
624
+ "rewards/chosen": 1.084341049194336,
625
+ "rewards/margins": 3.5826897621154785,
626
+ "rewards/rejected": -2.4983482360839844,
627
+ "step": 190
628
+ },
629
+ {
630
+ "epoch": 1.8224299065420562,
631
+ "grad_norm": 27.137427341683352,
632
+ "learning_rate": 3.531970905952478e-07,
633
+ "logits/chosen": -2.1491293907165527,
634
+ "logits/rejected": -2.1209685802459717,
635
+ "logps/chosen": -221.9265899658203,
636
+ "logps/rejected": -215.990478515625,
637
+ "loss": 0.2937,
638
+ "rewards/accuracies": 0.875,
639
+ "rewards/chosen": 0.9956735372543335,
640
+ "rewards/margins": 3.271005630493164,
641
+ "rewards/rejected": -2.275331974029541,
642
+ "step": 195
643
+ },
644
+ {
645
+ "epoch": 1.8691588785046729,
646
+ "grad_norm": 23.508295318902285,
647
+ "learning_rate": 3.2925363258689553e-07,
648
+ "logits/chosen": -2.161498785018921,
649
+ "logits/rejected": -2.1209306716918945,
650
+ "logps/chosen": -248.2667999267578,
651
+ "logps/rejected": -227.0295867919922,
652
+ "loss": 0.2853,
653
+ "rewards/accuracies": 0.862500011920929,
654
+ "rewards/chosen": 1.2955918312072754,
655
+ "rewards/margins": 3.347618818283081,
656
+ "rewards/rejected": -2.0520269870758057,
657
+ "step": 200
658
+ },
659
+ {
660
+ "epoch": 1.8691588785046729,
661
+ "eval_logits/chosen": -2.147773504257202,
662
+ "eval_logits/rejected": -2.1250855922698975,
663
+ "eval_logps/chosen": -249.0491485595703,
664
+ "eval_logps/rejected": -232.39312744140625,
665
+ "eval_loss": 0.5163093209266663,
666
+ "eval_rewards/accuracies": 0.8203125,
667
+ "eval_rewards/chosen": 0.718841552734375,
668
+ "eval_rewards/margins": 2.4302282333374023,
669
+ "eval_rewards/rejected": -1.711386799812317,
670
+ "eval_runtime": 201.455,
671
+ "eval_samples_per_second": 15.09,
672
+ "eval_steps_per_second": 0.238,
673
+ "step": 200
674
+ }
675
+ ],
676
+ "logging_steps": 5,
677
+ "max_steps": 321,
678
+ "num_input_tokens_seen": 0,
679
+ "num_train_epochs": 3,
680
+ "save_steps": 100,
681
+ "stateful_callbacks": {
682
+ "TrainerControl": {
683
+ "args": {
684
+ "should_epoch_stop": false,
685
+ "should_evaluate": false,
686
+ "should_log": false,
687
+ "should_save": true,
688
+ "should_training_stop": false
689
+ },
690
+ "attributes": {}
691
+ }
692
+ },
693
+ "total_flos": 2358113407598592.0,
694
+ "train_batch_size": 8,
695
+ "trial_name": null,
696
+ "trial_params": null
697
+ }
checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f18434785ed5c1cf7b24cfe9bc32bfda4c423eb14a3664f74540e373b8660d0e
3
+ size 7096
checkpoint-300/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<image>": 32000,
3
+ "<pad>": 32001
4
+ }
checkpoint-300/config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/align-anything/hantao/models/llava-v1.6-mistral-7b-hf",
3
+ "architectures": [
4
+ "LlavaNextForConditionalGeneration"
5
+ ],
6
+ "hidden_size": 4096,
7
+ "ignore_index": -100,
8
+ "image_grid_pinpoints": [
9
+ [
10
+ 336,
11
+ 672
12
+ ],
13
+ [
14
+ 672,
15
+ 336
16
+ ],
17
+ [
18
+ 672,
19
+ 672
20
+ ],
21
+ [
22
+ 1008,
23
+ 336
24
+ ],
25
+ [
26
+ 336,
27
+ 1008
28
+ ]
29
+ ],
30
+ "image_seq_length": 576,
31
+ "image_token_index": 32000,
32
+ "model_type": "llava_next",
33
+ "projector_hidden_act": "gelu",
34
+ "text_config": {
35
+ "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
36
+ "architectures": [
37
+ "MistralForCausalLM"
38
+ ],
39
+ "intermediate_size": 14336,
40
+ "max_position_embeddings": 32768,
41
+ "model_type": "mistral",
42
+ "num_key_value_heads": 8,
43
+ "rms_norm_eps": 1e-05,
44
+ "rope_theta": 1000000.0,
45
+ "sliding_window": null,
46
+ "torch_dtype": "bfloat16",
47
+ "vocab_size": 32064
48
+ },
49
+ "tie_word_embeddings": false,
50
+ "torch_dtype": "bfloat16",
51
+ "transformers_version": "4.45.2",
52
+ "use_cache": false,
53
+ "use_image_newline_parameter": true,
54
+ "vision_config": {
55
+ "hidden_size": 1024,
56
+ "image_size": 336,
57
+ "intermediate_size": 4096,
58
+ "model_type": "clip_vision_model",
59
+ "num_attention_heads": 16,
60
+ "num_hidden_layers": 24,
61
+ "patch_size": 14,
62
+ "projection_dim": 768,
63
+ "vocab_size": 32000
64
+ },
65
+ "vision_feature_layer": -2,
66
+ "vision_feature_select_strategy": "default",
67
+ "vocab_size": 32064
68
+ }
checkpoint-300/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.45.2"
6
+ }
checkpoint-300/model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ae67fb588ebd759072762c8b27dc3ddce6342e77823294e4d35393f41c4a128
3
+ size 4921618624
checkpoint-300/model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fc724f3a1da9e46687ab7e93bd1901dba41fc3e5c6facc60327499c38ebd32e
3
+ size 4915917672
checkpoint-300/model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76eb2229dd49d86a95bbd1b7471cafcfe8f46c5fcf474ddcfdd63e4826d4485e
3
+ size 4915917680
checkpoint-300/model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61540fc27fe79f3019227e34418df2f6cae77c33daf9311b4650de1e9851d176
3
+ size 380134008
checkpoint-300/model.safetensors.index.json ADDED
@@ -0,0 +1,694 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 15133495296
4
+ },
5
+ "weight_map": {
6
+ "image_newline": "model-00001-of-00004.safetensors",
7
+ "language_model.lm_head.weight": "model-00004-of-00004.safetensors",
8
+ "language_model.model.embed_tokens.weight": "model-00001-of-00004.safetensors",
9
+ "language_model.model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
10
+ "language_model.model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
11
+ "language_model.model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
12
+ "language_model.model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
13
+ "language_model.model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
14
+ "language_model.model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
15
+ "language_model.model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
16
+ "language_model.model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
17
+ "language_model.model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
18
+ "language_model.model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
19
+ "language_model.model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
20
+ "language_model.model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
21
+ "language_model.model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
22
+ "language_model.model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
23
+ "language_model.model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
24
+ "language_model.model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
25
+ "language_model.model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
26
+ "language_model.model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
27
+ "language_model.model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
28
+ "language_model.model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
29
+ "language_model.model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
30
+ "language_model.model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
31
+ "language_model.model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
32
+ "language_model.model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
33
+ "language_model.model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
34
+ "language_model.model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
35
+ "language_model.model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
36
+ "language_model.model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
37
+ "language_model.model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
38
+ "language_model.model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
39
+ "language_model.model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
40
+ "language_model.model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
41
+ "language_model.model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
42
+ "language_model.model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
43
+ "language_model.model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
44
+ "language_model.model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
45
+ "language_model.model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
46
+ "language_model.model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
47
+ "language_model.model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
48
+ "language_model.model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
49
+ "language_model.model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
50
+ "language_model.model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
51
+ "language_model.model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
52
+ "language_model.model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
53
+ "language_model.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
54
+ "language_model.model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
55
+ "language_model.model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
56
+ "language_model.model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
57
+ "language_model.model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
58
+ "language_model.model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
59
+ "language_model.model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
60
+ "language_model.model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
61
+ "language_model.model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
62
+ "language_model.model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
63
+ "language_model.model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
64
+ "language_model.model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
65
+ "language_model.model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
66
+ "language_model.model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
67
+ "language_model.model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
68
+ "language_model.model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
69
+ "language_model.model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
70
+ "language_model.model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
71
+ "language_model.model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
72
+ "language_model.model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
73
+ "language_model.model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
74
+ "language_model.model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
75
+ "language_model.model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
76
+ "language_model.model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
77
+ "language_model.model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
78
+ "language_model.model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
79
+ "language_model.model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
80
+ "language_model.model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
81
+ "language_model.model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
82
+ "language_model.model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
83
+ "language_model.model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
84
+ "language_model.model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
85
+ "language_model.model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
86
+ "language_model.model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
87
+ "language_model.model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
88
+ "language_model.model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
89
+ "language_model.model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
90
+ "language_model.model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
91
+ "language_model.model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
92
+ "language_model.model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
93
+ "language_model.model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
94
+ "language_model.model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
95
+ "language_model.model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
96
+ "language_model.model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
97
+ "language_model.model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
98
+ "language_model.model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
99
+ "language_model.model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
100
+ "language_model.model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
101
+ "language_model.model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
102
+ "language_model.model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
103
+ "language_model.model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
104
+ "language_model.model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
105
+ "language_model.model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
106
+ "language_model.model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
107
+ "language_model.model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
108
+ "language_model.model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
109
+ "language_model.model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
110
+ "language_model.model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
111
+ "language_model.model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
112
+ "language_model.model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
113
+ "language_model.model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
114
+ "language_model.model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
115
+ "language_model.model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
116
+ "language_model.model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
117
+ "language_model.model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
118
+ "language_model.model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
119
+ "language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
120
+ "language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
121
+ "language_model.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
122
+ "language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
123
+ "language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
124
+ "language_model.model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
125
+ "language_model.model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
126
+ "language_model.model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
127
+ "language_model.model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
128
+ "language_model.model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
129
+ "language_model.model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
130
+ "language_model.model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
131
+ "language_model.model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
132
+ "language_model.model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
133
+ "language_model.model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
134
+ "language_model.model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
135
+ "language_model.model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
136
+ "language_model.model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
137
+ "language_model.model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
138
+ "language_model.model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
139
+ "language_model.model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
140
+ "language_model.model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
141
+ "language_model.model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
142
+ "language_model.model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
143
+ "language_model.model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
144
+ "language_model.model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
145
+ "language_model.model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
146
+ "language_model.model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
147
+ "language_model.model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
148
+ "language_model.model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
149
+ "language_model.model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
150
+ "language_model.model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
151
+ "language_model.model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
152
+ "language_model.model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
153
+ "language_model.model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
154
+ "language_model.model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
155
+ "language_model.model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
156
+ "language_model.model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
157
+ "language_model.model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
158
+ "language_model.model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
159
+ "language_model.model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
160
+ "language_model.model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
161
+ "language_model.model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
162
+ "language_model.model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
163
+ "language_model.model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
164
+ "language_model.model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
165
+ "language_model.model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
166
+ "language_model.model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
167
+ "language_model.model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
168
+ "language_model.model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
169
+ "language_model.model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
170
+ "language_model.model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
171
+ "language_model.model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
172
+ "language_model.model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
173
+ "language_model.model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
174
+ "language_model.model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
175
+ "language_model.model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
176
+ "language_model.model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
177
+ "language_model.model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
178
+ "language_model.model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
179
+ "language_model.model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
180
+ "language_model.model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
181
+ "language_model.model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
182
+ "language_model.model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
183
+ "language_model.model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
184
+ "language_model.model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
185
+ "language_model.model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
186
+ "language_model.model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
187
+ "language_model.model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
188
+ "language_model.model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
189
+ "language_model.model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
190
+ "language_model.model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
191
+ "language_model.model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
192
+ "language_model.model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
193
+ "language_model.model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
194
+ "language_model.model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
195
+ "language_model.model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
196
+ "language_model.model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
197
+ "language_model.model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
198
+ "language_model.model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
199
+ "language_model.model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
200
+ "language_model.model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
201
+ "language_model.model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
202
+ "language_model.model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
203
+ "language_model.model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
204
+ "language_model.model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
205
+ "language_model.model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
206
+ "language_model.model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
207
+ "language_model.model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
208
+ "language_model.model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
209
+ "language_model.model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
210
+ "language_model.model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
211
+ "language_model.model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
212
+ "language_model.model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
213
+ "language_model.model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
214
+ "language_model.model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
215
+ "language_model.model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
216
+ "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
217
+ "language_model.model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
218
+ "language_model.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
219
+ "language_model.model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
220
+ "language_model.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
221
+ "language_model.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
222
+ "language_model.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
223
+ "language_model.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
224
+ "language_model.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
225
+ "language_model.model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
226
+ "language_model.model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
227
+ "language_model.model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
228
+ "language_model.model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
229
+ "language_model.model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
230
+ "language_model.model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
231
+ "language_model.model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
232
+ "language_model.model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
233
+ "language_model.model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
234
+ "language_model.model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
235
+ "language_model.model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
236
+ "language_model.model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
237
+ "language_model.model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
238
+ "language_model.model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
239
+ "language_model.model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
240
+ "language_model.model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
241
+ "language_model.model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
242
+ "language_model.model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
243
+ "language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
244
+ "language_model.model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
245
+ "language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
246
+ "language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
247
+ "language_model.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
248
+ "language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
249
+ "language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
250
+ "language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
251
+ "language_model.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
252
+ "language_model.model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
253
+ "language_model.model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
254
+ "language_model.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
255
+ "language_model.model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
256
+ "language_model.model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
257
+ "language_model.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
258
+ "language_model.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
259
+ "language_model.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
260
+ "language_model.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
261
+ "language_model.model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
262
+ "language_model.model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
263
+ "language_model.model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
264
+ "language_model.model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
265
+ "language_model.model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
266
+ "language_model.model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
267
+ "language_model.model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
268
+ "language_model.model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
269
+ "language_model.model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
270
+ "language_model.model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
271
+ "language_model.model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
272
+ "language_model.model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
273
+ "language_model.model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
274
+ "language_model.model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
275
+ "language_model.model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
276
+ "language_model.model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
277
+ "language_model.model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
278
+ "language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
279
+ "language_model.model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
280
+ "language_model.model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
281
+ "language_model.model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
282
+ "language_model.model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
283
+ "language_model.model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
284
+ "language_model.model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
285
+ "language_model.model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
286
+ "language_model.model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
287
+ "language_model.model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
288
+ "language_model.model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
289
+ "language_model.model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
290
+ "language_model.model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
291
+ "language_model.model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
292
+ "language_model.model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
293
+ "language_model.model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
294
+ "language_model.model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
295
+ "language_model.model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
296
+ "language_model.model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
297
+ "language_model.model.norm.weight": "model-00004-of-00004.safetensors",
298
+ "multi_modal_projector.linear_1.bias": "model-00001-of-00004.safetensors",
299
+ "multi_modal_projector.linear_1.weight": "model-00001-of-00004.safetensors",
300
+ "multi_modal_projector.linear_2.bias": "model-00001-of-00004.safetensors",
301
+ "multi_modal_projector.linear_2.weight": "model-00001-of-00004.safetensors",
302
+ "vision_tower.vision_model.embeddings.class_embedding": "model-00001-of-00004.safetensors",
303
+ "vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00001-of-00004.safetensors",
304
+ "vision_tower.vision_model.embeddings.position_embedding.weight": "model-00001-of-00004.safetensors",
305
+ "vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00001-of-00004.safetensors",
306
+ "vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00001-of-00004.safetensors",
307
+ "vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00001-of-00004.safetensors",
308
+ "vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00001-of-00004.safetensors",
309
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00001-of-00004.safetensors",
310
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00001-of-00004.safetensors",
311
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00001-of-00004.safetensors",
312
+ "vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00001-of-00004.safetensors",
313
+ "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
314
+ "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
315
+ "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
316
+ "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
317
+ "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
318
+ "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
319
+ "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
320
+ "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
321
+ "vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00001-of-00004.safetensors",
322
+ "vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00001-of-00004.safetensors",
323
+ "vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00001-of-00004.safetensors",
324
+ "vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00001-of-00004.safetensors",
325
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00001-of-00004.safetensors",
326
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00001-of-00004.safetensors",
327
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00001-of-00004.safetensors",
328
+ "vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00001-of-00004.safetensors",
329
+ "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
330
+ "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
331
+ "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
332
+ "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
333
+ "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
334
+ "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
335
+ "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
336
+ "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
337
+ "vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00001-of-00004.safetensors",
338
+ "vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00001-of-00004.safetensors",
339
+ "vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00001-of-00004.safetensors",
340
+ "vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00001-of-00004.safetensors",
341
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00001-of-00004.safetensors",
342
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00001-of-00004.safetensors",
343
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00001-of-00004.safetensors",
344
+ "vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00001-of-00004.safetensors",
345
+ "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
346
+ "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
347
+ "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
348
+ "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
349
+ "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
350
+ "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
351
+ "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
352
+ "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
353
+ "vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00001-of-00004.safetensors",
354
+ "vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00001-of-00004.safetensors",
355
+ "vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00001-of-00004.safetensors",
356
+ "vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00001-of-00004.safetensors",
357
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00001-of-00004.safetensors",
358
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00001-of-00004.safetensors",
359
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00001-of-00004.safetensors",
360
+ "vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00001-of-00004.safetensors",
361
+ "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
362
+ "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
363
+ "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
364
+ "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
365
+ "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
366
+ "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
367
+ "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
368
+ "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
369
+ "vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00001-of-00004.safetensors",
370
+ "vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00001-of-00004.safetensors",
371
+ "vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00001-of-00004.safetensors",
372
+ "vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00001-of-00004.safetensors",
373
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00001-of-00004.safetensors",
374
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00001-of-00004.safetensors",
375
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00001-of-00004.safetensors",
376
+ "vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00001-of-00004.safetensors",
377
+ "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
378
+ "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
379
+ "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
380
+ "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
381
+ "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
382
+ "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
383
+ "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
384
+ "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
385
+ "vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00001-of-00004.safetensors",
386
+ "vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00001-of-00004.safetensors",
387
+ "vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00001-of-00004.safetensors",
388
+ "vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00001-of-00004.safetensors",
389
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00001-of-00004.safetensors",
390
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00001-of-00004.safetensors",
391
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00001-of-00004.safetensors",
392
+ "vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00001-of-00004.safetensors",
393
+ "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
394
+ "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
395
+ "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
396
+ "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
397
+ "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
398
+ "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
399
+ "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
400
+ "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
401
+ "vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00001-of-00004.safetensors",
402
+ "vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00001-of-00004.safetensors",
403
+ "vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00001-of-00004.safetensors",
404
+ "vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00001-of-00004.safetensors",
405
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00001-of-00004.safetensors",
406
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00001-of-00004.safetensors",
407
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00001-of-00004.safetensors",
408
+ "vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00001-of-00004.safetensors",
409
+ "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
410
+ "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
411
+ "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
412
+ "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
413
+ "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
414
+ "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
415
+ "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
416
+ "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
417
+ "vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00001-of-00004.safetensors",
418
+ "vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00001-of-00004.safetensors",
419
+ "vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00001-of-00004.safetensors",
420
+ "vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00001-of-00004.safetensors",
421
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00001-of-00004.safetensors",
422
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00001-of-00004.safetensors",
423
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00001-of-00004.safetensors",
424
+ "vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00001-of-00004.safetensors",
425
+ "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
426
+ "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
427
+ "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
428
+ "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
429
+ "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
430
+ "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
431
+ "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
432
+ "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
433
+ "vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00001-of-00004.safetensors",
434
+ "vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00001-of-00004.safetensors",
435
+ "vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00001-of-00004.safetensors",
436
+ "vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00001-of-00004.safetensors",
437
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00001-of-00004.safetensors",
438
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00001-of-00004.safetensors",
439
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00001-of-00004.safetensors",
440
+ "vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00001-of-00004.safetensors",
441
+ "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
442
+ "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
443
+ "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
444
+ "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
445
+ "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
446
+ "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
447
+ "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
448
+ "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
449
+ "vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00001-of-00004.safetensors",
450
+ "vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00001-of-00004.safetensors",
451
+ "vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00001-of-00004.safetensors",
452
+ "vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00001-of-00004.safetensors",
453
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00001-of-00004.safetensors",
454
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00001-of-00004.safetensors",
455
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00001-of-00004.safetensors",
456
+ "vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00001-of-00004.safetensors",
457
+ "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
458
+ "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
459
+ "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
460
+ "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
461
+ "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
462
+ "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
463
+ "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
464
+ "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
465
+ "vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00001-of-00004.safetensors",
466
+ "vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00001-of-00004.safetensors",
467
+ "vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00001-of-00004.safetensors",
468
+ "vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00001-of-00004.safetensors",
469
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00001-of-00004.safetensors",
470
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00001-of-00004.safetensors",
471
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00001-of-00004.safetensors",
472
+ "vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00001-of-00004.safetensors",
473
+ "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
474
+ "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
475
+ "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
476
+ "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
477
+ "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
478
+ "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
479
+ "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
480
+ "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
481
+ "vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00001-of-00004.safetensors",
482
+ "vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00001-of-00004.safetensors",
483
+ "vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00001-of-00004.safetensors",
484
+ "vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00001-of-00004.safetensors",
485
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00001-of-00004.safetensors",
486
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00001-of-00004.safetensors",
487
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00001-of-00004.safetensors",
488
+ "vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00001-of-00004.safetensors",
489
+ "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
490
+ "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
491
+ "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
492
+ "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
493
+ "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
494
+ "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
495
+ "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
496
+ "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
497
+ "vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00001-of-00004.safetensors",
498
+ "vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00001-of-00004.safetensors",
499
+ "vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00001-of-00004.safetensors",
500
+ "vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00001-of-00004.safetensors",
501
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00001-of-00004.safetensors",
502
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00001-of-00004.safetensors",
503
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00001-of-00004.safetensors",
504
+ "vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00001-of-00004.safetensors",
505
+ "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
506
+ "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
507
+ "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
508
+ "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
509
+ "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
510
+ "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
511
+ "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
512
+ "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
513
+ "vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00001-of-00004.safetensors",
514
+ "vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00001-of-00004.safetensors",
515
+ "vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00001-of-00004.safetensors",
516
+ "vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00001-of-00004.safetensors",
517
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00001-of-00004.safetensors",
518
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00001-of-00004.safetensors",
519
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00001-of-00004.safetensors",
520
+ "vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00001-of-00004.safetensors",
521
+ "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
522
+ "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
523
+ "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
524
+ "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
525
+ "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
526
+ "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
527
+ "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
528
+ "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
529
+ "vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00001-of-00004.safetensors",
530
+ "vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00001-of-00004.safetensors",
531
+ "vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00001-of-00004.safetensors",
532
+ "vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00001-of-00004.safetensors",
533
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00001-of-00004.safetensors",
534
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00001-of-00004.safetensors",
535
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00001-of-00004.safetensors",
536
+ "vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00001-of-00004.safetensors",
537
+ "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
538
+ "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
539
+ "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
540
+ "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
541
+ "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
542
+ "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
543
+ "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
544
+ "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
545
+ "vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00001-of-00004.safetensors",
546
+ "vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00001-of-00004.safetensors",
547
+ "vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00001-of-00004.safetensors",
548
+ "vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00001-of-00004.safetensors",
549
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00001-of-00004.safetensors",
550
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00001-of-00004.safetensors",
551
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00001-of-00004.safetensors",
552
+ "vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00001-of-00004.safetensors",
553
+ "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
554
+ "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
555
+ "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
556
+ "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
557
+ "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
558
+ "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
559
+ "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
560
+ "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
561
+ "vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00001-of-00004.safetensors",
562
+ "vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00001-of-00004.safetensors",
563
+ "vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00001-of-00004.safetensors",
564
+ "vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00001-of-00004.safetensors",
565
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00001-of-00004.safetensors",
566
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00001-of-00004.safetensors",
567
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00001-of-00004.safetensors",
568
+ "vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00001-of-00004.safetensors",
569
+ "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
570
+ "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
571
+ "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
572
+ "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
573
+ "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
574
+ "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
575
+ "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
576
+ "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
577
+ "vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00001-of-00004.safetensors",
578
+ "vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00001-of-00004.safetensors",
579
+ "vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00001-of-00004.safetensors",
580
+ "vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00001-of-00004.safetensors",
581
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00001-of-00004.safetensors",
582
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00001-of-00004.safetensors",
583
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00001-of-00004.safetensors",
584
+ "vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00001-of-00004.safetensors",
585
+ "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
586
+ "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
587
+ "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
588
+ "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
589
+ "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
590
+ "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
591
+ "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
592
+ "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
593
+ "vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00001-of-00004.safetensors",
594
+ "vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00001-of-00004.safetensors",
595
+ "vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00001-of-00004.safetensors",
596
+ "vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00001-of-00004.safetensors",
597
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00001-of-00004.safetensors",
598
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00001-of-00004.safetensors",
599
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00001-of-00004.safetensors",
600
+ "vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00001-of-00004.safetensors",
601
+ "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
602
+ "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
603
+ "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
604
+ "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
605
+ "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
606
+ "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
607
+ "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
608
+ "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
609
+ "vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00001-of-00004.safetensors",
610
+ "vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00001-of-00004.safetensors",
611
+ "vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00001-of-00004.safetensors",
612
+ "vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00001-of-00004.safetensors",
613
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00001-of-00004.safetensors",
614
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00001-of-00004.safetensors",
615
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00001-of-00004.safetensors",
616
+ "vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00001-of-00004.safetensors",
617
+ "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
618
+ "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
619
+ "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
620
+ "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
621
+ "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
622
+ "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
623
+ "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
624
+ "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
625
+ "vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00001-of-00004.safetensors",
626
+ "vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00001-of-00004.safetensors",
627
+ "vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00001-of-00004.safetensors",
628
+ "vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00001-of-00004.safetensors",
629
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00001-of-00004.safetensors",
630
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00001-of-00004.safetensors",
631
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00001-of-00004.safetensors",
632
+ "vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00001-of-00004.safetensors",
633
+ "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
634
+ "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
635
+ "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
636
+ "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
637
+ "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
638
+ "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
639
+ "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
640
+ "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
641
+ "vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00001-of-00004.safetensors",
642
+ "vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00001-of-00004.safetensors",
643
+ "vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00001-of-00004.safetensors",
644
+ "vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00001-of-00004.safetensors",
645
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00001-of-00004.safetensors",
646
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00001-of-00004.safetensors",
647
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00001-of-00004.safetensors",
648
+ "vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00001-of-00004.safetensors",
649
+ "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
650
+ "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
651
+ "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
652
+ "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
653
+ "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
654
+ "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
655
+ "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
656
+ "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
657
+ "vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00001-of-00004.safetensors",
658
+ "vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00001-of-00004.safetensors",
659
+ "vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00001-of-00004.safetensors",
660
+ "vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00001-of-00004.safetensors",
661
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00001-of-00004.safetensors",
662
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00001-of-00004.safetensors",
663
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00001-of-00004.safetensors",
664
+ "vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00001-of-00004.safetensors",
665
+ "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
666
+ "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
667
+ "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
668
+ "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
669
+ "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
670
+ "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
671
+ "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
672
+ "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
673
+ "vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00001-of-00004.safetensors",
674
+ "vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00001-of-00004.safetensors",
675
+ "vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00001-of-00004.safetensors",
676
+ "vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00001-of-00004.safetensors",
677
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00001-of-00004.safetensors",
678
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00001-of-00004.safetensors",
679
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00001-of-00004.safetensors",
680
+ "vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00001-of-00004.safetensors",
681
+ "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
682
+ "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
683
+ "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00004.safetensors",
684
+ "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00004.safetensors",
685
+ "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
686
+ "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
687
+ "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
688
+ "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
689
+ "vision_tower.vision_model.post_layernorm.bias": "model-00001-of-00004.safetensors",
690
+ "vision_tower.vision_model.post_layernorm.weight": "model-00001-of-00004.safetensors",
691
+ "vision_tower.vision_model.pre_layrnorm.bias": "model-00001-of-00004.safetensors",
692
+ "vision_tower.vision_model.pre_layrnorm.weight": "model-00001-of-00004.safetensors"
693
+ }
694
+ }
checkpoint-300/preprocessor_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "aspect_ratio_setting": "anyres",
3
+ "crop_size": {
4
+ "height": 336,
5
+ "width": 336
6
+ },
7
+ "do_center_crop": true,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_pad": true,
11
+ "do_rescale": true,
12
+ "do_resize": true,
13
+ "image_grid_pinpoints": [
14
+ [
15
+ 336,
16
+ 672
17
+ ],
18
+ [
19
+ 672,
20
+ 336
21
+ ],
22
+ [
23
+ 672,
24
+ 672
25
+ ],
26
+ [
27
+ 1008,
28
+ 336
29
+ ],
30
+ [
31
+ 336,
32
+ 1008
33
+ ]
34
+ ],
35
+ "image_mean": [
36
+ 0.48145466,
37
+ 0.4578275,
38
+ 0.40821073
39
+ ],
40
+ "image_processor_type": "LlavaNextImageProcessor",
41
+ "image_std": [
42
+ 0.26862954,
43
+ 0.26130258,
44
+ 0.27577711
45
+ ],
46
+ "processor_class": "LlavaNextProcessor",
47
+ "resample": 3,
48
+ "rescale_factor": 0.00392156862745098,
49
+ "size": {
50
+ "shortest_edge": 336
51
+ }
52
+ }
checkpoint-300/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
checkpoint-300/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-300/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
checkpoint-300/tokenizer_config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "32000": {
31
+ "content": "<image>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "32001": {
39
+ "content": "<pad>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": true
45
+ }
46
+ },
47
+ "additional_special_tokens": [],
48
+ "bos_token": "<s>",
49
+ "chat_template": "{{ '<s>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '`[INST] `' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
50
+ "clean_up_tokenization_spaces": false,
51
+ "eos_token": "</s>",
52
+ "extra_special_tokens": {
53
+ "image_token": "<image>"
54
+ },
55
+ "image_token": "<image>",
56
+ "legacy": true,
57
+ "max_length": null,
58
+ "model_max_length": 1000000000000000019884624838656,
59
+ "pad_to_multiple_of": null,
60
+ "pad_token": "<pad>",
61
+ "pad_token_type_id": 0,
62
+ "padding_side": "right",
63
+ "processor_class": "LlavaNextProcessor",
64
+ "sp_model_kwargs": {},
65
+ "spaces_between_special_tokens": false,
66
+ "split_special_tokens": false,
67
+ "tokenizer_class": "LlamaTokenizer",
68
+ "unk_token": "<unk>",
69
+ "use_default_system_prompt": false
70
+ }
checkpoint-300/trainer_state.json ADDED
@@ -0,0 +1,1029 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.803738317757009,
5
+ "eval_steps": 50,
6
+ "global_step": 300,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04672897196261682,
13
+ "grad_norm": 55.02521133123827,
14
+ "learning_rate": 5e-07,
15
+ "logits/chosen": -2.7216238975524902,
16
+ "logits/rejected": -2.7209055423736572,
17
+ "logps/chosen": -268.4510192871094,
18
+ "logps/rejected": -203.9590606689453,
19
+ "loss": 0.6914,
20
+ "rewards/accuracies": 0.33125001192092896,
21
+ "rewards/chosen": 0.017813727259635925,
22
+ "rewards/margins": 0.009322145953774452,
23
+ "rewards/rejected": 0.008491581305861473,
24
+ "step": 5
25
+ },
26
+ {
27
+ "epoch": 0.09345794392523364,
28
+ "grad_norm": 50.12554517439661,
29
+ "learning_rate": 1e-06,
30
+ "logits/chosen": -2.660832166671753,
31
+ "logits/rejected": -2.6669700145721436,
32
+ "logps/chosen": -256.93609619140625,
33
+ "logps/rejected": -211.64938354492188,
34
+ "loss": 0.6467,
35
+ "rewards/accuracies": 0.6812499761581421,
36
+ "rewards/chosen": 0.4671781659126282,
37
+ "rewards/margins": 0.18480566143989563,
38
+ "rewards/rejected": 0.28237253427505493,
39
+ "step": 10
40
+ },
41
+ {
42
+ "epoch": 0.14018691588785046,
43
+ "grad_norm": 48.457281147169425,
44
+ "learning_rate": 9.993623730611148e-07,
45
+ "logits/chosen": -2.494657278060913,
46
+ "logits/rejected": -2.5133018493652344,
47
+ "logps/chosen": -232.6891632080078,
48
+ "logps/rejected": -212.8677215576172,
49
+ "loss": 0.6204,
50
+ "rewards/accuracies": 0.6625000238418579,
51
+ "rewards/chosen": 1.2435152530670166,
52
+ "rewards/margins": 0.6126660108566284,
53
+ "rewards/rejected": 0.6308490037918091,
54
+ "step": 15
55
+ },
56
+ {
57
+ "epoch": 0.18691588785046728,
58
+ "grad_norm": 42.23668651632898,
59
+ "learning_rate": 9.97451118516912e-07,
60
+ "logits/chosen": -2.3121209144592285,
61
+ "logits/rejected": -2.302377462387085,
62
+ "logps/chosen": -234.3399658203125,
63
+ "logps/rejected": -191.3181610107422,
64
+ "loss": 0.6223,
65
+ "rewards/accuracies": 0.706250011920929,
66
+ "rewards/chosen": 1.3283790349960327,
67
+ "rewards/margins": 0.9071598052978516,
68
+ "rewards/rejected": 0.4212193489074707,
69
+ "step": 20
70
+ },
71
+ {
72
+ "epoch": 0.2336448598130841,
73
+ "grad_norm": 49.68505743626908,
74
+ "learning_rate": 9.94271111036929e-07,
75
+ "logits/chosen": -2.2619333267211914,
76
+ "logits/rejected": -2.2323482036590576,
77
+ "logps/chosen": -230.17385864257812,
78
+ "logps/rejected": -205.64108276367188,
79
+ "loss": 0.616,
80
+ "rewards/accuracies": 0.762499988079071,
81
+ "rewards/chosen": 1.3353136777877808,
82
+ "rewards/margins": 1.208017110824585,
83
+ "rewards/rejected": 0.127296581864357,
84
+ "step": 25
85
+ },
86
+ {
87
+ "epoch": 0.2803738317757009,
88
+ "grad_norm": 39.50573463077347,
89
+ "learning_rate": 9.898304612549066e-07,
90
+ "logits/chosen": -2.308243989944458,
91
+ "logits/rejected": -2.2968266010284424,
92
+ "logps/chosen": -241.26632690429688,
93
+ "logps/rejected": -207.3319854736328,
94
+ "loss": 0.5998,
95
+ "rewards/accuracies": 0.731249988079071,
96
+ "rewards/chosen": 0.9092999696731567,
97
+ "rewards/margins": 0.9556086659431458,
98
+ "rewards/rejected": -0.046308644115924835,
99
+ "step": 30
100
+ },
101
+ {
102
+ "epoch": 0.32710280373831774,
103
+ "grad_norm": 42.78736900246308,
104
+ "learning_rate": 9.841404950825536e-07,
105
+ "logits/chosen": -2.3728604316711426,
106
+ "logits/rejected": -2.3580102920532227,
107
+ "logps/chosen": -256.8548583984375,
108
+ "logps/rejected": -205.154052734375,
109
+ "loss": 0.5996,
110
+ "rewards/accuracies": 0.768750011920929,
111
+ "rewards/chosen": 1.0152027606964111,
112
+ "rewards/margins": 1.2137099504470825,
113
+ "rewards/rejected": -0.1985071450471878,
114
+ "step": 35
115
+ },
116
+ {
117
+ "epoch": 0.37383177570093457,
118
+ "grad_norm": 41.73908597429494,
119
+ "learning_rate": 9.77215724822721e-07,
120
+ "logits/chosen": -2.4492850303649902,
121
+ "logits/rejected": -2.4539356231689453,
122
+ "logps/chosen": -243.1707763671875,
123
+ "logps/rejected": -213.95166015625,
124
+ "loss": 0.6098,
125
+ "rewards/accuracies": 0.71875,
126
+ "rewards/chosen": 1.0353302955627441,
127
+ "rewards/margins": 1.2659428119659424,
128
+ "rewards/rejected": -0.2306123673915863,
129
+ "step": 40
130
+ },
131
+ {
132
+ "epoch": 0.4205607476635514,
133
+ "grad_norm": 41.530064757148224,
134
+ "learning_rate": 9.69073812155662e-07,
135
+ "logits/chosen": -2.5637125968933105,
136
+ "logits/rejected": -2.5535428524017334,
137
+ "logps/chosen": -244.7168731689453,
138
+ "logps/rejected": -205.80923461914062,
139
+ "loss": 0.5974,
140
+ "rewards/accuracies": 0.71875,
141
+ "rewards/chosen": 0.8133939504623413,
142
+ "rewards/margins": 0.9837163686752319,
143
+ "rewards/rejected": -0.17032238841056824,
144
+ "step": 45
145
+ },
146
+ {
147
+ "epoch": 0.4672897196261682,
148
+ "grad_norm": 38.26706141308248,
149
+ "learning_rate": 9.597355230927788e-07,
150
+ "logits/chosen": -2.5823917388916016,
151
+ "logits/rejected": -2.562842607498169,
152
+ "logps/chosen": -240.04067993164062,
153
+ "logps/rejected": -209.23428344726562,
154
+ "loss": 0.572,
155
+ "rewards/accuracies": 0.800000011920929,
156
+ "rewards/chosen": 0.9298027753829956,
157
+ "rewards/margins": 1.2456680536270142,
158
+ "rewards/rejected": -0.3158652186393738,
159
+ "step": 50
160
+ },
161
+ {
162
+ "epoch": 0.4672897196261682,
163
+ "eval_logits/chosen": -2.530949115753174,
164
+ "eval_logits/rejected": -2.529101610183716,
165
+ "eval_logps/chosen": -245.5291748046875,
166
+ "eval_logps/rejected": -217.46429443359375,
167
+ "eval_loss": 0.5720326900482178,
168
+ "eval_rewards/accuracies": 0.7578125,
169
+ "eval_rewards/chosen": 1.0708366632461548,
170
+ "eval_rewards/margins": 1.28933846950531,
171
+ "eval_rewards/rejected": -0.2185017466545105,
172
+ "eval_runtime": 202.2601,
173
+ "eval_samples_per_second": 15.03,
174
+ "eval_steps_per_second": 0.237,
175
+ "step": 50
176
+ },
177
+ {
178
+ "epoch": 0.514018691588785,
179
+ "grad_norm": 40.54073508413725,
180
+ "learning_rate": 9.4922467501275e-07,
181
+ "logits/chosen": -2.495945930480957,
182
+ "logits/rejected": -2.487422466278076,
183
+ "logps/chosen": -250.51620483398438,
184
+ "logps/rejected": -228.90200805664062,
185
+ "loss": 0.5176,
186
+ "rewards/accuracies": 0.7875000238418579,
187
+ "rewards/chosen": 1.0155770778656006,
188
+ "rewards/margins": 1.9236654043197632,
189
+ "rewards/rejected": -0.9080885648727417,
190
+ "step": 55
191
+ },
192
+ {
193
+ "epoch": 0.5607476635514018,
194
+ "grad_norm": 38.23797310786567,
195
+ "learning_rate": 9.375680759151206e-07,
196
+ "logits/chosen": -2.474236249923706,
197
+ "logits/rejected": -2.4737977981567383,
198
+ "logps/chosen": -255.09298706054688,
199
+ "logps/rejected": -200.73593139648438,
200
+ "loss": 0.5654,
201
+ "rewards/accuracies": 0.768750011920929,
202
+ "rewards/chosen": 1.0740002393722534,
203
+ "rewards/margins": 1.5434155464172363,
204
+ "rewards/rejected": -0.4694152772426605,
205
+ "step": 60
206
+ },
207
+ {
208
+ "epoch": 0.6074766355140186,
209
+ "grad_norm": 42.648181943788025,
210
+ "learning_rate": 9.247954560462927e-07,
211
+ "logits/chosen": -2.505916118621826,
212
+ "logits/rejected": -2.506608724594116,
213
+ "logps/chosen": -255.432861328125,
214
+ "logps/rejected": -205.4224090576172,
215
+ "loss": 0.5628,
216
+ "rewards/accuracies": 0.8062499761581421,
217
+ "rewards/chosen": 1.1411590576171875,
218
+ "rewards/margins": 1.7762504816055298,
219
+ "rewards/rejected": -0.6350914239883423,
220
+ "step": 65
221
+ },
222
+ {
223
+ "epoch": 0.6542056074766355,
224
+ "grad_norm": 38.81572593341751,
225
+ "learning_rate": 9.109393920723001e-07,
226
+ "logits/chosen": -2.4328043460845947,
227
+ "logits/rejected": -2.4342734813690186,
228
+ "logps/chosen": -233.8389129638672,
229
+ "logps/rejected": -212.91085815429688,
230
+ "loss": 0.5378,
231
+ "rewards/accuracies": 0.7124999761581421,
232
+ "rewards/chosen": 0.6836588978767395,
233
+ "rewards/margins": 1.282029390335083,
234
+ "rewards/rejected": -0.5983705520629883,
235
+ "step": 70
236
+ },
237
+ {
238
+ "epoch": 0.7009345794392523,
239
+ "grad_norm": 32.317612654080975,
240
+ "learning_rate": 8.960352239917699e-07,
241
+ "logits/chosen": -2.450084924697876,
242
+ "logits/rejected": -2.401425361633301,
243
+ "logps/chosen": -240.6315460205078,
244
+ "logps/rejected": -227.21084594726562,
245
+ "loss": 0.5154,
246
+ "rewards/accuracies": 0.78125,
247
+ "rewards/chosen": 0.6731350421905518,
248
+ "rewards/margins": 1.5562646389007568,
249
+ "rewards/rejected": -0.8831297755241394,
250
+ "step": 75
251
+ },
252
+ {
253
+ "epoch": 0.7476635514018691,
254
+ "grad_norm": 30.17721204804764,
255
+ "learning_rate": 8.801209650009814e-07,
256
+ "logits/chosen": -2.4172046184539795,
257
+ "logits/rejected": -2.400567054748535,
258
+ "logps/chosen": -245.4665985107422,
259
+ "logps/rejected": -214.18515014648438,
260
+ "loss": 0.5206,
261
+ "rewards/accuracies": 0.8187500238418579,
262
+ "rewards/chosen": 0.9583255052566528,
263
+ "rewards/margins": 1.7562158107757568,
264
+ "rewards/rejected": -0.7978904843330383,
265
+ "step": 80
266
+ },
267
+ {
268
+ "epoch": 0.794392523364486,
269
+ "grad_norm": 32.24535114623233,
270
+ "learning_rate": 8.632372045409141e-07,
271
+ "logits/chosen": -2.320589065551758,
272
+ "logits/rejected": -2.3311946392059326,
273
+ "logps/chosen": -245.598388671875,
274
+ "logps/rejected": -234.7646026611328,
275
+ "loss": 0.5472,
276
+ "rewards/accuracies": 0.78125,
277
+ "rewards/chosen": 1.1685658693313599,
278
+ "rewards/margins": 1.754003882408142,
279
+ "rewards/rejected": -0.5854381322860718,
280
+ "step": 85
281
+ },
282
+ {
283
+ "epoch": 0.8411214953271028,
284
+ "grad_norm": 34.061291659967246,
285
+ "learning_rate": 8.454270047735642e-07,
286
+ "logits/chosen": -2.329784870147705,
287
+ "logits/rejected": -2.304997682571411,
288
+ "logps/chosen": -238.0483856201172,
289
+ "logps/rejected": -195.24313354492188,
290
+ "loss": 0.5291,
291
+ "rewards/accuracies": 0.8125,
292
+ "rewards/chosen": 0.6335947513580322,
293
+ "rewards/margins": 1.5654070377349854,
294
+ "rewards/rejected": -0.9318124055862427,
295
+ "step": 90
296
+ },
297
+ {
298
+ "epoch": 0.8878504672897196,
299
+ "grad_norm": 31.735542564732725,
300
+ "learning_rate": 8.267357907515661e-07,
301
+ "logits/chosen": -2.298316478729248,
302
+ "logits/rejected": -2.2975010871887207,
303
+ "logps/chosen": -246.3526153564453,
304
+ "logps/rejected": -206.03524780273438,
305
+ "loss": 0.5418,
306
+ "rewards/accuracies": 0.7562500238418579,
307
+ "rewards/chosen": 0.5418449640274048,
308
+ "rewards/margins": 1.539952039718628,
309
+ "rewards/rejected": -0.9981070756912231,
310
+ "step": 95
311
+ },
312
+ {
313
+ "epoch": 0.9345794392523364,
314
+ "grad_norm": 32.65915062987667,
315
+ "learning_rate": 8.072112345612433e-07,
316
+ "logits/chosen": -2.2663910388946533,
317
+ "logits/rejected": -2.218681812286377,
318
+ "logps/chosen": -246.5704803466797,
319
+ "logps/rejected": -218.6560821533203,
320
+ "loss": 0.4997,
321
+ "rewards/accuracies": 0.8125,
322
+ "rewards/chosen": 0.8927062153816223,
323
+ "rewards/margins": 2.236736297607422,
324
+ "rewards/rejected": -1.3440301418304443,
325
+ "step": 100
326
+ },
327
+ {
328
+ "epoch": 0.9345794392523364,
329
+ "eval_logits/chosen": -2.2007782459259033,
330
+ "eval_logits/rejected": -2.177567720413208,
331
+ "eval_logps/chosen": -247.5850067138672,
332
+ "eval_logps/rejected": -224.4142608642578,
333
+ "eval_loss": 0.5101521015167236,
334
+ "eval_rewards/accuracies": 0.7864583134651184,
335
+ "eval_rewards/chosen": 0.8652558326721191,
336
+ "eval_rewards/margins": 1.7787574529647827,
337
+ "eval_rewards/rejected": -0.9135015606880188,
338
+ "eval_runtime": 201.8599,
339
+ "eval_samples_per_second": 15.06,
340
+ "eval_steps_per_second": 0.238,
341
+ "step": 100
342
+ },
343
+ {
344
+ "epoch": 0.9813084112149533,
345
+ "grad_norm": 28.6005139133492,
346
+ "learning_rate": 7.869031337345827e-07,
347
+ "logits/chosen": -2.1810142993927,
348
+ "logits/rejected": -2.1466403007507324,
349
+ "logps/chosen": -273.2081298828125,
350
+ "logps/rejected": -224.4601593017578,
351
+ "loss": 0.474,
352
+ "rewards/accuracies": 0.8125,
353
+ "rewards/chosen": 0.8940876126289368,
354
+ "rewards/margins": 2.1252331733703613,
355
+ "rewards/rejected": -1.2311456203460693,
356
+ "step": 105
357
+ },
358
+ {
359
+ "epoch": 1.02803738317757,
360
+ "grad_norm": 17.790000678929353,
361
+ "learning_rate": 7.658632842402432e-07,
362
+ "logits/chosen": -2.1617965698242188,
363
+ "logits/rejected": -2.118025064468384,
364
+ "logps/chosen": -248.0548858642578,
365
+ "logps/rejected": -209.18603515625,
366
+ "loss": 0.3432,
367
+ "rewards/accuracies": 0.8687499761581421,
368
+ "rewards/chosen": 0.46495524048805237,
369
+ "rewards/margins": 2.547100067138672,
370
+ "rewards/rejected": -2.0821449756622314,
371
+ "step": 110
372
+ },
373
+ {
374
+ "epoch": 1.074766355140187,
375
+ "grad_norm": 18.641161706325903,
376
+ "learning_rate": 7.441453483775353e-07,
377
+ "logits/chosen": -2.2370879650115967,
378
+ "logits/rejected": -2.1639482975006104,
379
+ "logps/chosen": -252.3055419921875,
380
+ "logps/rejected": -231.727294921875,
381
+ "loss": 0.2131,
382
+ "rewards/accuracies": 0.8999999761581421,
383
+ "rewards/chosen": 1.1295344829559326,
384
+ "rewards/margins": 2.7920079231262207,
385
+ "rewards/rejected": -1.6624739170074463,
386
+ "step": 115
387
+ },
388
+ {
389
+ "epoch": 1.1214953271028036,
390
+ "grad_norm": 22.27642795647513,
391
+ "learning_rate": 7.218047179103112e-07,
392
+ "logits/chosen": -2.2182936668395996,
393
+ "logits/rejected": -2.2140285968780518,
394
+ "logps/chosen": -248.0435028076172,
395
+ "logps/rejected": -233.4251251220703,
396
+ "loss": 0.2493,
397
+ "rewards/accuracies": 0.90625,
398
+ "rewards/chosen": 1.1873703002929688,
399
+ "rewards/margins": 3.075556755065918,
400
+ "rewards/rejected": -1.8881866931915283,
401
+ "step": 120
402
+ },
403
+ {
404
+ "epoch": 1.1682242990654206,
405
+ "grad_norm": 24.129663206875104,
406
+ "learning_rate": 6.988983727898413e-07,
407
+ "logits/chosen": -2.2516915798187256,
408
+ "logits/rejected": -2.2213852405548096,
409
+ "logps/chosen": -239.94125366210938,
410
+ "logps/rejected": -213.9071807861328,
411
+ "loss": 0.2434,
412
+ "rewards/accuracies": 0.875,
413
+ "rewards/chosen": 1.149621844291687,
414
+ "rewards/margins": 2.956752300262451,
415
+ "rewards/rejected": -1.8071304559707642,
416
+ "step": 125
417
+ },
418
+ {
419
+ "epoch": 1.2149532710280373,
420
+ "grad_norm": 19.098258762584823,
421
+ "learning_rate": 6.754847358270066e-07,
422
+ "logits/chosen": -2.268832206726074,
423
+ "logits/rejected": -2.2357370853424072,
424
+ "logps/chosen": -252.8026885986328,
425
+ "logps/rejected": -218.2414093017578,
426
+ "loss": 0.2656,
427
+ "rewards/accuracies": 0.9125000238418579,
428
+ "rewards/chosen": 1.428450345993042,
429
+ "rewards/margins": 3.458483934402466,
430
+ "rewards/rejected": -2.030033826828003,
431
+ "step": 130
432
+ },
433
+ {
434
+ "epoch": 1.2616822429906542,
435
+ "grad_norm": 16.69096458544893,
436
+ "learning_rate": 6.516235236844661e-07,
437
+ "logits/chosen": -2.2503199577331543,
438
+ "logits/rejected": -2.223175048828125,
439
+ "logps/chosen": -251.1974639892578,
440
+ "logps/rejected": -224.8771209716797,
441
+ "loss": 0.229,
442
+ "rewards/accuracies": 0.9125000238418579,
443
+ "rewards/chosen": 1.4385788440704346,
444
+ "rewards/margins": 3.5852439403533936,
445
+ "rewards/rejected": -2.146665573120117,
446
+ "step": 135
447
+ },
448
+ {
449
+ "epoch": 1.308411214953271,
450
+ "grad_norm": 24.370271103680654,
451
+ "learning_rate": 6.273755945688457e-07,
452
+ "logits/chosen": -2.30786395072937,
453
+ "logits/rejected": -2.2820160388946533,
454
+ "logps/chosen": -247.8080596923828,
455
+ "logps/rejected": -242.51596069335938,
456
+ "loss": 0.2745,
457
+ "rewards/accuracies": 0.893750011920929,
458
+ "rewards/chosen": 1.6472270488739014,
459
+ "rewards/margins": 3.4453282356262207,
460
+ "rewards/rejected": -1.7981010675430298,
461
+ "step": 140
462
+ },
463
+ {
464
+ "epoch": 1.355140186915888,
465
+ "grad_norm": 21.195332919733744,
466
+ "learning_rate": 6.02802793011411e-07,
467
+ "logits/chosen": -2.304081916809082,
468
+ "logits/rejected": -2.2989423274993896,
469
+ "logps/chosen": -221.5400848388672,
470
+ "logps/rejected": -236.2010955810547,
471
+ "loss": 0.2682,
472
+ "rewards/accuracies": 0.9125000238418579,
473
+ "rewards/chosen": 1.4084885120391846,
474
+ "rewards/margins": 3.7067673206329346,
475
+ "rewards/rejected": -2.29827880859375,
476
+ "step": 145
477
+ },
478
+ {
479
+ "epoch": 1.4018691588785046,
480
+ "grad_norm": 22.224652369004666,
481
+ "learning_rate": 5.779677921331093e-07,
482
+ "logits/chosen": -2.2803831100463867,
483
+ "logits/rejected": -2.2711875438690186,
484
+ "logps/chosen": -247.6720733642578,
485
+ "logps/rejected": -217.12509155273438,
486
+ "loss": 0.2873,
487
+ "rewards/accuracies": 0.893750011920929,
488
+ "rewards/chosen": 1.7340141534805298,
489
+ "rewards/margins": 3.378777265548706,
490
+ "rewards/rejected": -1.6447633504867554,
491
+ "step": 150
492
+ },
493
+ {
494
+ "epoch": 1.4018691588785046,
495
+ "eval_logits/chosen": -2.275045394897461,
496
+ "eval_logits/rejected": -2.263206720352173,
497
+ "eval_logps/chosen": -245.67860412597656,
498
+ "eval_logps/rejected": -227.55787658691406,
499
+ "eval_loss": 0.5675327181816101,
500
+ "eval_rewards/accuracies": 0.7890625,
501
+ "eval_rewards/chosen": 1.055895209312439,
502
+ "eval_rewards/margins": 2.2837564945220947,
503
+ "eval_rewards/rejected": -1.2278612852096558,
504
+ "eval_runtime": 202.0364,
505
+ "eval_samples_per_second": 15.047,
506
+ "eval_steps_per_second": 0.238,
507
+ "step": 150
508
+ },
509
+ {
510
+ "epoch": 1.4485981308411215,
511
+ "grad_norm": 24.024418676682174,
512
+ "learning_rate": 5.529339337962897e-07,
513
+ "logits/chosen": -2.26741361618042,
514
+ "logits/rejected": -2.2619667053222656,
515
+ "logps/chosen": -228.74258422851562,
516
+ "logps/rejected": -199.869873046875,
517
+ "loss": 0.3185,
518
+ "rewards/accuracies": 0.9312499761581421,
519
+ "rewards/chosen": 1.8829383850097656,
520
+ "rewards/margins": 3.547306776046753,
521
+ "rewards/rejected": -1.6643686294555664,
522
+ "step": 155
523
+ },
524
+ {
525
+ "epoch": 1.4953271028037383,
526
+ "grad_norm": 22.765211926437665,
527
+ "learning_rate": 5.277650670507915e-07,
528
+ "logits/chosen": -2.2662367820739746,
529
+ "logits/rejected": -2.241522789001465,
530
+ "logps/chosen": -241.50253295898438,
531
+ "logps/rejected": -211.10791015625,
532
+ "loss": 0.2582,
533
+ "rewards/accuracies": 0.9375,
534
+ "rewards/chosen": 1.677080512046814,
535
+ "rewards/margins": 3.3089568614959717,
536
+ "rewards/rejected": -1.6318763494491577,
537
+ "step": 160
538
+ },
539
+ {
540
+ "epoch": 1.542056074766355,
541
+ "grad_norm": 20.952241807232628,
542
+ "learning_rate": 5.025253852864471e-07,
543
+ "logits/chosen": -2.2016148567199707,
544
+ "logits/rejected": -2.2075283527374268,
545
+ "logps/chosen": -247.7741241455078,
546
+ "logps/rejected": -224.11892700195312,
547
+ "loss": 0.2816,
548
+ "rewards/accuracies": 0.8999999761581421,
549
+ "rewards/chosen": 1.5324174165725708,
550
+ "rewards/margins": 3.230978488922119,
551
+ "rewards/rejected": -1.6985607147216797,
552
+ "step": 165
553
+ },
554
+ {
555
+ "epoch": 1.588785046728972,
556
+ "grad_norm": 23.145444455236966,
557
+ "learning_rate": 4.77279262507344e-07,
558
+ "logits/chosen": -2.2021024227142334,
559
+ "logits/rejected": -2.1827890872955322,
560
+ "logps/chosen": -243.9816131591797,
561
+ "logps/rejected": -249.20703125,
562
+ "loss": 0.2787,
563
+ "rewards/accuracies": 0.918749988079071,
564
+ "rewards/chosen": 1.669757604598999,
565
+ "rewards/margins": 3.6465446949005127,
566
+ "rewards/rejected": -1.9767868518829346,
567
+ "step": 170
568
+ },
569
+ {
570
+ "epoch": 1.6355140186915889,
571
+ "grad_norm": 26.563830659774606,
572
+ "learning_rate": 4.5209108914542714e-07,
573
+ "logits/chosen": -2.1774230003356934,
574
+ "logits/rejected": -2.1725821495056152,
575
+ "logps/chosen": -224.1102294921875,
576
+ "logps/rejected": -234.7071075439453,
577
+ "loss": 0.3123,
578
+ "rewards/accuracies": 0.8812500238418579,
579
+ "rewards/chosen": 1.2634233236312866,
580
+ "rewards/margins": 3.5007872581481934,
581
+ "rewards/rejected": -2.2373640537261963,
582
+ "step": 175
583
+ },
584
+ {
585
+ "epoch": 1.6822429906542056,
586
+ "grad_norm": 25.254447136991615,
587
+ "learning_rate": 4.2702510783220475e-07,
588
+ "logits/chosen": -2.168032169342041,
589
+ "logits/rejected": -2.1451544761657715,
590
+ "logps/chosen": -228.2743682861328,
591
+ "logps/rejected": -211.07705688476562,
592
+ "loss": 0.2948,
593
+ "rewards/accuracies": 0.918749988079071,
594
+ "rewards/chosen": 1.1032750606536865,
595
+ "rewards/margins": 3.2025279998779297,
596
+ "rewards/rejected": -2.0992531776428223,
597
+ "step": 180
598
+ },
599
+ {
600
+ "epoch": 1.7289719626168223,
601
+ "grad_norm": 20.89817797522474,
602
+ "learning_rate": 4.0214524954741586e-07,
603
+ "logits/chosen": -2.1714885234832764,
604
+ "logits/rejected": -2.148820400238037,
605
+ "logps/chosen": -250.5221405029297,
606
+ "logps/rejected": -223.18399047851562,
607
+ "loss": 0.2802,
608
+ "rewards/accuracies": 0.90625,
609
+ "rewards/chosen": 1.3431367874145508,
610
+ "rewards/margins": 3.7224392890930176,
611
+ "rewards/rejected": -2.3793022632598877,
612
+ "step": 185
613
+ },
614
+ {
615
+ "epoch": 1.7757009345794392,
616
+ "grad_norm": 17.107545008827852,
617
+ "learning_rate": 3.7751497056257305e-07,
618
+ "logits/chosen": -2.1603405475616455,
619
+ "logits/rejected": -2.145948648452759,
620
+ "logps/chosen": -234.90872192382812,
621
+ "logps/rejected": -240.06298828125,
622
+ "loss": 0.269,
623
+ "rewards/accuracies": 0.925000011920929,
624
+ "rewards/chosen": 1.084341049194336,
625
+ "rewards/margins": 3.5826897621154785,
626
+ "rewards/rejected": -2.4983482360839844,
627
+ "step": 190
628
+ },
629
+ {
630
+ "epoch": 1.8224299065420562,
631
+ "grad_norm": 27.137427341683352,
632
+ "learning_rate": 3.531970905952478e-07,
633
+ "logits/chosen": -2.1491293907165527,
634
+ "logits/rejected": -2.1209685802459717,
635
+ "logps/chosen": -221.9265899658203,
636
+ "logps/rejected": -215.990478515625,
637
+ "loss": 0.2937,
638
+ "rewards/accuracies": 0.875,
639
+ "rewards/chosen": 0.9956735372543335,
640
+ "rewards/margins": 3.271005630493164,
641
+ "rewards/rejected": -2.275331974029541,
642
+ "step": 195
643
+ },
644
+ {
645
+ "epoch": 1.8691588785046729,
646
+ "grad_norm": 23.508295318902285,
647
+ "learning_rate": 3.2925363258689553e-07,
648
+ "logits/chosen": -2.161498785018921,
649
+ "logits/rejected": -2.1209306716918945,
650
+ "logps/chosen": -248.2667999267578,
651
+ "logps/rejected": -227.0295867919922,
652
+ "loss": 0.2853,
653
+ "rewards/accuracies": 0.862500011920929,
654
+ "rewards/chosen": 1.2955918312072754,
655
+ "rewards/margins": 3.347618818283081,
656
+ "rewards/rejected": -2.0520269870758057,
657
+ "step": 200
658
+ },
659
+ {
660
+ "epoch": 1.8691588785046729,
661
+ "eval_logits/chosen": -2.147773504257202,
662
+ "eval_logits/rejected": -2.1250855922698975,
663
+ "eval_logps/chosen": -249.0491485595703,
664
+ "eval_logps/rejected": -232.39312744140625,
665
+ "eval_loss": 0.5163093209266663,
666
+ "eval_rewards/accuracies": 0.8203125,
667
+ "eval_rewards/chosen": 0.718841552734375,
668
+ "eval_rewards/margins": 2.4302282333374023,
669
+ "eval_rewards/rejected": -1.711386799812317,
670
+ "eval_runtime": 201.455,
671
+ "eval_samples_per_second": 15.09,
672
+ "eval_steps_per_second": 0.238,
673
+ "step": 200
674
+ },
675
+ {
676
+ "epoch": 1.9158878504672896,
677
+ "grad_norm": 23.066010809349862,
678
+ "learning_rate": 3.0574566451286086e-07,
679
+ "logits/chosen": -2.1637778282165527,
680
+ "logits/rejected": -2.132652997970581,
681
+ "logps/chosen": -242.0740203857422,
682
+ "logps/rejected": -231.40139770507812,
683
+ "loss": 0.3089,
684
+ "rewards/accuracies": 0.9125000238418579,
685
+ "rewards/chosen": 1.1851091384887695,
686
+ "rewards/margins": 3.5020480155944824,
687
+ "rewards/rejected": -2.316938877105713,
688
+ "step": 205
689
+ },
690
+ {
691
+ "epoch": 1.9626168224299065,
692
+ "grad_norm": 21.925966755838537,
693
+ "learning_rate": 2.8273314362803333e-07,
694
+ "logits/chosen": -2.143448829650879,
695
+ "logits/rejected": -2.142066240310669,
696
+ "logps/chosen": -250.38720703125,
697
+ "logps/rejected": -226.64315795898438,
698
+ "loss": 0.2716,
699
+ "rewards/accuracies": 0.918749988079071,
700
+ "rewards/chosen": 1.1868922710418701,
701
+ "rewards/margins": 3.174391269683838,
702
+ "rewards/rejected": -1.9874988794326782,
703
+ "step": 210
704
+ },
705
+ {
706
+ "epoch": 2.0093457943925235,
707
+ "grad_norm": 13.114607410778014,
708
+ "learning_rate": 2.602747635454047e-07,
709
+ "logits/chosen": -2.18164324760437,
710
+ "logits/rejected": -2.160330295562744,
711
+ "logps/chosen": -235.2315216064453,
712
+ "logps/rejected": -223.3993682861328,
713
+ "loss": 0.2259,
714
+ "rewards/accuracies": 0.8999999761581421,
715
+ "rewards/chosen": 1.4251452684402466,
716
+ "rewards/margins": 3.5938689708709717,
717
+ "rewards/rejected": -2.1687240600585938,
718
+ "step": 215
719
+ },
720
+ {
721
+ "epoch": 2.05607476635514,
722
+ "grad_norm": 16.881870959615693,
723
+ "learning_rate": 2.384278045375523e-07,
724
+ "logits/chosen": -2.2383639812469482,
725
+ "logits/rejected": -2.227437973022461,
726
+ "logps/chosen": -235.4667205810547,
727
+ "logps/rejected": -229.47811889648438,
728
+ "loss": 0.1641,
729
+ "rewards/accuracies": 0.925000011920929,
730
+ "rewards/chosen": 1.6337192058563232,
731
+ "rewards/margins": 3.944089412689209,
732
+ "rewards/rejected": -2.310370683670044,
733
+ "step": 220
734
+ },
735
+ {
736
+ "epoch": 2.102803738317757,
737
+ "grad_norm": 17.530383979048825,
738
+ "learning_rate": 2.1724798744286071e-07,
739
+ "logits/chosen": -2.266674518585205,
740
+ "logits/rejected": -2.2329540252685547,
741
+ "logps/chosen": -245.1803436279297,
742
+ "logps/rejected": -238.8377685546875,
743
+ "loss": 0.1503,
744
+ "rewards/accuracies": 0.9437500238418579,
745
+ "rewards/chosen": 1.8679962158203125,
746
+ "rewards/margins": 4.181756496429443,
747
+ "rewards/rejected": -2.3137600421905518,
748
+ "step": 225
749
+ },
750
+ {
751
+ "epoch": 2.149532710280374,
752
+ "grad_norm": 19.41034220051436,
753
+ "learning_rate": 1.9678933154909095e-07,
754
+ "logits/chosen": -2.2616019248962402,
755
+ "logits/rejected": -2.260685443878174,
756
+ "logps/chosen": -251.1139373779297,
757
+ "logps/rejected": -249.1596221923828,
758
+ "loss": 0.1462,
759
+ "rewards/accuracies": 0.925000011920929,
760
+ "rewards/chosen": 2.074453115463257,
761
+ "rewards/margins": 4.364731788635254,
762
+ "rewards/rejected": -2.290278911590576,
763
+ "step": 230
764
+ },
765
+ {
766
+ "epoch": 2.196261682242991,
767
+ "grad_norm": 15.416496217582845,
768
+ "learning_rate": 1.77104016816768e-07,
769
+ "logits/chosen": -2.259556293487549,
770
+ "logits/rejected": -2.2295475006103516,
771
+ "logps/chosen": -243.61294555664062,
772
+ "logps/rejected": -226.0421600341797,
773
+ "loss": 0.155,
774
+ "rewards/accuracies": 0.918749988079071,
775
+ "rewards/chosen": 1.5110995769500732,
776
+ "rewards/margins": 4.157734394073486,
777
+ "rewards/rejected": -2.646634578704834,
778
+ "step": 235
779
+ },
780
+ {
781
+ "epoch": 2.2429906542056073,
782
+ "grad_norm": 21.341300239829316,
783
+ "learning_rate": 1.5824225079378684e-07,
784
+ "logits/chosen": -2.2581722736358643,
785
+ "logits/rejected": -2.2538020610809326,
786
+ "logps/chosen": -234.7251434326172,
787
+ "logps/rejected": -247.6671600341797,
788
+ "loss": 0.1613,
789
+ "rewards/accuracies": 0.9375,
790
+ "rewards/chosen": 1.555023193359375,
791
+ "rewards/margins": 4.231776237487793,
792
+ "rewards/rejected": -2.676752805709839,
793
+ "step": 240
794
+ },
795
+ {
796
+ "epoch": 2.289719626168224,
797
+ "grad_norm": 19.28302760698744,
798
+ "learning_rate": 1.4025214056067237e-07,
799
+ "logits/chosen": -2.26164174079895,
800
+ "logits/rejected": -2.2327027320861816,
801
+ "logps/chosen": -230.2368621826172,
802
+ "logps/rejected": -244.13900756835938,
803
+ "loss": 0.159,
804
+ "rewards/accuracies": 0.9437500238418579,
805
+ "rewards/chosen": 1.3178222179412842,
806
+ "rewards/margins": 4.314841270446777,
807
+ "rewards/rejected": -2.9970195293426514,
808
+ "step": 245
809
+ },
810
+ {
811
+ "epoch": 2.336448598130841,
812
+ "grad_norm": 20.865529370197397,
813
+ "learning_rate": 1.2317957003309725e-07,
814
+ "logits/chosen": -2.2660677433013916,
815
+ "logits/rejected": -2.2249627113342285,
816
+ "logps/chosen": -249.3228759765625,
817
+ "logps/rejected": -236.3594512939453,
818
+ "loss": 0.1541,
819
+ "rewards/accuracies": 0.956250011920929,
820
+ "rewards/chosen": 1.5125614404678345,
821
+ "rewards/margins": 4.342096328735352,
822
+ "rewards/rejected": -2.8295350074768066,
823
+ "step": 250
824
+ },
825
+ {
826
+ "epoch": 2.336448598130841,
827
+ "eval_logits/chosen": -2.235158920288086,
828
+ "eval_logits/rejected": -2.2153029441833496,
829
+ "eval_logps/chosen": -250.26039123535156,
830
+ "eval_logps/rejected": -236.7135467529297,
831
+ "eval_loss": 0.5270811319351196,
832
+ "eval_rewards/accuracies": 0.8177083134651184,
833
+ "eval_rewards/chosen": 0.5977155566215515,
834
+ "eval_rewards/margins": 2.7411410808563232,
835
+ "eval_rewards/rejected": -2.143425226211548,
836
+ "eval_runtime": 203.3157,
837
+ "eval_samples_per_second": 14.952,
838
+ "eval_steps_per_second": 0.236,
839
+ "step": 250
840
+ },
841
+ {
842
+ "epoch": 2.383177570093458,
843
+ "grad_norm": 12.155538402681515,
844
+ "learning_rate": 1.0706808293459873e-07,
845
+ "logits/chosen": -2.2205164432525635,
846
+ "logits/rejected": -2.209555149078369,
847
+ "logps/chosen": -223.6109619140625,
848
+ "logps/rejected": -224.11154174804688,
849
+ "loss": 0.1437,
850
+ "rewards/accuracies": 0.90625,
851
+ "rewards/chosen": 1.015878438949585,
852
+ "rewards/margins": 3.95696759223938,
853
+ "rewards/rejected": -2.941089153289795,
854
+ "step": 255
855
+ },
856
+ {
857
+ "epoch": 2.4299065420560746,
858
+ "grad_norm": 13.826515054647423,
859
+ "learning_rate": 9.195877173797534e-08,
860
+ "logits/chosen": -2.226680040359497,
861
+ "logits/rejected": -2.2163913249969482,
862
+ "logps/chosen": -226.86441040039062,
863
+ "logps/rejected": -242.27633666992188,
864
+ "loss": 0.163,
865
+ "rewards/accuracies": 0.9624999761581421,
866
+ "rewards/chosen": 1.5521247386932373,
867
+ "rewards/margins": 4.2657151222229,
868
+ "rewards/rejected": -2.7135910987854004,
869
+ "step": 260
870
+ },
871
+ {
872
+ "epoch": 2.4766355140186915,
873
+ "grad_norm": 15.75649594362957,
874
+ "learning_rate": 7.789017285861438e-08,
875
+ "logits/chosen": -2.2129428386688232,
876
+ "logits/rejected": -2.1899213790893555,
877
+ "logps/chosen": -253.51864624023438,
878
+ "logps/rejected": -233.0913848876953,
879
+ "loss": 0.1529,
880
+ "rewards/accuracies": 0.956250011920929,
881
+ "rewards/chosen": 1.8306633234024048,
882
+ "rewards/margins": 4.6044087409973145,
883
+ "rewards/rejected": -2.773745059967041,
884
+ "step": 265
885
+ },
886
+ {
887
+ "epoch": 2.5233644859813085,
888
+ "grad_norm": 14.072200590843078,
889
+ "learning_rate": 6.489816836706785e-08,
890
+ "logits/chosen": -2.1907477378845215,
891
+ "logits/rejected": -2.1698105335235596,
892
+ "logps/chosen": -234.4886932373047,
893
+ "logps/rejected": -205.0101318359375,
894
+ "loss": 0.1452,
895
+ "rewards/accuracies": 0.949999988079071,
896
+ "rewards/chosen": 1.6402314901351929,
897
+ "rewards/margins": 4.284368515014648,
898
+ "rewards/rejected": -2.644136905670166,
899
+ "step": 270
900
+ },
901
+ {
902
+ "epoch": 2.5700934579439254,
903
+ "grad_norm": 15.994347574292076,
904
+ "learning_rate": 5.3015894471550914e-08,
905
+ "logits/chosen": -2.1613574028015137,
906
+ "logits/rejected": -2.137498378753662,
907
+ "logps/chosen": -229.953125,
908
+ "logps/rejected": -234.6221923828125,
909
+ "loss": 0.1289,
910
+ "rewards/accuracies": 0.9624999761581421,
911
+ "rewards/chosen": 1.5998098850250244,
912
+ "rewards/margins": 4.529351234436035,
913
+ "rewards/rejected": -2.9295413494110107,
914
+ "step": 275
915
+ },
916
+ {
917
+ "epoch": 2.616822429906542,
918
+ "grad_norm": 15.819926843742866,
919
+ "learning_rate": 4.227365700378799e-08,
920
+ "logits/chosen": -2.1940014362335205,
921
+ "logits/rejected": -2.140695571899414,
922
+ "logps/chosen": -248.3275909423828,
923
+ "logps/rejected": -266.1819763183594,
924
+ "loss": 0.1526,
925
+ "rewards/accuracies": 0.9437500238418579,
926
+ "rewards/chosen": 1.9989855289459229,
927
+ "rewards/margins": 4.9421706199646,
928
+ "rewards/rejected": -2.9431850910186768,
929
+ "step": 280
930
+ },
931
+ {
932
+ "epoch": 2.663551401869159,
933
+ "grad_norm": 17.059099314353197,
934
+ "learning_rate": 3.269885412375223e-08,
935
+ "logits/chosen": -2.1719181537628174,
936
+ "logits/rejected": -2.1409342288970947,
937
+ "logps/chosen": -241.63809204101562,
938
+ "logps/rejected": -226.8559112548828,
939
+ "loss": 0.151,
940
+ "rewards/accuracies": 0.949999988079071,
941
+ "rewards/chosen": 1.2887296676635742,
942
+ "rewards/margins": 4.394392490386963,
943
+ "rewards/rejected": -3.1056625843048096,
944
+ "step": 285
945
+ },
946
+ {
947
+ "epoch": 2.710280373831776,
948
+ "grad_norm": 16.789145370360544,
949
+ "learning_rate": 2.4315906440446952e-08,
950
+ "logits/chosen": -2.1815662384033203,
951
+ "logits/rejected": -2.1283011436462402,
952
+ "logps/chosen": -244.0872802734375,
953
+ "logps/rejected": -240.93655395507812,
954
+ "loss": 0.1788,
955
+ "rewards/accuracies": 0.9312499761581421,
956
+ "rewards/chosen": 1.2021782398223877,
957
+ "rewards/margins": 4.522456169128418,
958
+ "rewards/rejected": -3.320277452468872,
959
+ "step": 290
960
+ },
961
+ {
962
+ "epoch": 2.7570093457943923,
963
+ "grad_norm": 16.725526736330913,
964
+ "learning_rate": 1.7146194726952778e-08,
965
+ "logits/chosen": -2.1704020500183105,
966
+ "logits/rejected": -2.1348023414611816,
967
+ "logps/chosen": -229.75509643554688,
968
+ "logps/rejected": -227.176513671875,
969
+ "loss": 0.139,
970
+ "rewards/accuracies": 0.956250011920929,
971
+ "rewards/chosen": 1.2168376445770264,
972
+ "rewards/margins": 4.267210483551025,
973
+ "rewards/rejected": -3.05037260055542,
974
+ "step": 295
975
+ },
976
+ {
977
+ "epoch": 2.803738317757009,
978
+ "grad_norm": 15.148406064058884,
979
+ "learning_rate": 1.1208005388599951e-08,
980
+ "logits/chosen": -2.1668269634246826,
981
+ "logits/rejected": -2.1430649757385254,
982
+ "logps/chosen": -227.9604034423828,
983
+ "logps/rejected": -239.50302124023438,
984
+ "loss": 0.1566,
985
+ "rewards/accuracies": 0.949999988079071,
986
+ "rewards/chosen": 1.1434178352355957,
987
+ "rewards/margins": 4.731930732727051,
988
+ "rewards/rejected": -3.588513135910034,
989
+ "step": 300
990
+ },
991
+ {
992
+ "epoch": 2.803738317757009,
993
+ "eval_logits/chosen": -2.16743540763855,
994
+ "eval_logits/rejected": -2.144243001937866,
995
+ "eval_logps/chosen": -250.66944885253906,
996
+ "eval_logps/rejected": -238.1006622314453,
997
+ "eval_loss": 0.5241575837135315,
998
+ "eval_rewards/accuracies": 0.8307291865348816,
999
+ "eval_rewards/chosen": 0.5568115711212158,
1000
+ "eval_rewards/margins": 2.8389499187469482,
1001
+ "eval_rewards/rejected": -2.2821381092071533,
1002
+ "eval_runtime": 202.2678,
1003
+ "eval_samples_per_second": 15.03,
1004
+ "eval_steps_per_second": 0.237,
1005
+ "step": 300
1006
+ }
1007
+ ],
1008
+ "logging_steps": 5,
1009
+ "max_steps": 321,
1010
+ "num_input_tokens_seen": 0,
1011
+ "num_train_epochs": 3,
1012
+ "save_steps": 100,
1013
+ "stateful_callbacks": {
1014
+ "TrainerControl": {
1015
+ "args": {
1016
+ "should_epoch_stop": false,
1017
+ "should_evaluate": false,
1018
+ "should_log": false,
1019
+ "should_save": true,
1020
+ "should_training_stop": false
1021
+ },
1022
+ "attributes": {}
1023
+ }
1024
+ },
1025
+ "total_flos": 3537404052897792.0,
1026
+ "train_batch_size": 8,
1027
+ "trial_name": null,
1028
+ "trial_params": null
1029
+ }
checkpoint-300/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f18434785ed5c1cf7b24cfe9bc32bfda4c423eb14a3664f74540e373b8660d0e
3
+ size 7096
checkpoint-321/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<image>": 32000,
3
+ "<pad>": 32001
4
+ }
checkpoint-321/config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/align-anything/hantao/models/llava-v1.6-mistral-7b-hf",
3
+ "architectures": [
4
+ "LlavaNextForConditionalGeneration"
5
+ ],
6
+ "hidden_size": 4096,
7
+ "ignore_index": -100,
8
+ "image_grid_pinpoints": [
9
+ [
10
+ 336,
11
+ 672
12
+ ],
13
+ [
14
+ 672,
15
+ 336
16
+ ],
17
+ [
18
+ 672,
19
+ 672
20
+ ],
21
+ [
22
+ 1008,
23
+ 336
24
+ ],
25
+ [
26
+ 336,
27
+ 1008
28
+ ]
29
+ ],
30
+ "image_seq_length": 576,
31
+ "image_token_index": 32000,
32
+ "model_type": "llava_next",
33
+ "projector_hidden_act": "gelu",
34
+ "text_config": {
35
+ "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
36
+ "architectures": [
37
+ "MistralForCausalLM"
38
+ ],
39
+ "intermediate_size": 14336,
40
+ "max_position_embeddings": 32768,
41
+ "model_type": "mistral",
42
+ "num_key_value_heads": 8,
43
+ "rms_norm_eps": 1e-05,
44
+ "rope_theta": 1000000.0,
45
+ "sliding_window": null,
46
+ "torch_dtype": "bfloat16",
47
+ "vocab_size": 32064
48
+ },
49
+ "tie_word_embeddings": false,
50
+ "torch_dtype": "bfloat16",
51
+ "transformers_version": "4.45.2",
52
+ "use_cache": false,
53
+ "use_image_newline_parameter": true,
54
+ "vision_config": {
55
+ "hidden_size": 1024,
56
+ "image_size": 336,
57
+ "intermediate_size": 4096,
58
+ "model_type": "clip_vision_model",
59
+ "num_attention_heads": 16,
60
+ "num_hidden_layers": 24,
61
+ "patch_size": 14,
62
+ "projection_dim": 768,
63
+ "vocab_size": 32000
64
+ },
65
+ "vision_feature_layer": -2,
66
+ "vision_feature_select_strategy": "default",
67
+ "vocab_size": 32064
68
+ }