Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- config.json +50 -0
- model.safetensors +3 -0
- quant_log.csv +281 -0
- quantize_config.json +21 -0
- special_tokens_map.json +24 -0
- tokenizer.json +3 -0
- tokenizer_config.json +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_attn_implementation_autoset": true,
|
3 |
+
"_name_or_path": "/fsx/ubuntu/.cache/huggingface/hub/models--cyberagent--Mistral-Nemo-Japanese-Instruct-2408/snapshots/8591f78522a5d651209fc9f354c848508db7a3eb",
|
4 |
+
"architectures": [
|
5 |
+
"MistralForCausalLM"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 1,
|
9 |
+
"eos_token_id": 131072,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 5120,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 1024000,
|
16 |
+
"model_type": "mistral",
|
17 |
+
"num_attention_heads": 32,
|
18 |
+
"num_hidden_layers": 40,
|
19 |
+
"num_key_value_heads": 8,
|
20 |
+
"pad_token_id": 10,
|
21 |
+
"quantization_config": {
|
22 |
+
"bits": 4,
|
23 |
+
"checkpoint_format": "gptq",
|
24 |
+
"desc_act": false,
|
25 |
+
"dynamic": null,
|
26 |
+
"group_size": 32,
|
27 |
+
"lm_head": false,
|
28 |
+
"meta": {
|
29 |
+
"damp_auto_increment": 0.0025,
|
30 |
+
"damp_percent": 0.01,
|
31 |
+
"mse": 0.0,
|
32 |
+
"quantizer": [
|
33 |
+
"gptqmodel:1.7.0"
|
34 |
+
],
|
35 |
+
"static_groups": false,
|
36 |
+
"true_sequential": true,
|
37 |
+
"uri": "https://github.com/modelcloud/gptqmodel"
|
38 |
+
},
|
39 |
+
"quant_method": "gptq",
|
40 |
+
"sym": true
|
41 |
+
},
|
42 |
+
"rms_norm_eps": 1e-05,
|
43 |
+
"rope_theta": 1000000.0,
|
44 |
+
"sliding_window": null,
|
45 |
+
"tie_word_embeddings": false,
|
46 |
+
"torch_dtype": "bfloat16",
|
47 |
+
"transformers_version": "4.48.0",
|
48 |
+
"use_cache": false,
|
49 |
+
"vocab_size": 131074
|
50 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8488aa9e46217746d344e4095fe1d6f1fefb30ade44eddf6357875e0934ac3f1
|
3 |
+
size 8996969392
|
quant_log.csv
ADDED
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
layer,module,loss,damp,time
|
2 |
+
0,self_attn.k_proj,0.34174,0.01000,2.186
|
3 |
+
0,self_attn.v_proj,0.00688,0.01000,1.543
|
4 |
+
0,self_attn.q_proj,0.93132,0.01000,1.569
|
5 |
+
0,self_attn.o_proj,0.00028,0.01000,1.277
|
6 |
+
0,mlp.up_proj,0.24894,0.01000,1.844
|
7 |
+
0,mlp.gate_proj,0.28264,0.01000,1.861
|
8 |
+
0,mlp.down_proj,0.14896,0.01000,4.614
|
9 |
+
1,self_attn.k_proj,0.68438,0.01000,1.535
|
10 |
+
1,self_attn.v_proj,0.10901,0.01000,1.488
|
11 |
+
1,self_attn.q_proj,1.54074,0.01000,1.547
|
12 |
+
1,self_attn.o_proj,0.00141,0.01000,1.285
|
13 |
+
1,mlp.up_proj,0.49999,0.01000,1.866
|
14 |
+
1,mlp.gate_proj,0.56625,0.01000,1.846
|
15 |
+
1,mlp.down_proj,0.00424,0.01000,4.700
|
16 |
+
2,self_attn.k_proj,0.55106,0.01000,1.548
|
17 |
+
2,self_attn.v_proj,0.11657,0.01000,1.538
|
18 |
+
2,self_attn.q_proj,1.27605,0.01000,1.576
|
19 |
+
2,self_attn.o_proj,0.00316,0.01000,1.272
|
20 |
+
2,mlp.up_proj,1.10488,0.01000,1.843
|
21 |
+
2,mlp.gate_proj,1.30114,0.01000,1.831
|
22 |
+
2,mlp.down_proj,0.06977,0.01000,4.721
|
23 |
+
3,self_attn.k_proj,1.16495,0.01000,1.573
|
24 |
+
3,self_attn.v_proj,0.22999,0.01000,1.560
|
25 |
+
3,self_attn.q_proj,2.96266,0.01000,1.614
|
26 |
+
3,self_attn.o_proj,0.00390,0.01000,1.288
|
27 |
+
3,mlp.up_proj,1.36464,0.01000,1.842
|
28 |
+
3,mlp.gate_proj,1.73010,0.01000,1.842
|
29 |
+
3,mlp.down_proj,0.02003,0.01000,4.841
|
30 |
+
4,self_attn.k_proj,0.87335,0.01000,1.599
|
31 |
+
4,self_attn.v_proj,0.22404,0.01000,1.495
|
32 |
+
4,self_attn.q_proj,2.08053,0.01000,1.583
|
33 |
+
4,self_attn.o_proj,0.00737,0.01000,1.312
|
34 |
+
4,mlp.up_proj,1.84295,0.01000,1.866
|
35 |
+
4,mlp.gate_proj,2.44115,0.01000,1.836
|
36 |
+
4,mlp.down_proj,0.03305,0.01000,4.791
|
37 |
+
5,self_attn.k_proj,1.15905,0.01000,1.599
|
38 |
+
5,self_attn.v_proj,0.28296,0.01000,1.533
|
39 |
+
5,self_attn.q_proj,3.03484,0.01000,1.628
|
40 |
+
5,self_attn.o_proj,0.00899,0.01000,1.333
|
41 |
+
5,mlp.up_proj,2.47513,0.01000,1.882
|
42 |
+
5,mlp.gate_proj,3.14948,0.01000,1.868
|
43 |
+
5,mlp.down_proj,0.05154,0.01000,4.817
|
44 |
+
6,self_attn.k_proj,1.38265,0.01000,1.607
|
45 |
+
6,self_attn.v_proj,0.31207,0.01000,1.546
|
46 |
+
6,self_attn.q_proj,3.41902,0.01000,1.618
|
47 |
+
6,self_attn.o_proj,0.02043,0.01000,1.337
|
48 |
+
6,mlp.up_proj,3.01841,0.01000,1.874
|
49 |
+
6,mlp.gate_proj,3.90744,0.01000,1.854
|
50 |
+
6,mlp.down_proj,0.07268,0.01000,4.802
|
51 |
+
7,self_attn.k_proj,1.35015,0.01000,1.595
|
52 |
+
7,self_attn.v_proj,0.37607,0.01000,1.474
|
53 |
+
7,self_attn.q_proj,3.46004,0.01000,1.558
|
54 |
+
7,self_attn.o_proj,0.03111,0.01000,1.291
|
55 |
+
7,mlp.up_proj,3.55542,0.01000,1.820
|
56 |
+
7,mlp.gate_proj,4.37709,0.01000,1.771
|
57 |
+
7,mlp.down_proj,0.08729,0.01000,4.688
|
58 |
+
8,self_attn.k_proj,1.47437,0.01000,1.549
|
59 |
+
8,self_attn.v_proj,0.37873,0.01000,1.480
|
60 |
+
8,self_attn.q_proj,3.70286,0.01000,1.566
|
61 |
+
8,self_attn.o_proj,0.03336,0.01000,1.283
|
62 |
+
8,mlp.up_proj,4.09177,0.01000,1.820
|
63 |
+
8,mlp.gate_proj,4.67661,0.01000,1.808
|
64 |
+
8,mlp.down_proj,0.10785,0.01000,4.629
|
65 |
+
9,self_attn.k_proj,1.57433,0.01000,1.534
|
66 |
+
9,self_attn.v_proj,0.52414,0.01000,1.476
|
67 |
+
9,self_attn.q_proj,3.93775,0.01000,1.555
|
68 |
+
9,self_attn.o_proj,0.05368,0.01000,1.293
|
69 |
+
9,mlp.up_proj,4.52447,0.01000,1.819
|
70 |
+
9,mlp.gate_proj,4.83508,0.01000,1.817
|
71 |
+
9,mlp.down_proj,0.14589,0.01000,4.752
|
72 |
+
10,self_attn.k_proj,1.51211,0.01000,1.548
|
73 |
+
10,self_attn.v_proj,0.48440,0.01000,1.485
|
74 |
+
10,self_attn.q_proj,3.95158,0.01000,1.576
|
75 |
+
10,self_attn.o_proj,0.06715,0.01000,1.298
|
76 |
+
10,mlp.up_proj,4.98417,0.01000,1.850
|
77 |
+
10,mlp.gate_proj,5.13272,0.01000,1.780
|
78 |
+
10,mlp.down_proj,0.14810,0.01000,4.657
|
79 |
+
11,self_attn.k_proj,1.85439,0.01000,1.541
|
80 |
+
11,self_attn.v_proj,0.53719,0.01000,1.483
|
81 |
+
11,self_attn.q_proj,4.73929,0.01000,1.565
|
82 |
+
11,self_attn.o_proj,0.07281,0.01000,1.307
|
83 |
+
11,mlp.up_proj,5.58413,0.01000,1.847
|
84 |
+
11,mlp.gate_proj,6.19074,0.01000,1.780
|
85 |
+
11,mlp.down_proj,0.15939,0.01000,4.645
|
86 |
+
12,self_attn.k_proj,1.75254,0.01000,1.545
|
87 |
+
12,self_attn.v_proj,0.67888,0.01000,1.515
|
88 |
+
12,self_attn.q_proj,4.60119,0.01000,1.606
|
89 |
+
12,self_attn.o_proj,0.07729,0.01000,1.329
|
90 |
+
12,mlp.up_proj,6.18354,0.01000,1.889
|
91 |
+
12,mlp.gate_proj,6.79671,0.01000,1.814
|
92 |
+
12,mlp.down_proj,0.17589,0.01000,4.764
|
93 |
+
13,self_attn.k_proj,2.24022,0.01000,1.567
|
94 |
+
13,self_attn.v_proj,0.71202,0.01000,1.511
|
95 |
+
13,self_attn.q_proj,5.85935,0.01000,1.609
|
96 |
+
13,self_attn.o_proj,0.08961,0.01000,1.319
|
97 |
+
13,mlp.up_proj,6.70540,0.01000,1.871
|
98 |
+
13,mlp.gate_proj,7.09266,0.01000,1.876
|
99 |
+
13,mlp.down_proj,0.25894,0.01000,4.755
|
100 |
+
14,self_attn.k_proj,2.51240,0.01000,1.582
|
101 |
+
14,self_attn.v_proj,0.79869,0.01000,1.513
|
102 |
+
14,self_attn.q_proj,6.54318,0.01000,1.598
|
103 |
+
14,self_attn.o_proj,0.08623,0.01000,1.329
|
104 |
+
14,mlp.up_proj,7.52908,0.01000,1.868
|
105 |
+
14,mlp.gate_proj,7.52712,0.01000,1.850
|
106 |
+
14,mlp.down_proj,0.24156,0.01000,4.821
|
107 |
+
15,self_attn.k_proj,2.24002,0.01000,1.583
|
108 |
+
15,self_attn.v_proj,0.74680,0.01000,1.512
|
109 |
+
15,self_attn.q_proj,5.56417,0.01000,1.600
|
110 |
+
15,self_attn.o_proj,0.10931,0.01000,1.323
|
111 |
+
15,mlp.up_proj,7.90322,0.01000,1.881
|
112 |
+
15,mlp.gate_proj,7.46762,0.01000,1.845
|
113 |
+
15,mlp.down_proj,0.27886,0.01000,4.797
|
114 |
+
16,self_attn.k_proj,2.30226,0.01000,1.574
|
115 |
+
16,self_attn.v_proj,0.86189,0.01000,1.519
|
116 |
+
16,self_attn.q_proj,5.84766,0.01000,1.597
|
117 |
+
16,self_attn.o_proj,0.13839,0.01000,1.327
|
118 |
+
16,mlp.up_proj,8.51025,0.01000,1.875
|
119 |
+
16,mlp.gate_proj,7.79725,0.01000,1.847
|
120 |
+
16,mlp.down_proj,0.33317,0.01000,4.726
|
121 |
+
17,self_attn.k_proj,2.12047,0.01000,1.588
|
122 |
+
17,self_attn.v_proj,1.04065,0.01000,1.526
|
123 |
+
17,self_attn.q_proj,5.80184,0.01000,1.649
|
124 |
+
17,self_attn.o_proj,0.18177,0.01000,1.348
|
125 |
+
17,mlp.up_proj,9.50042,0.01000,1.837
|
126 |
+
17,mlp.gate_proj,8.48879,0.01000,1.838
|
127 |
+
17,mlp.down_proj,0.41554,0.01000,4.645
|
128 |
+
18,self_attn.k_proj,2.57716,0.01000,1.617
|
129 |
+
18,self_attn.v_proj,1.09571,0.01000,1.589
|
130 |
+
18,self_attn.q_proj,6.91603,0.01000,1.584
|
131 |
+
18,self_attn.o_proj,0.23456,0.01000,1.291
|
132 |
+
18,mlp.up_proj,10.50572,0.01000,1.904
|
133 |
+
18,mlp.gate_proj,9.85006,0.01000,1.897
|
134 |
+
18,mlp.down_proj,0.50532,0.01000,4.897
|
135 |
+
19,self_attn.k_proj,2.59389,0.01000,1.623
|
136 |
+
19,self_attn.v_proj,1.52294,0.01000,1.506
|
137 |
+
19,self_attn.q_proj,7.81570,0.01000,1.628
|
138 |
+
19,self_attn.o_proj,0.17006,0.01000,1.343
|
139 |
+
19,mlp.up_proj,11.11729,0.01000,1.865
|
140 |
+
19,mlp.gate_proj,10.67446,0.01000,1.864
|
141 |
+
19,mlp.down_proj,0.54766,0.01000,4.851
|
142 |
+
20,self_attn.k_proj,2.70448,0.01000,1.617
|
143 |
+
20,self_attn.v_proj,1.38711,0.01000,1.506
|
144 |
+
20,self_attn.q_proj,7.77470,0.01000,1.571
|
145 |
+
20,self_attn.o_proj,0.16538,0.01000,1.288
|
146 |
+
20,mlp.up_proj,11.94513,0.01000,1.820
|
147 |
+
20,mlp.gate_proj,12.15438,0.01000,1.779
|
148 |
+
20,mlp.down_proj,0.61681,0.01000,4.641
|
149 |
+
21,self_attn.k_proj,2.62276,0.01000,1.577
|
150 |
+
21,self_attn.v_proj,1.24534,0.01000,1.510
|
151 |
+
21,self_attn.q_proj,7.43140,0.01000,1.648
|
152 |
+
21,self_attn.o_proj,0.14362,0.01000,1.362
|
153 |
+
21,mlp.up_proj,13.03621,0.01000,1.919
|
154 |
+
21,mlp.gate_proj,13.76658,0.01000,1.872
|
155 |
+
21,mlp.down_proj,0.70165,0.01000,4.872
|
156 |
+
22,self_attn.k_proj,2.53961,0.01000,1.620
|
157 |
+
22,self_attn.v_proj,1.77617,0.01000,1.559
|
158 |
+
22,self_attn.q_proj,7.66903,0.01000,1.635
|
159 |
+
22,self_attn.o_proj,0.17495,0.01000,1.351
|
160 |
+
22,mlp.up_proj,14.48208,0.01000,1.908
|
161 |
+
22,mlp.gate_proj,15.64957,0.01000,1.875
|
162 |
+
22,mlp.down_proj,0.88834,0.01000,4.855
|
163 |
+
23,self_attn.k_proj,2.69558,0.01000,1.598
|
164 |
+
23,self_attn.v_proj,1.71599,0.01000,1.566
|
165 |
+
23,self_attn.q_proj,7.92795,0.01000,1.634
|
166 |
+
23,self_attn.o_proj,0.17280,0.01000,1.349
|
167 |
+
23,mlp.up_proj,15.38903,0.01000,1.895
|
168 |
+
23,mlp.gate_proj,16.40285,0.01000,1.866
|
169 |
+
23,mlp.down_proj,1.02650,0.01000,4.812
|
170 |
+
24,self_attn.k_proj,2.98528,0.01000,1.596
|
171 |
+
24,self_attn.v_proj,1.84463,0.01000,1.473
|
172 |
+
24,self_attn.q_proj,8.62414,0.01000,1.567
|
173 |
+
24,self_attn.o_proj,0.23563,0.01000,1.283
|
174 |
+
24,mlp.up_proj,16.58416,0.01000,1.819
|
175 |
+
24,mlp.gate_proj,18.03217,0.01000,1.829
|
176 |
+
24,mlp.down_proj,1.06244,0.01000,4.691
|
177 |
+
25,self_attn.k_proj,3.23795,0.01000,1.574
|
178 |
+
25,self_attn.v_proj,1.93531,0.01000,1.475
|
179 |
+
25,self_attn.q_proj,9.35893,0.01000,1.551
|
180 |
+
25,self_attn.o_proj,0.16193,0.01000,1.278
|
181 |
+
25,mlp.up_proj,17.61059,0.01000,1.819
|
182 |
+
25,mlp.gate_proj,19.45450,0.01000,1.795
|
183 |
+
25,mlp.down_proj,1.18593,0.01000,4.622
|
184 |
+
26,self_attn.k_proj,3.24546,0.01000,1.529
|
185 |
+
26,self_attn.v_proj,2.20333,0.01000,1.473
|
186 |
+
26,self_attn.q_proj,9.52539,0.01000,1.551
|
187 |
+
26,self_attn.o_proj,0.19465,0.01000,1.285
|
188 |
+
26,mlp.up_proj,19.02632,0.01000,1.829
|
189 |
+
26,mlp.gate_proj,21.40164,0.01000,1.820
|
190 |
+
26,mlp.down_proj,1.23821,0.01000,4.627
|
191 |
+
27,self_attn.k_proj,3.38141,0.01000,1.543
|
192 |
+
27,self_attn.v_proj,2.59268,0.01000,1.536
|
193 |
+
27,self_attn.q_proj,10.15105,0.01000,1.619
|
194 |
+
27,self_attn.o_proj,0.14687,0.01000,1.355
|
195 |
+
27,mlp.up_proj,20.57957,0.01000,1.902
|
196 |
+
27,mlp.gate_proj,23.47207,0.01000,1.845
|
197 |
+
27,mlp.down_proj,1.36720,0.01000,4.818
|
198 |
+
28,self_attn.k_proj,3.66731,0.01000,1.604
|
199 |
+
28,self_attn.v_proj,2.78862,0.01000,1.568
|
200 |
+
28,self_attn.q_proj,10.99899,0.01000,1.648
|
201 |
+
28,self_attn.o_proj,0.30151,0.01000,1.336
|
202 |
+
28,mlp.up_proj,22.28920,0.01000,1.805
|
203 |
+
28,mlp.gate_proj,25.07884,0.01000,1.861
|
204 |
+
28,mlp.down_proj,1.55125,0.01000,4.799
|
205 |
+
29,self_attn.k_proj,3.46947,0.01000,1.606
|
206 |
+
29,self_attn.v_proj,3.69166,0.01000,1.469
|
207 |
+
29,self_attn.q_proj,10.72053,0.01000,1.622
|
208 |
+
29,self_attn.o_proj,0.38309,0.01000,1.271
|
209 |
+
29,mlp.up_proj,24.48141,0.01000,1.785
|
210 |
+
29,mlp.gate_proj,27.68266,0.01000,1.782
|
211 |
+
29,mlp.down_proj,1.74520,0.01000,4.641
|
212 |
+
30,self_attn.k_proj,3.92125,0.01000,1.532
|
213 |
+
30,self_attn.v_proj,3.25593,0.01000,1.541
|
214 |
+
30,self_attn.q_proj,11.32519,0.01000,1.548
|
215 |
+
30,self_attn.o_proj,0.24195,0.01000,1.280
|
216 |
+
30,mlp.up_proj,26.97208,0.01000,1.867
|
217 |
+
30,mlp.gate_proj,30.63704,0.01000,1.869
|
218 |
+
30,mlp.down_proj,2.06163,0.01000,4.792
|
219 |
+
31,self_attn.k_proj,3.63291,0.01000,1.587
|
220 |
+
31,self_attn.v_proj,3.42567,0.01000,1.493
|
221 |
+
31,self_attn.q_proj,10.88180,0.01000,1.561
|
222 |
+
31,self_attn.o_proj,0.38272,0.01000,1.301
|
223 |
+
31,mlp.up_proj,29.25208,0.01000,1.815
|
224 |
+
31,mlp.gate_proj,32.44777,0.01000,1.870
|
225 |
+
31,mlp.down_proj,2.42586,0.01000,4.673
|
226 |
+
32,self_attn.k_proj,3.54336,0.01000,1.560
|
227 |
+
32,self_attn.v_proj,4.55904,0.01000,1.473
|
228 |
+
32,self_attn.q_proj,11.49044,0.01000,1.570
|
229 |
+
32,self_attn.o_proj,0.36024,0.01000,1.279
|
230 |
+
32,mlp.up_proj,31.63492,0.01000,1.813
|
231 |
+
32,mlp.gate_proj,34.47448,0.01000,1.791
|
232 |
+
32,mlp.down_proj,2.90630,0.01000,4.640
|
233 |
+
33,self_attn.k_proj,3.92611,0.01000,1.543
|
234 |
+
33,self_attn.v_proj,3.95392,0.01000,1.483
|
235 |
+
33,self_attn.q_proj,12.06905,0.01000,1.578
|
236 |
+
33,self_attn.o_proj,0.49538,0.01000,1.310
|
237 |
+
33,mlp.up_proj,33.93890,0.01000,1.886
|
238 |
+
33,mlp.gate_proj,35.75291,0.01000,1.818
|
239 |
+
33,mlp.down_proj,3.79784,0.01000,4.666
|
240 |
+
34,self_attn.k_proj,3.32339,0.01000,1.570
|
241 |
+
34,self_attn.v_proj,7.65888,0.01000,1.507
|
242 |
+
34,self_attn.q_proj,12.00349,0.01000,1.569
|
243 |
+
34,self_attn.o_proj,1.03670,0.01000,1.302
|
244 |
+
34,mlp.up_proj,37.02930,0.01000,1.824
|
245 |
+
34,mlp.gate_proj,38.60052,0.01000,1.776
|
246 |
+
34,mlp.down_proj,4.30309,0.01000,4.653
|
247 |
+
35,self_attn.k_proj,3.73706,0.01000,1.550
|
248 |
+
35,self_attn.v_proj,6.83144,0.01000,1.487
|
249 |
+
35,self_attn.q_proj,12.41380,0.01000,1.572
|
250 |
+
35,self_attn.o_proj,0.70283,0.01000,1.274
|
251 |
+
35,mlp.up_proj,40.62575,0.01000,1.870
|
252 |
+
35,mlp.gate_proj,41.55278,0.01000,1.851
|
253 |
+
35,mlp.down_proj,5.40784,0.01000,4.668
|
254 |
+
36,self_attn.k_proj,3.31415,0.01000,1.613
|
255 |
+
36,self_attn.v_proj,8.01466,0.01000,1.549
|
256 |
+
36,self_attn.q_proj,11.79734,0.01000,1.629
|
257 |
+
36,self_attn.o_proj,0.96024,0.01000,1.294
|
258 |
+
36,mlp.up_proj,43.97441,0.01000,1.831
|
259 |
+
36,mlp.gate_proj,43.57873,0.01000,1.814
|
260 |
+
36,mlp.down_proj,6.87738,0.01000,4.683
|
261 |
+
37,self_attn.k_proj,3.01777,0.01000,1.633
|
262 |
+
37,self_attn.v_proj,9.46064,0.01000,1.548
|
263 |
+
37,self_attn.q_proj,11.42208,0.01000,1.604
|
264 |
+
37,self_attn.o_proj,2.27809,0.01000,1.325
|
265 |
+
37,mlp.up_proj,44.40799,0.01000,1.839
|
266 |
+
37,mlp.gate_proj,44.36054,0.01000,1.860
|
267 |
+
37,mlp.down_proj,9.07977,0.01000,4.803
|
268 |
+
38,self_attn.k_proj,3.30560,0.01000,1.613
|
269 |
+
38,self_attn.v_proj,13.28640,0.01000,1.540
|
270 |
+
38,self_attn.q_proj,12.83793,0.01000,1.628
|
271 |
+
38,self_attn.o_proj,2.14735,0.01000,1.304
|
272 |
+
38,mlp.up_proj,44.31187,0.01000,1.866
|
273 |
+
38,mlp.gate_proj,45.34725,0.01000,1.819
|
274 |
+
38,mlp.down_proj,12.56778,0.01000,4.781
|
275 |
+
39,self_attn.k_proj,3.03322,0.01000,1.611
|
276 |
+
39,self_attn.v_proj,6.71417,0.01000,1.551
|
277 |
+
39,self_attn.q_proj,10.78402,0.01000,1.623
|
278 |
+
39,self_attn.o_proj,1.77810,0.01000,1.361
|
279 |
+
39,mlp.up_proj,45.51768,0.01000,1.937
|
280 |
+
39,mlp.gate_proj,48.72448,0.01000,1.899
|
281 |
+
39,mlp.down_proj,25.16159,0.01000,4.956
|
quantize_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bits": 4,
|
3 |
+
"dynamic": null,
|
4 |
+
"group_size": 32,
|
5 |
+
"desc_act": false,
|
6 |
+
"sym": true,
|
7 |
+
"lm_head": false,
|
8 |
+
"quant_method": "gptq",
|
9 |
+
"checkpoint_format": "gptq",
|
10 |
+
"meta": {
|
11 |
+
"quantizer": [
|
12 |
+
"gptqmodel:1.7.0"
|
13 |
+
],
|
14 |
+
"uri": "https://github.com/modelcloud/gptqmodel",
|
15 |
+
"damp_percent": 0.01,
|
16 |
+
"damp_auto_increment": 0.0025,
|
17 |
+
"static_groups": false,
|
18 |
+
"true_sequential": true,
|
19 |
+
"mse": 0.0
|
20 |
+
}
|
21 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|im_end|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<pad>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<unk>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea88f9940a84ab7e0100bc369506a28ec8d5d821691dc47d4dd63f1bbdf105ed
|
3 |
+
size 17078669
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|