3v324v23's picture
bf16 to fp16
45be9e6
layer,module,loss,damp,time
0,self_attn.k_proj,0.00136,0.01000,0.325
0,self_attn.v_proj,0.00003,0.01000,0.129
0,self_attn.q_proj,0.01018,0.01000,0.137
0,self_attn.o_proj,0.00001,0.01000,0.134
0,mlp.up_proj,0.09437,0.01000,0.139
0,mlp.gate_proj,0.18865,0.01000,0.139
0,mlp.down_proj,0.00143,0.01000,0.735
1,self_attn.k_proj,0.00999,0.01000,0.135
1,self_attn.v_proj,0.00103,0.01000,0.135
1,self_attn.q_proj,0.03432,0.01000,0.135
1,self_attn.o_proj,0.00249,0.01000,0.132
1,mlp.up_proj,0.10145,0.01000,0.145
1,mlp.gate_proj,0.19494,0.01000,0.138
1,mlp.down_proj,0.00111,0.01000,0.741
2,self_attn.k_proj,0.01739,0.01000,0.135
2,self_attn.v_proj,0.00208,0.01000,0.130
2,self_attn.q_proj,0.05895,0.01000,0.135
2,self_attn.o_proj,0.00084,0.01000,0.132
2,mlp.up_proj,0.15928,0.01000,0.139
2,mlp.gate_proj,0.29904,0.01000,0.137
2,mlp.down_proj,3.25813,0.01000,0.724
3,self_attn.k_proj,0.02043,0.01000,0.141
3,self_attn.v_proj,0.00490,0.01000,0.138
3,self_attn.q_proj,0.08232,0.01000,0.139
3,self_attn.o_proj,0.00070,0.01000,0.141
3,mlp.up_proj,1.52620,0.01000,0.140
3,mlp.gate_proj,13.87913,0.01000,0.153
3,mlp.down_proj,3.20606,0.01000,0.734
4,self_attn.k_proj,0.01640,0.01000,0.135
4,self_attn.v_proj,0.00691,0.01000,0.131
4,self_attn.q_proj,0.07569,0.01000,0.132
4,self_attn.o_proj,0.00309,0.01000,0.134
4,mlp.up_proj,0.24986,0.01000,0.136
4,mlp.gate_proj,0.42210,0.01000,0.138
4,mlp.down_proj,0.00362,0.01000,0.737
5,self_attn.k_proj,0.01574,0.01000,0.137
5,self_attn.v_proj,0.00762,0.01000,0.133
5,self_attn.q_proj,0.07645,0.01000,0.136
5,self_attn.o_proj,0.00106,0.01000,0.138
5,mlp.up_proj,0.30390,0.01000,0.143
5,mlp.gate_proj,0.71931,0.01000,0.147
5,mlp.down_proj,0.00614,0.01000,0.733
6,self_attn.k_proj,0.02019,0.01000,0.139
6,self_attn.v_proj,0.00567,0.01000,0.138
6,self_attn.q_proj,0.07846,0.01000,0.140
6,self_attn.o_proj,0.00102,0.01000,0.140
6,mlp.up_proj,0.25922,0.01000,0.147
6,mlp.gate_proj,0.42813,0.01000,0.144
6,mlp.down_proj,0.00318,0.01000,0.753
7,self_attn.k_proj,0.02236,0.01000,0.144
7,self_attn.v_proj,0.00721,0.01000,0.146
7,self_attn.q_proj,0.08888,0.01000,0.135
7,self_attn.o_proj,0.00106,0.01000,0.140
7,mlp.up_proj,0.27111,0.01000,0.147
7,mlp.gate_proj,0.37811,0.01000,0.143
7,mlp.down_proj,0.00381,0.01000,0.762
8,self_attn.k_proj,0.01918,0.01000,0.137
8,self_attn.v_proj,0.00650,0.01000,0.138
8,self_attn.q_proj,0.08556,0.01000,0.140
8,self_attn.o_proj,0.00224,0.01000,0.140
8,mlp.up_proj,0.23711,0.01000,0.140
8,mlp.gate_proj,0.33877,0.01000,0.141
8,mlp.down_proj,0.00247,0.01000,0.745
9,self_attn.k_proj,0.03478,0.01000,0.141
9,self_attn.v_proj,0.01390,0.01000,0.145
9,self_attn.q_proj,0.16694,0.01000,0.139
9,self_attn.o_proj,0.00133,0.01000,0.140
9,mlp.up_proj,0.25157,0.01000,0.142
9,mlp.gate_proj,0.31524,0.01000,0.148
9,mlp.down_proj,0.00306,0.01000,0.780
10,self_attn.k_proj,0.02065,0.01000,0.140
10,self_attn.v_proj,0.01247,0.01000,0.139
10,self_attn.q_proj,0.09851,0.01000,0.141
10,self_attn.o_proj,0.00224,0.01000,0.141
10,mlp.up_proj,0.20222,0.01000,0.145
10,mlp.gate_proj,0.26411,0.01000,0.144
10,mlp.down_proj,0.00195,0.01000,0.758
11,self_attn.k_proj,0.03751,0.01000,0.138
11,self_attn.v_proj,0.01373,0.01000,0.137
11,self_attn.q_proj,0.16694,0.01000,0.138
11,self_attn.o_proj,0.00163,0.01000,0.136
11,mlp.up_proj,0.23553,0.01000,0.142
11,mlp.gate_proj,0.25575,0.01000,0.141
11,mlp.down_proj,0.00258,0.01000,0.766
12,self_attn.k_proj,0.01699,0.01000,0.141
12,self_attn.v_proj,0.00987,0.01000,0.143
12,self_attn.q_proj,0.09044,0.01000,0.149
12,self_attn.o_proj,0.00252,0.01000,0.145
12,mlp.up_proj,0.20484,0.01000,0.146
12,mlp.gate_proj,0.22553,0.01000,0.146
12,mlp.down_proj,0.00257,0.01000,0.767
13,self_attn.k_proj,0.02551,0.01000,0.135
13,self_attn.v_proj,0.01122,0.01000,0.136
13,self_attn.q_proj,0.13072,0.01000,0.146
13,self_attn.o_proj,0.00144,0.01000,0.134
13,mlp.up_proj,0.23847,0.01000,0.147
13,mlp.gate_proj,0.24350,0.01000,0.151
13,mlp.down_proj,0.00315,0.01000,0.778
14,self_attn.k_proj,0.02141,0.01000,0.139
14,self_attn.v_proj,0.01697,0.01000,0.135
14,self_attn.q_proj,0.12928,0.01000,0.140
14,self_attn.o_proj,0.00471,0.01000,0.136
14,mlp.up_proj,0.22936,0.01000,0.139
14,mlp.gate_proj,0.26044,0.01000,0.147
14,mlp.down_proj,0.00375,0.01000,0.742
15,self_attn.k_proj,0.02037,0.01000,0.137
15,self_attn.v_proj,0.01484,0.01000,0.136
15,self_attn.q_proj,0.10820,0.01000,0.140
15,self_attn.o_proj,0.00345,0.01000,0.138
15,mlp.up_proj,0.30909,0.01000,0.143
15,mlp.gate_proj,0.32287,0.01000,0.149
15,mlp.down_proj,0.00640,0.01000,0.748
16,self_attn.k_proj,0.02956,0.01000,0.139
16,self_attn.v_proj,0.02730,0.01000,0.136
16,self_attn.q_proj,0.21341,0.01000,0.137
16,self_attn.o_proj,0.00164,0.01000,0.142
16,mlp.up_proj,0.36086,0.01000,0.146
16,mlp.gate_proj,0.47303,0.01000,0.153
16,mlp.down_proj,0.01073,0.01000,0.753
17,self_attn.k_proj,0.02010,0.01000,0.144
17,self_attn.v_proj,0.02237,0.01000,0.138
17,self_attn.q_proj,0.13404,0.01000,0.138
17,self_attn.o_proj,0.00350,0.01000,0.138
17,mlp.up_proj,0.42924,0.01000,0.140
17,mlp.gate_proj,0.66441,0.01000,0.142
17,mlp.down_proj,0.00841,0.01000,0.736
18,self_attn.k_proj,0.02365,0.01000,0.142
18,self_attn.v_proj,0.01843,0.01000,0.133
18,self_attn.q_proj,0.11482,0.01000,0.138
18,self_attn.o_proj,0.00114,0.01000,0.136
18,mlp.up_proj,0.41826,0.01000,0.142
18,mlp.gate_proj,0.52589,0.01000,0.144
18,mlp.down_proj,0.00837,0.01000,0.747
19,self_attn.k_proj,0.01885,0.01000,0.144
19,self_attn.v_proj,0.01739,0.01000,0.145
19,self_attn.q_proj,0.11650,0.01000,0.139
19,self_attn.o_proj,0.00337,0.01000,0.143
19,mlp.up_proj,0.61293,0.01000,0.146
19,mlp.gate_proj,0.73786,0.01000,0.147
19,mlp.down_proj,0.01766,0.01000,0.743
20,self_attn.k_proj,0.02636,0.01000,0.137
20,self_attn.v_proj,0.05430,0.01000,0.137
20,self_attn.q_proj,0.16743,0.01000,0.145
20,self_attn.o_proj,0.01072,0.01000,0.140
20,mlp.up_proj,0.71327,0.01000,0.136
20,mlp.gate_proj,0.78980,0.01000,0.147
20,mlp.down_proj,0.02940,0.01000,0.730
21,self_attn.k_proj,0.03263,0.01000,0.137
21,self_attn.v_proj,0.10656,0.01000,0.136
21,self_attn.q_proj,0.22666,0.01000,0.139
21,self_attn.o_proj,0.02572,0.01000,0.138
21,mlp.up_proj,1.93951,0.01000,0.140
21,mlp.gate_proj,4.54990,0.01000,0.143
21,mlp.down_proj,17.82846,0.01000,0.752
22,self_attn.k_proj,0.02069,0.01000,0.143
22,self_attn.v_proj,0.06015,0.01000,0.136
22,self_attn.q_proj,0.15573,0.01000,0.143
22,self_attn.o_proj,0.01185,0.01000,0.138
22,mlp.up_proj,0.59293,0.01000,0.144
22,mlp.gate_proj,0.57112,0.01000,0.149
22,mlp.down_proj,0.05477,0.01000,0.768
23,self_attn.k_proj,0.02633,0.01000,0.137
23,self_attn.v_proj,0.05556,0.01000,0.141
23,self_attn.q_proj,0.19868,0.01000,0.136
23,self_attn.o_proj,0.03036,0.01000,0.139
23,mlp.up_proj,0.86650,0.01000,0.141
23,mlp.gate_proj,0.89305,0.01000,0.142
23,mlp.down_proj,0.48127,0.01000,0.736