merge_method: della_linear base_model: migtissera/Tess-3-Llama-3.1-70B models: - model: bosonai/Higgs-Llama-3-70B parameters: weight: - filter: q_proj value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0] - filter: k_proj value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0] - filter: v_proj value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0] - filter: o_proj value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0] - filter: input_layernorm value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0] - filter: up_proj value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0] - filter: gate_proj value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0] - filter: down_proj value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0] - filter: post_attention_layernorm value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0] - value: 0 density: 0.5 epsilon: 0.1 lambda: 1.0 - model: migtissera/Tess-3-Llama-3.1-70B parameters: weight: 1.0 density: - filter: q_proj value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1] - filter: k_proj value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1] - filter: v_proj value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1] - filter: o_proj value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1] - filter: input_layernorm value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1] - filter: up_proj value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1] - filter: gate_proj value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1] - filter: down_proj value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1] - filter: post_attention_layernorm value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1] - value: 0.5 epsilon: - filter: q_proj value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0,09, 0.08, 0,07, 0.06, 0.05, 0.04, 0, 0, 0, 0] - filter: k_proj value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0,09, 0.08, 0,07, 0.06, 0.05, 0.04, 0, 0, 0, 0] - filter: v_proj value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0,09, 0.08, 0,07, 0.06, 0.05, 0.04, 0, 0, 0, 0] - filter: o_proj value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0,09, 0.08, 0,07, 0.06, 0.05, 0.04, 0, 0, 0, 0] - filter: input_layernorm value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0,09, 0.08, 0,07, 0.06, 0.05, 0.04, 0, 0, 0, 0] - filter: up_proj value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0,09, 0.08, 0,07, 0.06, 0.05, 0.04, 0, 0, 0, 0] - filter: gate_proj value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0,09, 0.08, 0,07, 0.06, 0.05, 0.04, 0, 0, 0, 0] - filter: down_proj value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0,09, 0.08, 0,07, 0.06, 0.05, 0.04, 0, 0, 0, 0] - filter: post_attention_layernorm value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0,09, 0.08, 0,07, 0.06, 0.05, 0.04, 0, 0, 0, 0] - value: 0.1 lambda: 1.0 dtype: bfloat16 out_dtype: bfloat16 parameters: int8_mask: true normalize: true rescale: true filter_wise: false chat_template: auto tokenizer: source: union