# Model-merge configuration (mergekit-style schema).
#
# Merges two Llama-70B fine-tunes onto a shared base model with the `sce`
# merge method. Each source model supplies a `select_topk` value per tensor
# group; the final list entry without a `filter` is the fallback value.
#
# Filters are written one tensor-name fragment per entry and ordered from
# most specific to least specific.
# NOTE(review): this assumes filters are matched as plain substrings of the
# tensor name with first-match-wins semantics, so regex-style alternation
# such as "up_proj|down_proj" would never match and a broad filter such as
# `mlp` listed first would shadow the narrower ones -- confirm against the
# mergekit version in use.
# NOTE(review): some mergekit versions read `select_topk` for `sce` only from
# a top-level `parameters:` section -- confirm that per-model values are
# actually honored before relying on the per-model split below.
models:
  - model: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
    parameters:
      select_topk:
        # Attention projections (same value as the broader self_attn group).
        - filter: q_proj
          value: 0.2
        - filter: k_proj
          value: 0.2
        - filter: v_proj
          value: 0.2
        - filter: self_attn
          value: 0.2
        # MLP up/down projections get their own value; the remaining MLP
        # tensors fall through to the broader `mlp` entry.
        - filter: up_proj
          value: 0.2
        - filter: down_proj
          value: 0.2
        - filter: mlp
          value: 0.1
        - value: 0.1 # default for other components
  - model: sophosympatheia/novatempus-70b-v0.1
    parameters:
      select_topk:
        # Attention projections (same value as the broader self_attn group).
        - filter: q_proj
          value: 0.1
        - filter: k_proj
          value: 0.1
        - filter: v_proj
          value: 0.1
        - filter: self_attn
          value: 0.1
        # MLP up/down projections get their own value; the remaining MLP
        # tensors fall through to the broader `mlp` entry.
        - filter: up_proj
          value: 0.1
        - filter: down_proj
          value: 0.1
        - filter: mlp
          value: 0.2
        - value: 0.1 # default for other components
merge_method: sce
base_model: meta-llama/Llama-3.3-70B-Instruct
dtype: bfloat16
# Take the tokenizer from the DeepSeek distill rather than the base model.
tokenizer:
  source: deepseek-ai/DeepSeek-R1-Distill-Llama-70B