# Merge configuration: SLERP between two MISCHIEVOUS-12B mixes, defined as
# five consecutive 8-layer slices (layers 0-40), each with its own
# interpolation parameter `t`.
#
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# `parameters` is placed at slice level (sibling of `sources`) — confirm
# against the original config.
slices:
  # Layers 0-8: single scalar t for every tensor in the slice.
  - sources:
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v
        layer_range: [0, 8]
      - model: bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V
        layer_range: [0, 8]
    parameters:
      t:
        - value: 0.72

  # Layers 8-16: t given as a list of three values.
  - sources:
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v
        layer_range: [8, 16]
      - model: bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V
        layer_range: [8, 16]
    parameters:
      t:
        - value: [0.75, 0.85, 0.75]

  # Layers 16-24: default t plus a `feed_forward`-filtered override.
  # NOTE(review): the unfiltered entry is listed BEFORE the filtered one. If
  # the merge tool resolves conditional parameters first-match-wins, the
  # filtered entry below is never reached — confirm and reorder if the
  # override is meant to take effect.
  # NOTE(review): verify that `feed_forward` actually matches tensor names in
  # these checkpoints (many architectures name these modules `mlp`).
  # NOTE(review): 1.1 is outside the usual [0, 1] interpolation range —
  # confirm extrapolation is intended.
  - sources:
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v
        layer_range: [16, 24]
      - model: bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V
        layer_range: [16, 24]
    parameters:
      t:
        - value: [0.85, 1.0, 0.85]
        - filter: feed_forward
          value: [0.9, 1.0, 1.1]

  # Layers 24-32: t given as a list of three values.
  - sources:
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v
        layer_range: [24, 32]
      - model: bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V
        layer_range: [24, 32]
    parameters:
      t:
        - value: [0.95, 1.0, 0.95]

  # Layers 32-40: default t of 1.0 plus a `self_attn`-filtered override.
  # NOTE(review): same ordering concern as the 16-24 slice — the filtered
  # entry follows the unfiltered default. 1.08 is also above 1.0; confirm.
  - sources:
      - model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v
        layer_range: [32, 40]
      - model: bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V
        layer_range: [32, 40]
    parameters:
      t:
        - value: 1.0
        - filter: self_attn
          value: [0.92, 1.0, 1.08]

merge_method: slerp
base_model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v

# NOTE(review): `regularization` and `postprocessing` are presumably not keys
# the merge tool recognises, in which case they have no effect — verify
# against the tool's configuration schema before relying on them.
regularization:
  - method: weight_clipping
    clip_range: [-0.04, 0.04]
postprocessing:
  - operation: gaussian_smoothing
    sigma: 0.9
  - operation: normalize
  - operation: quantize
    # NOTE(review): int8 here differs from the top-level `dtype: bfloat16`;
    # confirm which output precision is actually intended.
    target_dtype: int8

dtype: bfloat16