llama3-8x8-no-train / mergekit_moe_config.yml
kloodia's picture
Upload folder using huggingface_hub
6ab70fa verified
raw
history blame contribute delete
563 Bytes
# mergekit MoE configuration: assembles a Mixture-of-Experts model by cloning
# a single dense base model into multiple expert slots.
base_model: meta-llama/Meta-Llama-3-8B
# Router gates are randomly initialized rather than derived from prompt
# hidden states, so the merged model is intended to be fine-tuned afterwards
# (see the trailing comment below).
gate_mode: random
dtype: bfloat16
# NOTE(review): 10 expert entries are listed, but the surrounding repo name
# says "8x8", which suggests 8 experts — confirm the intended expert count.
# Every expert is the same base checkpoint; differentiation is expected to
# come from the post-merge training step.
experts:
- source_model: meta-llama/Meta-Llama-3-8B
- source_model: meta-llama/Meta-Llama-3-8B
- source_model: meta-llama/Meta-Llama-3-8B
- source_model: meta-llama/Meta-Llama-3-8B
- source_model: meta-llama/Meta-Llama-3-8B
- source_model: meta-llama/Meta-Llama-3-8B
- source_model: meta-llama/Meta-Llama-3-8B
- source_model: meta-llama/Meta-Llama-3-8B
- source_model: meta-llama/Meta-Llama-3-8B
- source_model: meta-llama/Meta-Llama-3-8B
# and then train the sucker!