# Mixture-of-experts merge configuration.
# NOTE(review): the key layout looks like a mergekit MoE config; confirm the
# consuming tool before relying on the key semantics described here.

# Checkpoint named as the base of the merge.
base_model: meta-llama/Meta-Llama-3-8B

# "random" gate mode: no per-expert prompts are supplied in this file.
# NOTE(review): presumably the router weights are randomly initialized, which
# is why the merged model is meant to be trained afterwards; confirm.
gate_mode: random

# Data type requested for the output.
dtype: bfloat16

# Ten experts, each pointing at the same checkpoint as base_model.
experts:
  - source_model: meta-llama/Meta-Llama-3-8B
  - source_model: meta-llama/Meta-Llama-3-8B
  - source_model: meta-llama/Meta-Llama-3-8B
  - source_model: meta-llama/Meta-Llama-3-8B
  - source_model: meta-llama/Meta-Llama-3-8B
  - source_model: meta-llama/Meta-Llama-3-8B
  - source_model: meta-llama/Meta-Llama-3-8B
  - source_model: meta-llama/Meta-Llama-3-8B
  - source_model: meta-llama/Meta-Llama-3-8B
  - source_model: meta-llama/Meta-Llama-3-8B
# ...and then train the resulting model!