---
license: apache-2.0
---
|
A Mixtral-style Mixture of Experts (MoE) merge built with mergekit's `mergekit-moe`, combining four Mistral-7B experts on a mistralai/Mistral-7B-Instruct-v0.2 base. With `gate_mode: hidden`, each expert's router weights are initialized from hidden-state representations of its positive (and negative) prompts, so tokens resembling those prompts are routed to that expert. The configuration used:

```yaml
base_model: mistralai/Mistral-7B-Instruct-v0.2
gate_mode: hidden # one of "hidden", "cheap_embed", or "random"
dtype: bfloat16 # output dtype (float32, float16, or bfloat16)
experts:
  - source_model: SanjiWatsuki/Silicon-Maid-7B
    positive_prompts:
      - "roleplay"
      - "story telling"
      - "fantasy"
      - "dreaming"
  - source_model: teknium/OpenHermes-2.5-Mistral-7B
    positive_prompts:
      - "chat"
      - "flow chart"
      - "diagrams"
      - "reasoning"
      - "explanation"
  - source_model: Nondzu/Mistral-7B-Instruct-v0.2-code-ft
    positive_prompts:
      - "programming"
      - "code debugging"
      - "data transformation"
      - "data structures"
    negative_prompts:
      - "chat"
  - source_model: meta-math/MetaMath-Mistral-7B
    positive_prompts:
      - "math"
      - "arithmetic"
      - "algebra"
```
|
|
|
This model uses the ChatML prompt format.
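
A minimal usage sketch with `transformers`, building the ChatML turns by hand. The repo id is a placeholder and the messages are illustrative; if the merged tokenizer ships its own chat template, `tokenizer.apply_chat_template` can be used instead:

```python
# Minimal sketch: query the merged MoE with a hand-built ChatML prompt.
# "your-namespace/your-moe-merge" is a placeholder repo id.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "your-namespace/your-moe-merge"  # placeholder
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# ChatML wraps every turn in <|im_start|>{role} ... <|im_end|> and leaves
# an open assistant turn for the model to complete.
prompt = (
    "<|im_start|>system\n"
    "You are a helpful assistant.<|im_end|>\n"
    "<|im_start|>user\n"
    "Explain mixture-of-experts routing in two sentences.<|im_end|>\n"
    "<|im_start|>assistant\n"
)

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=256)
# Decode only the newly generated tokens, skipping the prompt.
print(tokenizer.decode(output[0, inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```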