# task_arithmetic merge of the Swallow instruct model (weight 1.0) onto the
# SuperSwallow base. The output is referenced by its `name` in a later
# model_stock document.
# NOTE(review): the original indentation was lost; `parameters` is nested under
# the model entry to match the sibling task_arithmetic documents — confirm.
merge_method: task_arithmetic
base_model: nitky/Llama-3.3-SuperSwallow-70B-Instruct-v0.1
models:
  - model: tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1
    parameters:
      weight: 1.0
dtype: bfloat16
name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-swallow
---
# task_arithmetic merge on meta-llama/Llama-3.1-70B combining the Swallow
# instruct model and Llama-3.1-70B-Instruct, each with weight 1.0.
# The output is used as the base of the two slerp "…-preset" documents.
merge_method: task_arithmetic
base_model: meta-llama/Llama-3.1-70B
models:
  - model: tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1
    parameters:
      weight: 1.0
  - model: meta-llama/Llama-3.1-70B-Instruct
    parameters:
      weight: 1.0
dtype: bfloat16
name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-llamaswallow
---
# slerp between the "preset-llamaswallow" intermediate and the CyberAgent
# Japanese instruct model, with t = 0.5.
# NOTE(review): the original indentation was lost; `parameters.t` is placed at
# document level (it follows the whole model list) — confirm.
merge_method: slerp
base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-llamaswallow
models:
  - model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-llamaswallow
  - model: cyberagent/Llama-3.1-70B-Japanese-Instruct-2407
parameters:
  t: 0.5
dtype: bfloat16
name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-cyberswallow-preset
---
# task_arithmetic merge on the SuperSwallow base: Llama-3.1-70B-Instruct with
# weight -1.0 and the "preset-cyberswallow-preset" intermediate with weight 1.0.
# The output is one of the inputs of the "preset-base" model_stock document.
merge_method: task_arithmetic
base_model: nitky/Llama-3.3-SuperSwallow-70B-Instruct-v0.1
models:
  - model: meta-llama/Llama-3.1-70B-Instruct
    parameters:
      weight: -1.0
  - model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-cyberswallow-preset
    parameters:
      weight: 1.0
dtype: bfloat16
name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-cyberswallow
---
# slerp between the "preset-llamaswallow" intermediate and the AXCXEPT EZO
# model, with t = 0.5.
# NOTE(review): the original indentation was lost; `parameters.t` is placed at
# document level (it follows the whole model list) — confirm.
merge_method: slerp
base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-llamaswallow
models:
  - model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-llamaswallow
  - model: AXCXEPT/Llama-3.1-70B-EZO-1.1-it
parameters:
  t: 0.5
dtype: bfloat16
name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-ezoswallow-preset
---
# task_arithmetic merge on the SuperSwallow base: Llama-3.1-70B-Instruct with
# weight -1.0 and the "preset-ezoswallow-preset" intermediate with weight 1.0.
# The output is one of the inputs of the "preset-base" model_stock document.
merge_method: task_arithmetic
base_model: nitky/Llama-3.3-SuperSwallow-70B-Instruct-v0.1
models:
  - model: meta-llama/Llama-3.1-70B-Instruct
    parameters:
      weight: -1.0
  - model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-ezoswallow-preset
    parameters:
      weight: 1.0
dtype: bfloat16
name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-ezoswallow
---
# model_stock merge on the SuperSwallow base over SuperSwallow itself and the
# three "preset-*" intermediates (swallow, cyberswallow, ezoswallow).
# The output is the base model of the subsequent per-model task_arithmetic
# documents and of the "flavor" model_stock document.
merge_method: model_stock
base_model: nitky/Llama-3.3-SuperSwallow-70B-Instruct-v0.1
models:
  - model: nitky/Llama-3.3-SuperSwallow-70B-Instruct-v0.1
  - model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-swallow
  - model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-cyberswallow
  - model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-ezoswallow
dtype: bfloat16
name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-base
---
# task_arithmetic merge on "preset-base": Llama-3.1-70B-Instruct with weight
# -1.0 and HPAI-BSC/Llama3.1-Aloe-Beta-70B with weight 1.0.
# The output is one of the inputs of the "flavor" model_stock document.
merge_method: task_arithmetic
base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-base
models:
  - model: meta-llama/Llama-3.1-70B-Instruct
    parameters:
      weight: -1.0
  - model: HPAI-BSC/Llama3.1-Aloe-Beta-70B
    parameters:
      weight: 1.0
dtype: bfloat16
name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-aloe
---
# task_arithmetic merge on "preset-base": Llama-3.1-70B-Instruct with weight
# -1.0 and marcelbinz/Llama-3.1-Centaur-70B with weight 1.0.
# The output is one of the inputs of the "flavor" model_stock document.
merge_method: task_arithmetic
base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-base
models:
  - model: meta-llama/Llama-3.1-70B-Instruct
    parameters:
      weight: -1.0
  - model: marcelbinz/Llama-3.1-Centaur-70B
    parameters:
      weight: 1.0
dtype: bfloat16
name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-centaur
---
# task_arithmetic merge on "preset-base": Llama-3.1-70B-Instruct with weight
# -1.0 and hitachi-nlp/Llama-3.1-70B-FLDx2 with weight 1.0.
# The output is one of the inputs of the "flavor" model_stock document.
merge_method: task_arithmetic
base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-base
models:
  - model: meta-llama/Llama-3.1-70B-Instruct
    parameters:
      weight: -1.0
  - model: hitachi-nlp/Llama-3.1-70B-FLDx2
    parameters:
      weight: 1.0
dtype: bfloat16
name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-fldx2
---
# task_arithmetic merge on "preset-base": Llama-3.1-70B-Instruct with weight
# -1.0 and Skywork/Skywork-Critic-Llama-3.1-70B with weight 1.0.
# The output is one of the inputs of the "flavor" model_stock document.
merge_method: task_arithmetic
base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-base
models:
  - model: meta-llama/Llama-3.1-70B-Instruct
    parameters:
      weight: -1.0
  - model: Skywork/Skywork-Critic-Llama-3.1-70B
    parameters:
      weight: 1.0
dtype: bfloat16
name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-skywork
---
# task_arithmetic merge on "preset-base": Llama-3.1-70B-Instruct with weight
# -1.0 and DISLab/SummLlama3.1-70B with weight 1.0.
# The output is one of the inputs of the "flavor" model_stock document.
merge_method: task_arithmetic
base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-base
models:
  - model: meta-llama/Llama-3.1-70B-Instruct
    parameters:
      weight: -1.0
  - model: DISLab/SummLlama3.1-70B
    parameters:
      weight: 1.0
dtype: bfloat16
name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-summ
---
# model_stock merge on "preset-base" over the five task_arithmetic
# intermediates (aloe, centaur, fldx2, summ, skywork).
# The output is the second endpoint of the final slerp document.
merge_method: model_stock
base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-base
models:
  - model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-aloe
  - model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-centaur
  - model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-fldx2
  - model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-summ
  - model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-skywork
dtype: bfloat16
name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-flavor
---
# Final document: slerp between the SuperSwallow base and the "flavor"
# intermediate, with t = 0.5.
# NOTE(review): the original indentation was lost; `parameters.t` is placed at
# document level (it follows the whole model list) — confirm.
merge_method: slerp
base_model: nitky/Llama-3.3-SuperSwallow-70B-Instruct-v0.1
models:
  - model: nitky/Llama-3.3-SuperSwallow-70B-Instruct-v0.1
  - model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-flavor
parameters:
  t: 0.5
dtype: bfloat16
name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1