Llama-3.3-SuperSwallowX-70B-Instruct-v0.1 / mergekit_config.yml

Upload 37 files

1a80318 verified 3 months ago

5 kB

	merge_method: task_arithmetic
	base_model: nitky/Llama-3.3-SuperSwallow-70B-Instruct-v0.1
	models:
	- model: tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1
	parameters:
	weight: 1.0
	dtype: bfloat16
	name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-swallow
	---
	merge_method: task_arithmetic
	base_model: meta-llama/Llama-3.1-70B
	models:
	- model: tokyotech-llm/Llama-3.1-Swallow-70B-Instruct-v0.1
	parameters:
	weight: 1.0
	- model: meta-llama/Llama-3.1-70B-Instruct
	parameters:
	weight: 1.0
	dtype: bfloat16
	name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-llamaswallow
	---
	merge_method: slerp
	base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-llamaswallow
	models:
	- model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-llamaswallow
	- model: cyberagent/Llama-3.1-70B-Japanese-Instruct-2407
	parameters:
	t: 0.5
	dtype: bfloat16
	name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-cyberswallow-preset
	---
	merge_method: task_arithmetic
	base_model: nitky/Llama-3.3-SuperSwallow-70B-Instruct-v0.1
	models:
	- model: meta-llama/Llama-3.1-70B-Instruct
	parameters:
	weight: -1.0
	- model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-cyberswallow-preset
	parameters:
	weight: 1.0
	dtype: bfloat16
	name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-cyberswallow
	---
	merge_method: slerp
	base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-llamaswallow
	models:
	- model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-llamaswallow
	- model: AXCXEPT/Llama-3.1-70B-EZO-1.1-it
	parameters:
	t: 0.5
	dtype: bfloat16
	name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-ezoswallow-preset
	---
	merge_method: task_arithmetic
	base_model: nitky/Llama-3.3-SuperSwallow-70B-Instruct-v0.1
	models:
	- model: meta-llama/Llama-3.1-70B-Instruct
	parameters:
	weight: -1.0
	- model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-ezoswallow-preset
	parameters:
	weight: 1.0
	dtype: bfloat16
	name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-ezoswallow
	---
	merge_method: model_stock
	base_model: nitky/Llama-3.3-SuperSwallow-70B-Instruct-v0.1
	models:
	- model: nitky/Llama-3.3-SuperSwallow-70B-Instruct-v0.1
	- model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-swallow
	- model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-cyberswallow
	- model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-ezoswallow
	dtype: bfloat16
	name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-base
	---
	merge_method: task_arithmetic
	base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-base
	models:
	- model: meta-llama/Llama-3.1-70B-Instruct
	parameters:
	weight: -1.0
	- model: HPAI-BSC/Llama3.1-Aloe-Beta-70B
	parameters:
	weight: 1.0
	dtype: bfloat16
	name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-aloe
	---
	merge_method: task_arithmetic
	base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-base
	models:
	- model: meta-llama/Llama-3.1-70B-Instruct
	parameters:
	weight: -1.0
	- model: marcelbinz/Llama-3.1-Centaur-70B
	parameters:
	weight: 1.0
	dtype: bfloat16
	name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-centaur
	---
	merge_method: task_arithmetic
	base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-base
	models:
	- model: meta-llama/Llama-3.1-70B-Instruct
	parameters:
	weight: -1.0
	- model: hitachi-nlp/Llama-3.1-70B-FLDx2
	parameters:
	weight: 1.0
	dtype: bfloat16
	name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-fldx2
	---
	merge_method: task_arithmetic
	base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-base
	models:
	- model: meta-llama/Llama-3.1-70B-Instruct
	parameters:
	weight: -1.0
	- model: Skywork/Skywork-Critic-Llama-3.1-70B
	parameters:
	weight: 1.0
	dtype: bfloat16
	name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-skywork
	---
	merge_method: task_arithmetic
	base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-base
	models:
	- model: meta-llama/Llama-3.1-70B-Instruct
	parameters:
	weight: -1.0
	- model: DISLab/SummLlama3.1-70B
	parameters:
	weight: 1.0
	dtype: bfloat16
	name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-summ
	---
	merge_method: model_stock
	base_model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-base
	models:
	- model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-aloe
	- model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-centaur
	- model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-fldx2
	- model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-summ
	- model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-preset-skywork
	dtype: bfloat16
	name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-flavor
	---
	merge_method: slerp
	base_model: nitky/Llama-3.3-SuperSwallow-70B-Instruct-v0.1
	models:
	- model: nitky/Llama-3.3-SuperSwallow-70B-Instruct-v0.1
	- model: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1-flavor
	parameters:
	t: 0.5
	dtype: bfloat16
	name: Llama-3.3-SuperSwallowX-70B-Instruct-v0.1