Upload folder using huggingface_hub

233451c verified 5 months ago

4.93 kB

	---
	# Model-card metadata (YAML front matter) for this merged model.
	# base_model lists the two checkpoints named in the merge configuration
	# shown later in this card.
	base_model:
	- migtissera/Tess-3-Llama-3.1-70B
	- aaditya/Llama3-OpenBioLLM-70B
	library_name: transformers
	# Tags identifying the repository as a merge produced with mergekit.
	tags:
	- mergekit
	- merge

	---
	# merge

	This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).

	## Merge Details
	### Merge Method

	This model was merged with the [Linear DELLA](https://arxiv.org/abs/2406.11617) merge method, using [migtissera/Tess-3-Llama-3.1-70B](https://huggingface.co/migtissera/Tess-3-Llama-3.1-70B) as the base model.

	### Models Merged

	The following models were included in the merge:
	* [aaditya/Llama3-OpenBioLLM-70B](https://huggingface.co/aaditya/Llama3-OpenBioLLM-70B)

	### Configuration

	The following YAML configuration was used to produce this model:

	```yaml
	# mergekit configuration: della_linear merge of Llama3-OpenBioLLM-70B onto
	# the Tess-3-Llama-3.1-70B base model.
	#
	# Every per-filter `value` list below has 21 entries: four edge entries, 13
	# middle entries, four edge entries. Per the mergekit docs a list-valued
	# parameter is a gradient interpolated across the layers, and a list entry
	# without `filter` is the default for tensors that no filter matches.
	merge_method: della_linear
	base_model: migtissera/Tess-3-Llama-3.1-70B
	models:
	- model: aaditya/Llama3-OpenBioLLM-70B
	  parameters:
	    # Weight is 0 at both ends and 1 through the middle entries; tensors not
	    # matched by any filter get weight 0.
	    weight:
	    - filter: q_proj
	      value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
	    - filter: k_proj
	      value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
	    - filter: v_proj
	      value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
	    - filter: o_proj
	      value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
	    - filter: input_layernorm
	      value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
	    - filter: up_proj
	      value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
	    - filter: gate_proj
	      value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
	    - filter: down_proj
	      value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
	    - filter: post_attention_layernorm
	      value: [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]
	    - value: 0
	    density: 0.5
	    epsilon: 0.1
	    lambda: 1.0
	- model: migtissera/Tess-3-Llama-3.1-70B
	  parameters:
	    weight: 1.0
	    # Density is 1 at both ends and 0.5 through the middle entries; the
	    # unfiltered default is 0.5.
	    density:
	    - filter: q_proj
	      value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
	    - filter: k_proj
	      value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
	    - filter: v_proj
	      value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
	    - filter: o_proj
	      value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
	    - filter: input_layernorm
	      value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
	    - filter: up_proj
	      value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
	    - filter: gate_proj
	      value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
	    - filter: down_proj
	      value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
	    - filter: post_attention_layernorm
	      value: [1, 1, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1]
	    - value: 0.5
	    # Epsilon is 0 at both ends and ramps 0.04 -> 0.1 -> 0.04 through the
	    # middle entries. Decimals must use a point: a comma (`0,09`) is a list
	    # separator in YAML and would yield the two items 0 and 9.
	    epsilon:
	    - filter: q_proj
	      value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
	    - filter: k_proj
	      value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
	    - filter: v_proj
	      value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
	    - filter: o_proj
	      value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
	    - filter: input_layernorm
	      value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
	    - filter: up_proj
	      value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
	    - filter: gate_proj
	      value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
	    - filter: down_proj
	      value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
	    - filter: post_attention_layernorm
	      value: [0, 0, 0, 0, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0, 0, 0, 0]
	    - value: 0.1
	    lambda: 1.0
	dtype: bfloat16
	out_dtype: bfloat16
	# Global merge parameters (apply to the whole merge, not to a single model).
	parameters:
	  int8_mask: true
	  normalize: true
	  rescale: true
	  filter_wise: false
	chat_template: auto
	tokenizer:
	  source: union

	```