sometimesanotion
committed on
Update README.md
Browse files
README.md
CHANGED
@@ -38,6 +38,30 @@ KytheraMix-7B is crafted using semi-automated merges YAML templates. Like AgoraM
|
|
38 |
The following YAML configuration was used to produce this model:
|
39 |
|
40 |
```yaml
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
name: agoramix-7b-reason-della # This contributes the knowledge and reasoning pool, later to be merged
|
42 |
merge_method: della # with the dominant instruction-following model
|
43 |
base_model: Qwen/Qwen2.5-7B
|
|
|
38 |
The following YAML configuration was used to produce this model:
|
39 |
|
40 |
```yaml
|
41 |
+
name: agoramix-7b-if-della # This contributes instruction following
|
42 |
+
merge_method: della
|
43 |
+
base_model: Qwen/Qwen2.5-7B
|
44 |
+
tokenizer_source: base
|
45 |
+
parameters:
|
46 |
+
int8_mask: false
|
47 |
+
normalize: true
|
48 |
+
rescale: false
|
49 |
+
density: 0.30
|
50 |
+
weight: 0.50
|
51 |
+
epsilon: 0.09
|
52 |
+
lambda: 0.95
|
53 |
+
models:
|
54 |
+
- model: newsbang/Homer-v0.5-Qwen2.5-7B # Exceptional instruction following, coding, math
|
55 |
+
parameters:
|
56 |
+
density: 0.80
|
57 |
+
weight: 1.00
|
58 |
+
- model: sethuiyer/Qwen2.5-7B-Anvita # Good instruction following, combined with exceptional recall and reasoning
|
59 |
+
parameters:
|
60 |
+
density: 0.30
|
61 |
+
weight: [ 0.00, 0.40, 0.40, 0.40, 0.40, 0.40, 0.40, 0.40, 0.40, 0.40, 0.40, 0.40, 0.30, 0.30 ]
|
62 |
+
dtype: bfloat16
|
63 |
+
out_dtype: bfloat16
|
64 |
+
---
|
65 |
name: agoramix-7b-reason-della # This contributes the knowledge and reasoning pool, later to be merged
|
66 |
merge_method: della # with the dominant instruction-following model
|
67 |
base_model: Qwen/Qwen2.5-7B
|