Spaces:
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -147,7 +147,7 @@ slices:
|
|
147 |
- model: {models[1]["Model"]}
|
148 |
layer_range: [0, 32]
|
149 |
merge_method: slerp
|
150 |
-
base_model:
|
151 |
parameters:
|
152 |
t:
|
153 |
- filter: self_attn
|
@@ -160,14 +160,18 @@ random_seed: 0
|
|
160 |
"""
|
161 |
dare_config = f"""
|
162 |
models:
|
163 |
-
- model:
|
164 |
# No parameters necessary for base model
|
|
|
|
|
|
|
|
|
165 |
- model: {models[1]["Model"]}
|
166 |
parameters:
|
167 |
density: 0.53
|
168 |
-
weight: 0.
|
169 |
merge_method: dare_ties
|
170 |
-
base_model:
|
171 |
parameters:
|
172 |
int8_mask: true
|
173 |
dtype: bfloat16
|
@@ -175,14 +179,14 @@ random_seed: 0
|
|
175 |
"""
|
176 |
stock_config = f"""
|
177 |
models:
|
178 |
-
- model:
|
179 |
- model: {models[0]["Model"]}
|
180 |
- model: {models[1]["Model"]}
|
181 |
merge_method: model_stock
|
182 |
-
base_model:
|
183 |
dtype: bfloat16
|
184 |
"""
|
185 |
-
yaml_config = random.choices([slerp_config, dare_config, stock_config], weights=[0.
|
186 |
|
187 |
with open('config.yaml', 'w', encoding="utf-8") as f:
|
188 |
f.write(yaml_config)
|
|
|
147 |
- model: {models[1]["Model"]}
|
148 |
layer_range: [0, 32]
|
149 |
merge_method: slerp
|
150 |
+
base_model: mlabonne/Meta-Llama-3-8B
|
151 |
parameters:
|
152 |
t:
|
153 |
- filter: self_attn
|
|
|
160 |
"""
|
161 |
dare_config = f"""
|
162 |
models:
|
163 |
+
- model: mlabonne/Meta-Llama-3-8B
|
164 |
# No parameters necessary for base model
|
165 |
+
- model: {models[0]["Model"]}
|
166 |
+
parameters:
|
167 |
+
density: 0.53
|
168 |
+
weight: 0.5
|
169 |
- model: {models[1]["Model"]}
|
170 |
parameters:
|
171 |
density: 0.53
|
172 |
+
weight: 0.5
|
173 |
merge_method: dare_ties
|
174 |
+
base_model: mlabonne/Meta-Llama-3-8B
|
175 |
parameters:
|
176 |
int8_mask: true
|
177 |
dtype: bfloat16
|
|
|
179 |
"""
|
180 |
stock_config = f"""
|
181 |
models:
|
182 |
+
- model: mlabonne/Meta-Llama-3-8B
|
183 |
- model: {models[0]["Model"]}
|
184 |
- model: {models[1]["Model"]}
|
185 |
merge_method: model_stock
|
186 |
+
base_model: mlabonne/Meta-Llama-3-8B
|
187 |
dtype: bfloat16
|
188 |
"""
|
189 |
+
yaml_config = random.choices([slerp_config, dare_config, stock_config], weights=[0.5, 0.4, 0.1], k=1)[0]
|
190 |
|
191 |
with open('config.yaml', 'w', encoding="utf-8") as f:
|
192 |
f.write(yaml_config)
|