mlabonne committed
Commit 1725d13 · verified · 1 Parent(s): 79883c8

Update app.py

Files changed (1):
  app.py +11 -7
app.py CHANGED
@@ -147,7 +147,7 @@ slices:
       - model: {models[1]["Model"]}
         layer_range: [0, 32]
 merge_method: slerp
-base_model: {models[0]["Model"]}
+base_model: mlabonne/Meta-Llama-3-8B
 parameters:
   t:
     - filter: self_attn
@@ -160,14 +160,18 @@ random_seed: 0
 """
 dare_config = f"""
 models:
-  - model: {models[0]["Model"]}
+  - model: mlabonne/Meta-Llama-3-8B
     # No parameters necessary for base model
+  - model: {models[0]["Model"]}
+    parameters:
+      density: 0.53
+      weight: 0.5
   - model: {models[1]["Model"]}
     parameters:
       density: 0.53
-      weight: 0.6
+      weight: 0.5
 merge_method: dare_ties
-base_model: {models[0]["Model"]}
+base_model: mlabonne/Meta-Llama-3-8B
 parameters:
   int8_mask: true
 dtype: bfloat16
@@ -175,14 +179,14 @@ random_seed: 0
 """
 stock_config = f"""
 models:
-  - model: mistralai/Mistral-7B-v0.1
+  - model: mlabonne/Meta-Llama-3-8B
   - model: {models[0]["Model"]}
   - model: {models[1]["Model"]}
 merge_method: model_stock
-base_model: mistralai/Mistral-7B-v0.1
+base_model: mlabonne/Meta-Llama-3-8B
 dtype: bfloat16
 """
-yaml_config = random.choices([slerp_config, dare_config, stock_config], weights=[0.01, 0.01, 0.98], k=1)[0]
+yaml_config = random.choices([slerp_config, dare_config, stock_config], weights=[0.5, 0.4, 0.1], k=1)[0]
 
 with open('config.yaml', 'w', encoding="utf-8") as f:
     f.write(yaml_config)
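
For reference, a minimal sketch of what the updated dare_config template renders to once the f-string placeholders are filled in. The names example/model-a and example/model-b are hypothetical stand-ins for the models[0]["Model"] and models[1]["Model"] entries the app selects; only the template text itself comes from the diff above.

# Hypothetical stand-ins for the two models the app picks.
models = [{"Model": "example/model-a"}, {"Model": "example/model-b"}]

dare_config = f"""
models:
  - model: mlabonne/Meta-Llama-3-8B
    # No parameters necessary for base model
  - model: {models[0]["Model"]}
    parameters:
      density: 0.53
      weight: 0.5
  - model: {models[1]["Model"]}
    parameters:
      density: 0.53
      weight: 0.5
merge_method: dare_ties
base_model: mlabonne/Meta-Llama-3-8B
parameters:
  int8_mask: true
dtype: bfloat16
"""
# Both non-base models now contribute equally (weight 0.5 each),
# replacing the single weight: 0.6 entry from the old template.
print(dare_config)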
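The reweighting on the last changed line is the behavioral core of the commit: before, random.choices all but guaranteed a model_stock merge (weights 0.01/0.01/0.98); after, SLERP and DARE-TIES dominate (0.5/0.4/0.1). A small sketch of how random.choices interprets those relative weights (the printed counts are illustrative, not taken from the app):

import random
from collections import Counter

# random.choices treats weights as relative proportions, so [0.5, 0.4, 0.1]
# selects slerp ~50%, dare_ties ~40%, and model_stock ~10% of the time.
names = ["slerp", "dare_ties", "model_stock"]
draws = Counter(random.choices(names, weights=[0.5, 0.4, 0.1], k=10_000))
print(draws)  # e.g. Counter({'slerp': 5035, 'dare_ties': 3978, 'model_stock': 987})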
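Assuming the Space hands the written file to mergekit in the usual way, config.yaml is the input consumed by mergekit's mergekit-yaml entry point (e.g. mergekit-yaml config.yaml ./merge, where ./merge is an illustrative output directory, not something shown in this diff).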