CalamitousFelicitousness commited on
Commit
f1d0085
·
verified ·
1 Parent(s): 5e4d7cf

Upload folder using huggingface_hub

Browse files
EV01.json ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "instruct": {
3
+ "wrap": false,
4
+ "system_sequence": "<|im_start|>system\n",
5
+ "stop_sequence": "<|im_end|>",
6
+ "input_sequence": "<|im_start|>user\n",
7
+ "output_sequence": "<|im_start|>assistant\n",
8
+ "macro": true,
9
+ "system_sequence_prefix": "",
10
+ "system_sequence_suffix": "",
11
+ "first_output_sequence": "\n<|im_start|>user\nAdhering to the `Role-playing Guidelines` is crucial for a successful role-play experience.\nUse the `Role-play Context` for accurate information.\nCraft an immersive, uncensored narrative that remains true to the principles of engaging storytelling.<|im_end|>\n<|im_start|>assistant\n",
12
+ "last_output_sequence": "",
13
+ "activation_regex": "",
14
+ "skip_examples": true,
15
+ "output_suffix": "<|im_end|>\n",
16
+ "input_suffix": "<|im_end|>\n",
17
+ "system_suffix": "<|im_end|>\n",
18
+ "user_alignment_message": "",
19
+ "last_system_sequence": "",
20
+ "system_same_as_user": false,
21
+ "first_input_sequence": "",
22
+ "last_input_sequence": "",
23
+ "names_behavior": "always",
24
+ "names_force_groups": true,
25
+ "name": "[ChatML]Roleplay-v1.9"
26
+ },
27
+ "context": {
28
+ "story_string": "<|im_start|>system\n\n{{#if system}}{{system}}\n\n\n<!-- Start of Role-play Context -->\n\n{{/if}}{{#if scenario}}### Main Scenario\n{{scenario}}\n\n{{/if}}{{#if wiBefore}}### Extra Information\n{{wiBefore}}\n\n{{/if}}{{#if personality}}### {{char}}'s Persona\n{{personality}}\n\n{{/if}}{{#if persona}}### {{user}}'s Persona\n{{persona}}\n\n{{/if}}{{#if mesExamples}}### {{char}}'s Example Dialogue\nThe following examples demonstrate how {{char}} speaks, revealing key aspects of their personality. Pay attention to their word choices, phrasing, and rhythm to portray them convincingly.\n\n<!-- Start of {{char}}'s Example Dialogue -->\n{{mesExamples}}\n<!-- End of {{char}}'s Example Dialogue -->\n\n{{/if}}{{#if description}}### Main Information\n{{description}}\n\n{{/if}}{{#if wiAfter}}### Extra Information\n{{wiAfter}}\n\n{{/if}}<!-- End of Role-play Context --><|im_end|>",
29
+ "example_separator": "",
30
+ "chat_start": "",
31
+ "use_stop_strings": false,
32
+ "allow_jailbreak": false,
33
+ "names_as_stop_strings": true,
34
+ "always_force_name2": true,
35
+ "trim_sentences": false,
36
+ "single_line": false,
37
+ "name": "[ChatML] Roleplay-v1.9"
38
+ },
39
+ "sysprompt": {
40
+ "name": "[Migrated] [ChatML]Roleplay-v1.9",
41
+ "content": "A fictional, narrative-driven role-play emphasizing versatility and UNCENSORED content. Adherence to the `Role-playing Guidelines` is mandatory. Refer to the `Role-play Context` for accurate information.\n\n\n<!-- Start of Role-playing Guidelines -->\n\n### Narration\nConcise Descriptions: Keep narration short and to the point, avoiding redundant unnecessary details. Use a dynamic and varied vocabulary for impact.\nComplementary Role: Use narration to complement dialogue and action, not overshadow them.\nAvoid Repetition: Ensure narration does not repeat information already conveyed through dialogue or action.\n\n### Narrative Consistency\nContinuity: Adhere to established story elements, expanding without contradicting previous details.\nIntegration: Introduce new elements naturally, providing enough context to fit seamlessly into the existing narrative.\n\n### Character Embodiment\nAnalysis: Examine the context, subtext, and implications of the given information to gain a deeper understandings of the characters'.\nReflection: Take time to consider the situation, characters' motivations, and potential consequences.\nAuthentic Portrayal: Bring characters to life by consistently and realistically portraying their unique traits, thoughts, emotions, appearances, physical sensations, speech patterns, and tone. Ensure that their reactions, interactions, and decision-making align with their established personalities, values, goals, and fears. Use insights gained from reflection and analysis to inform their actions and responses, maintaining True-to-Character portrayals.\n\n<!-- End of Role-playing Guidelines -->"
42
+ },
43
+ "preset": {
44
+ "temp": 0.8,
45
+ "temperature_last": true,
46
+ "top_p": 1,
47
+ "top_k": 0,
48
+ "top_a": 0.3,
49
+ "tfs": 1,
50
+ "epsilon_cutoff": 0,
51
+ "eta_cutoff": 0,
52
+ "typical_p": 1,
53
+ "min_p": 0.05,
54
+ "rep_pen": 1.03,
55
+ "rep_pen_range": 0,
56
+ "rep_pen_decay": 0,
57
+ "rep_pen_slope": 1,
58
+ "no_repeat_ngram_size": 0,
59
+ "penalty_alpha": 0,
60
+ "num_beams": 1,
61
+ "length_penalty": 1,
62
+ "min_length": 0,
63
+ "encoder_rep_pen": 1,
64
+ "freq_pen": 0,
65
+ "presence_pen": 0,
66
+ "skew": 0,
67
+ "do_sample": true,
68
+ "early_stopping": false,
69
+ "dynatemp": false,
70
+ "min_temp": 0,
71
+ "max_temp": 2,
72
+ "dynatemp_exponent": 1,
73
+ "smoothing_factor": 0,
74
+ "smoothing_curve": 1,
75
+ "dry_allowed_length": 2,
76
+ "dry_multiplier": 0,
77
+ "dry_base": 1.75,
78
+ "dry_sequence_breakers": "[\"\\n\", \":\", \"\\\"\", \"*\"]",
79
+ "dry_penalty_last_n": 0,
80
+ "add_bos_token": true,
81
+ "ban_eos_token": false,
82
+ "skip_special_tokens": false,
83
+ "mirostat_mode": 0,
84
+ "mirostat_tau": 5,
85
+ "mirostat_eta": 0.1,
86
+ "guidance_scale": 1,
87
+ "negative_prompt": "",
88
+ "grammar_string": "",
89
+ "json_schema": {},
90
+ "banned_tokens": "",
91
+ "sampler_priority": [
92
+ "repetition_penalty",
93
+ "presence_penalty",
94
+ "frequency_penalty",
95
+ "dry",
96
+ "temperature",
97
+ "dynamic_temperature",
98
+ "quadratic_sampling",
99
+ "top_k",
100
+ "top_p",
101
+ "typical_p",
102
+ "epsilon_cutoff",
103
+ "eta_cutoff",
104
+ "tfs",
105
+ "top_a",
106
+ "min_p",
107
+ "mirostat",
108
+ "xtc",
109
+ "encoder_repetition_penalty",
110
+ "no_repeat_ngram"
111
+ ],
112
+ "samplers": [
113
+ "top_k",
114
+ "tfs_z",
115
+ "typical_p",
116
+ "top_p",
117
+ "min_p",
118
+ "xtc",
119
+ "temperature"
120
+ ],
121
+ "ignore_eos_token": false,
122
+ "spaces_between_special_tokens": true,
123
+ "speculative_ngram": false,
124
+ "sampler_order": [
125
+ 6,
126
+ 0,
127
+ 1,
128
+ 3,
129
+ 4,
130
+ 2,
131
+ 5
132
+ ],
133
+ "logit_bias": [],
134
+ "xtc_threshold": 0.1,
135
+ "xtc_probability": 0,
136
+ "ignore_eos_token_aphrodite": false,
137
+ "spaces_between_special_tokens_aphrodite": true,
138
+ "rep_pen_size": 0,
139
+ "genamt": 1024,
140
+ "max_length": 16384,
141
+ "name": "Eva Stable"
142
+ }
143
+ }
LICENSE ADDED
File without changes
README.md ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: other
4
+ license_name: qwen
5
+ license_link: https://huggingface.co/Qwen/Qwen2.5-72B-Instruct/blob/main/LICENSE
6
+ base_model: Qwen/Qwen2.5-72B
7
+ datasets:
8
+ - anthracite-org/kalo-opus-instruct-22k-no-refusal
9
+ - Nopm/Opus_WritingStruct
10
+ - Gryphe/Sonnet3.5-SlimOrcaDedupCleaned
11
+ - Gryphe/Sonnet3.5-Charcard-Roleplay
12
+ - Gryphe/ChatGPT-4o-Writing-Prompts
13
+ - Epiculous/Synthstruct-Gens-v1.1-Filtered-n-Cleaned
14
+ - Epiculous/SynthRP-Gens-v1.1-Filtered-n-Cleaned
15
+ - nothingiisreal/Reddit-Dirty-And-WritingPrompts
16
+ - allura-org/Celeste-1.x-data-mixture
17
+ - cognitivecomputations/dolphin-2.9.3
18
+ tags:
19
+ - generated_from_trainer
20
+ model-index:
21
+ - name: EVA-Qwen2.5-72B-SFFT-v0.2
22
+ results: []
23
+ ---
24
+
25
+
26
+
27
+ # EVA Qwen2.5-72B v0.2
28
+
29
+ <p>
30
+ A RP/storywriting specialist model, full-parameter finetune of Qwen2.5-72B on mixture of synthetic and natural data.<br>
31
+ It uses Celeste 70B 0.1 data mixture, greatly expanding it to improve versatility, creativity and "flavor" of the resulting model.<br>
32
+ </p>
33
+
34
+ <p>Dedicated to Nev.</p>
35
+
36
+ <p><b>NOTE: LLM-Compressor quants don't seem to work correctly, quality seems to be much worse than normal. It wasn't the case with previous versions. GGUF and GPTQ seem to be unaffected.</b></p>
37
+ </br>
38
+ <p><b>Version notes for 0.2</b>: Optimized training hyperparameters and increased sequence length. Better instruction following deeper into context and less repetition.</p>
39
+
40
+ <p>
41
+ <p>Prompt format is ChatML.</p><br>
42
+ <h3>Recommended sampler values:</h3>
43
+ <ul>
44
+ <li>Temperature: 0.8</li>
45
+ <li>Min-P: 0.05</li>
46
+ <li>Top-A: 0.3</li>
47
+ <li>Repetition Penalty: 1.03</li>
48
+ </ul>
49
+
50
+ <h3>Recommended SillyTavern preset (via CalamitousFelicitousness):</h3>
51
+ <ul><li><a href="EV01.json">Master import</a></li></ul>
52
+
53
+ </p>
54
+
55
+ <p>
56
+ <br>
57
+ <h3>
58
+ Training data:
59
+ </h3>
60
+ <ul>
61
+ <li>Celeste 70B 0.1 data mixture minus Opus Instruct subset. See that model's <a href=https://huggingface.co/nothingiisreal/L3.1-70B-Celeste-V0.1-BF16>card</a> for details.</li>
62
+ <li>Kalomaze's Opus_Instruct_25k dataset, filtered for refusals.</li>
63
+ <li>A subset (1k rows) of ChatGPT-4o-WritingPrompts by Gryphe</li>
64
+ <li>A subset (2k rows) of Sonnet3.5-Charcards-Roleplay by Gryphe</li>
65
+ <li>Synthstruct and SynthRP datasets by Epiculous</li>
66
+ <li>A subset from Dolphin-2.9.3, including filtered version of not_samantha and a small subset of systemchat.</li>
67
+ </ul>
68
+ <h3>
69
+ Training time and hardware:
70
+ </h3>
71
+ <ul><li>17 hours on 8xH100 SXM</a></li></ul><br>
72
+ </p>
73
+ <p>Model was created by Kearm, Auri and Cahvay.</p>
74
+ <h4>Special thanks:</h4><ul>
75
+ <li>to Featherless for sponsoring this run</li>
76
+ <li>to Cahvay for his work on investigating and reprocessing the corrupted dataset, removing the single biggest source of data poisoning.</li>
77
+ <li>to Gryphe, Lemmy, Kalomaze, Nopm, Epiculous and CognitiveComputations for the data</li>
78
+ <li>and to Allura-org for support, feedback, beta-testing and doing quality control of EVA models.</li></ul>
79
+
80
+
81
+
82
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
83
+ <details><summary>See axolotl config</summary>
84
+
85
+ axolotl version: `0.4.1`
86
+ ```yaml
87
+ base_model: Qwen/Qwen2.5-72B
88
+
89
+ load_in_8bit: false
90
+ load_in_4bit: false
91
+ strict: false
92
+
93
+ plugins:
94
+ - axolotl.integrations.liger.LigerPlugin
95
+ liger_rope: true
96
+ liger_rms_norm: true
97
+ liger_swiglu: true
98
+ liger_fused_linear_cross_entropy: true
99
+
100
+ # plugins:
101
+ # - axolotl.integrations.spectrum.SpectrumPlugin
102
+
103
+ # spectrum_top_fraction: 0.5
104
+ # # Optional if using a pre-scanned model as your base_model. Useful if using a model mirror
105
+ # spectrum_model_name: Qwen/Qwen2.5-32B
106
+
107
+ datasets:
108
+ - path: datasets/Celeste_Filtered_utf8fix.jsonl
109
+ type: sharegpt
110
+ - path: datasets/deduped_not_samantha_norefusals.jsonl
111
+ type: sharegpt
112
+ - path: datasets/deduped_SynthRP-Gens_processed_ShareGPT_converted_cleaned.jsonl
113
+ type: sharegpt
114
+ - path: datasets/deduped_Synthstruct-Gens_processed_sharegpt_converted_cleaned.jsonl
115
+ type: sharegpt
116
+ - path: datasets/Gryphe-4o-WP-filtered-sharegpt_utf8fix.jsonl
117
+ type: sharegpt
118
+ - path: datasets/opus-instruct-22k-no_refusals-filtered_utf8fix.jsonl
119
+ type: sharegpt
120
+ - path: datasets/Sonnet3-5-charcard-names-filtered-sharegpt_utf8fix.jsonl
121
+ type: sharegpt
122
+ - path: datasets/SystemChat_subset_filtered_sharegpt_utf8fix.jsonl
123
+ type: sharegpt
124
+
125
+ chat_template: chatml
126
+ shuffle_merged_datasets: true
127
+ val_set_size: 0.001
128
+ output_dir: EVA-Qwen2.5-72B-SFFT-v0.2
129
+
130
+ sequence_len: 10240
131
+ sample_packing: true
132
+ eval_sample_packing: false
133
+ pad_to_sequence_len: false
134
+
135
+ # adapter: qlora
136
+ # lora_model_dir:
137
+ # lora_r: 64
138
+ # lora_alpha: 128
139
+ # lora_dropout: 0.05
140
+ # lora_target_linear: true
141
+ # peft_use_dora: true
142
+
143
+ unfrozen_parameters:
144
+ - ^lm_head.weight$
145
+ - ^model.embed_tokens.weight$
146
+ # mlp.down_proj layers
147
+ - model.layers.62.mlp.down_proj
148
+ - model.layers.64.mlp.down_proj
149
+ - model.layers.63.mlp.down_proj
150
+ - model.layers.66.mlp.down_proj
151
+ - model.layers.65.mlp.down_proj
152
+ - model.layers.67.mlp.down_proj
153
+ - model.layers.68.mlp.down_proj
154
+ - model.layers.31.mlp.down_proj
155
+ - model.layers.60.mlp.down_proj
156
+ - model.layers.69.mlp.down_proj
157
+ - model.layers.61.mlp.down_proj
158
+ - model.layers.59.mlp.down_proj
159
+ - model.layers.30.mlp.down_proj
160
+ - model.layers.70.mlp.down_proj
161
+ - model.layers.32.mlp.down_proj
162
+ - model.layers.34.mlp.down_proj
163
+ - model.layers.33.mlp.down_proj
164
+ - model.layers.76.mlp.down_proj
165
+ - model.layers.72.mlp.down_proj
166
+ - model.layers.71.mlp.down_proj
167
+ - model.layers.58.mlp.down_proj
168
+ - model.layers.75.mlp.down_proj
169
+ - model.layers.29.mlp.down_proj
170
+ - model.layers.56.mlp.down_proj
171
+ - model.layers.26.mlp.down_proj
172
+ - model.layers.35.mlp.down_proj
173
+ - model.layers.28.mlp.down_proj
174
+ - model.layers.57.mlp.down_proj
175
+ - model.layers.77.mlp.down_proj
176
+ - model.layers.36.mlp.down_proj
177
+ - model.layers.27.mlp.down_proj
178
+ - model.layers.25.mlp.down_proj
179
+ - model.layers.78.mlp.down_proj
180
+ - model.layers.37.mlp.down_proj
181
+ - model.layers.73.mlp.down_proj
182
+ - model.layers.55.mlp.down_proj
183
+ - model.layers.54.mlp.down_proj
184
+ - model.layers.74.mlp.down_proj
185
+ - model.layers.24.mlp.down_proj
186
+ - model.layers.53.mlp.down_proj
187
+ # mlp.gate_proj layers
188
+ - model.layers.78.mlp.gate_proj
189
+ - model.layers.77.mlp.gate_proj
190
+ - model.layers.76.mlp.gate_proj
191
+ - model.layers.79.mlp.gate_proj
192
+ - model.layers.75.mlp.gate_proj
193
+ - model.layers.74.mlp.gate_proj
194
+ - model.layers.73.mlp.gate_proj
195
+ - model.layers.72.mlp.gate_proj
196
+ - model.layers.71.mlp.gate_proj
197
+ - model.layers.70.mlp.gate_proj
198
+ - model.layers.69.mlp.gate_proj
199
+ - model.layers.57.mlp.gate_proj
200
+ - model.layers.54.mlp.gate_proj
201
+ - model.layers.55.mlp.gate_proj
202
+ - model.layers.68.mlp.gate_proj
203
+ - model.layers.63.mlp.gate_proj
204
+ - model.layers.53.mlp.gate_proj
205
+ - model.layers.44.mlp.gate_proj
206
+ - model.layers.45.mlp.gate_proj
207
+ - model.layers.49.mlp.gate_proj
208
+ - model.layers.58.mlp.gate_proj
209
+ - model.layers.46.mlp.gate_proj
210
+ - model.layers.56.mlp.gate_proj
211
+ - model.layers.67.mlp.gate_proj
212
+ - model.layers.62.mlp.gate_proj
213
+ - model.layers.50.mlp.gate_proj
214
+ - model.layers.64.mlp.gate_proj
215
+ - model.layers.52.mlp.gate_proj
216
+ - model.layers.40.mlp.gate_proj
217
+ - model.layers.43.mlp.gate_proj
218
+ - model.layers.48.mlp.gate_proj
219
+ - model.layers.66.mlp.gate_proj
220
+ - model.layers.47.mlp.gate_proj
221
+ - model.layers.59.mlp.gate_proj
222
+ - model.layers.65.mlp.gate_proj
223
+ - model.layers.61.mlp.gate_proj
224
+ - model.layers.60.mlp.gate_proj
225
+ - model.layers.42.mlp.gate_proj
226
+ - model.layers.51.mlp.gate_proj
227
+ - model.layers.41.mlp.gate_proj
228
+ # mlp.up_proj layers
229
+ - model.layers.70.mlp.up_proj
230
+ - model.layers.69.mlp.up_proj
231
+ - model.layers.71.mlp.up_proj
232
+ - model.layers.68.mlp.up_proj
233
+ - model.layers.72.mlp.up_proj
234
+ - model.layers.67.mlp.up_proj
235
+ - model.layers.66.mlp.up_proj
236
+ - model.layers.73.mlp.up_proj
237
+ - model.layers.46.mlp.up_proj
238
+ - model.layers.63.mlp.up_proj
239
+ - model.layers.75.mlp.up_proj
240
+ - model.layers.76.mlp.up_proj
241
+ - model.layers.74.mlp.up_proj
242
+ - model.layers.45.mlp.up_proj
243
+ - model.layers.62.mlp.up_proj
244
+ - model.layers.64.mlp.up_proj
245
+ - model.layers.65.mlp.up_proj
246
+ - model.layers.44.mlp.up_proj
247
+ - model.layers.53.mlp.up_proj
248
+ - model.layers.47.mlp.up_proj
249
+ - model.layers.49.mlp.up_proj
250
+ - model.layers.48.mlp.up_proj
251
+ - model.layers.57.mlp.up_proj
252
+ - model.layers.43.mlp.up_proj
253
+ - model.layers.42.mlp.up_proj
254
+ - model.layers.56.mlp.up_proj
255
+ - model.layers.61.mlp.up_proj
256
+ - model.layers.54.mlp.up_proj
257
+ - model.layers.40.mlp.up_proj
258
+ - model.layers.55.mlp.up_proj
259
+ - model.layers.77.mlp.up_proj
260
+ - model.layers.60.mlp.up_proj
261
+ - model.layers.41.mlp.up_proj
262
+ - model.layers.35.mlp.up_proj
263
+ - model.layers.37.mlp.up_proj
264
+ - model.layers.58.mlp.up_proj
265
+ - model.layers.34.mlp.up_proj
266
+ - model.layers.38.mlp.up_proj
267
+ - model.layers.33.mlp.up_proj
268
+ - model.layers.39.mlp.up_proj
269
+ # self_attn.k_proj layers
270
+ - model.layers.36.self_attn.k_proj
271
+ - model.layers.79.self_attn.k_proj
272
+ - model.layers.35.self_attn.k_proj
273
+ - model.layers.34.self_attn.k_proj
274
+ - model.layers.37.self_attn.k_proj
275
+ - model.layers.33.self_attn.k_proj
276
+ - model.layers.38.self_attn.k_proj
277
+ - model.layers.39.self_attn.k_proj
278
+ - model.layers.74.self_attn.k_proj
279
+ - model.layers.77.self_attn.k_proj
280
+ - model.layers.41.self_attn.k_proj
281
+ - model.layers.69.self_attn.k_proj
282
+ - model.layers.32.self_attn.k_proj
283
+ - model.layers.78.self_attn.k_proj
284
+ - model.layers.30.self_attn.k_proj
285
+ - model.layers.70.self_attn.k_proj
286
+ - model.layers.25.self_attn.k_proj
287
+ - model.layers.42.self_attn.k_proj
288
+ - model.layers.29.self_attn.k_proj
289
+ - model.layers.31.self_attn.k_proj
290
+ - model.layers.68.self_attn.k_proj
291
+ - model.layers.66.self_attn.k_proj
292
+ - model.layers.22.self_attn.k_proj
293
+ - model.layers.65.self_attn.k_proj
294
+ - model.layers.44.self_attn.k_proj
295
+ - model.layers.40.self_attn.k_proj
296
+ - model.layers.63.self_attn.k_proj
297
+ - model.layers.23.self_attn.k_proj
298
+ - model.layers.28.self_attn.k_proj
299
+ - model.layers.24.self_attn.k_proj
300
+ - model.layers.26.self_attn.k_proj
301
+ - model.layers.67.self_attn.k_proj
302
+ - model.layers.75.self_attn.k_proj
303
+ - model.layers.27.self_attn.k_proj
304
+ - model.layers.57.self_attn.k_proj
305
+ - model.layers.64.self_attn.k_proj
306
+ - model.layers.71.self_attn.k_proj
307
+ - model.layers.61.self_attn.k_proj
308
+ - model.layers.72.self_attn.k_proj
309
+ - model.layers.73.self_attn.k_proj
310
+ # self_attn.o_proj layers
311
+ - model.layers.69.self_attn.o_proj
312
+ - model.layers.39.self_attn.o_proj
313
+ - model.layers.16.self_attn.o_proj
314
+ - model.layers.14.self_attn.o_proj
315
+ - model.layers.19.self_attn.o_proj
316
+ - model.layers.42.self_attn.o_proj
317
+ - model.layers.12.self_attn.o_proj
318
+ - model.layers.15.self_attn.o_proj
319
+ - model.layers.17.self_attn.o_proj
320
+ - model.layers.38.self_attn.o_proj
321
+ - model.layers.23.self_attn.o_proj
322
+ - model.layers.22.self_attn.o_proj
323
+ - model.layers.13.self_attn.o_proj
324
+ - model.layers.29.self_attn.o_proj
325
+ - model.layers.41.self_attn.o_proj
326
+ - model.layers.44.self_attn.o_proj
327
+ - model.layers.46.self_attn.o_proj
328
+ - model.layers.45.self_attn.o_proj
329
+ - model.layers.43.self_attn.o_proj
330
+ - model.layers.49.self_attn.o_proj
331
+ - model.layers.30.self_attn.o_proj
332
+ - model.layers.26.self_attn.o_proj
333
+ - model.layers.25.self_attn.o_proj
334
+ - model.layers.37.self_attn.o_proj
335
+ - model.layers.47.self_attn.o_proj
336
+ - model.layers.11.self_attn.o_proj
337
+ - model.layers.18.self_attn.o_proj
338
+ - model.layers.28.self_attn.o_proj
339
+ - model.layers.20.self_attn.o_proj
340
+ - model.layers.27.self_attn.o_proj
341
+ - model.layers.53.self_attn.o_proj
342
+ - model.layers.52.self_attn.o_proj
343
+ - model.layers.35.self_attn.o_proj
344
+ - model.layers.71.self_attn.o_proj
345
+ - model.layers.10.self_attn.o_proj
346
+ - model.layers.3.self_attn.o_proj
347
+ - model.layers.21.self_attn.o_proj
348
+ - model.layers.24.self_attn.o_proj
349
+ - model.layers.68.self_attn.o_proj
350
+ - model.layers.48.self_attn.o_proj
351
+ # self_attn.q_proj layers
352
+ - model.layers.1.self_attn.q_proj
353
+ - model.layers.2.self_attn.q_proj
354
+ - model.layers.3.self_attn.q_proj
355
+ - model.layers.0.self_attn.q_proj
356
+ - model.layers.5.self_attn.q_proj
357
+ - model.layers.4.self_attn.q_proj
358
+ - model.layers.6.self_attn.q_proj
359
+ - model.layers.8.self_attn.q_proj
360
+ - model.layers.7.self_attn.q_proj
361
+ - model.layers.9.self_attn.q_proj
362
+ - model.layers.10.self_attn.q_proj
363
+ - model.layers.68.self_attn.q_proj
364
+ - model.layers.25.self_attn.q_proj
365
+ - model.layers.12.self_attn.q_proj
366
+ - model.layers.54.self_attn.q_proj
367
+ - model.layers.55.self_attn.q_proj
368
+ - model.layers.61.self_attn.q_proj
369
+ - model.layers.18.self_attn.q_proj
370
+ - model.layers.49.self_attn.q_proj
371
+ - model.layers.66.self_attn.q_proj
372
+ - model.layers.72.self_attn.q_proj
373
+ - model.layers.11.self_attn.q_proj
374
+ - model.layers.52.self_attn.q_proj
375
+ - model.layers.64.self_attn.q_proj
376
+ - model.layers.15.self_attn.q_proj
377
+ - model.layers.60.self_attn.q_proj
378
+ - model.layers.50.self_attn.q_proj
379
+ - model.layers.59.self_attn.q_proj
380
+ - model.layers.53.self_attn.q_proj
381
+ - model.layers.48.self_attn.q_proj
382
+ - model.layers.57.self_attn.q_proj
383
+ - model.layers.70.self_attn.q_proj
384
+ - model.layers.17.self_attn.q_proj
385
+ - model.layers.67.self_attn.q_proj
386
+ - model.layers.71.self_attn.q_proj
387
+ - model.layers.62.self_attn.q_proj
388
+ - model.layers.51.self_attn.q_proj
389
+ - model.layers.19.self_attn.q_proj
390
+ - model.layers.58.self_attn.q_proj
391
+ - model.layers.13.self_attn.q_proj
392
+ # self_attn.v_proj layers
393
+ - model.layers.23.self_attn.v_proj
394
+ - model.layers.25.self_attn.v_proj
395
+ - model.layers.26.self_attn.v_proj
396
+ - model.layers.27.self_attn.v_proj
397
+ - model.layers.28.self_attn.v_proj
398
+ - model.layers.29.self_attn.v_proj
399
+ - model.layers.30.self_attn.v_proj
400
+ - model.layers.31.self_attn.v_proj
401
+ - model.layers.34.self_attn.v_proj
402
+ - model.layers.35.self_attn.v_proj
403
+ - model.layers.36.self_attn.v_proj
404
+ - model.layers.37.self_attn.v_proj
405
+ - model.layers.38.self_attn.v_proj
406
+ - model.layers.42.self_attn.v_proj
407
+ - model.layers.48.self_attn.v_proj
408
+ - model.layers.57.self_attn.v_proj
409
+ - model.layers.58.self_attn.v_proj
410
+ - model.layers.61.self_attn.v_proj
411
+ - model.layers.63.self_attn.v_proj
412
+ - model.layers.64.self_attn.v_proj
413
+ - model.layers.65.self_attn.v_proj
414
+ - model.layers.66.self_attn.v_proj
415
+ - model.layers.69.self_attn.v_proj
416
+ - model.layers.70.self_attn.v_proj
417
+ - model.layers.74.self_attn.v_proj
418
+ - model.layers.75.self_attn.v_proj
419
+ - model.layers.72.self_attn.v_proj
420
+ - model.layers.39.self_attn.v_proj
421
+ - model.layers.41.self_attn.v_proj
422
+ - model.layers.40.self_attn.v_proj
423
+ - model.layers.33.self_attn.v_proj
424
+ - model.layers.59.self_attn.v_proj
425
+ - model.layers.16.self_attn.v_proj
426
+ - model.layers.15.self_attn.v_proj
427
+ - model.layers.76.self_attn.v_proj
428
+ - model.layers.24.self_attn.v_proj
429
+ - model.layers.68.self_attn.v_proj
430
+ - model.layers.67.self_attn.v_proj
431
+ - model.layers.55.self_attn.v_proj
432
+ - model.layers.44.self_attn.v_proj
433
+
434
+
435
+
436
+ wandb_project: EVA-Qwen2.5-72B-SFFT-v0.2
437
+ wandb_entity:
438
+ wandb_watch:
439
+ wandb_name: Unit-02
440
+ wandb_log_model:
441
+
442
+ gradient_accumulation_steps: 8
443
+ micro_batch_size: 1
444
+ num_epochs: 3
445
+ optimizer: paged_ademamix_8bit
446
+ lr_scheduler: cosine
447
+ learning_rate: 0.00003
448
+ max_grad_norm: 1.5
449
+
450
+ train_on_inputs: false
451
+ group_by_length: false
452
+ bf16: auto
453
+ fp16:
454
+ tf32: false
455
+
456
+ gradient_checkpointing: "unsloth"
457
+ # gradient_checkpointing_kwargs:
458
+ # use_reentrant: true
459
+ early_stopping_patience:
460
+ resume_from_checkpoint: EVA-Qwen2.5-72B-SFFT-v0.2/checkpoint-128
461
+ local_rank:
462
+ logging_steps: 1
463
+ xformers_attention:
464
+ flash_attention: true
465
+
466
+ warmup_steps: 20
467
+ evals_per_epoch: 4
468
+ saves_per_epoch: 4
469
+ save_safetensors: true
470
+ save_total_limit: 1
471
+ hub_model_id:
472
+ hub_strategy:
473
+ debug:
474
+ deepspeed: deepspeed_configs/zero3_bf16_cpuoffload_params.json
475
+ weight_decay: 0.12
476
+ # fsdp:
477
+ # - full_shard
478
+ # - auto_wrap
479
+ # fsdp_config:
480
+ # fsdp_limit_all_gathers: true
481
+ # fsdp_sync_module_states: false
482
+ # fsdp_offload_params: true
483
+ # fsdp_cpu_ram_efficient_loading: true
484
+ # fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
485
+ # fsdp_transformer_layer_cls_to_wrap: Qwen2DecoderLayer
486
+ # fsdp_activation_checkpointing: true
487
+ # fsdp_state_dict_type: SHARDED_STATE_DICT # Changed from FULL_STATE_DICT
488
+ # fsdp_sharding_strategy: FULL_SHARD
489
+ # fsdp_forward_prefetch: false # Added
490
+ # fsdp_backward_prefetch: "BACKWARD_PRE" # Added
491
+ # fsdp_backward_prefetch_limit: 1 # Added
492
+ # fsdp_mixed_precision: BF16 # Added
493
+ ```
494
+
495
+ </details><br>
added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": true,
3
+ "_name_or_path": "/workspace/EVA-Qwen2.5-72B-v0.2-padded",
4
+ "architectures": [
5
+ "Qwen2ForCausalLM"
6
+ ],
7
+ "attention_dropout": 0.0,
8
+ "eos_token_id": 151643,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8192,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 29696,
13
+ "max_position_embeddings": 131072,
14
+ "max_window_layers": 80,
15
+ "model_type": "qwen2",
16
+ "num_attention_heads": 64,
17
+ "num_hidden_layers": 80,
18
+ "num_key_value_heads": 8,
19
+ "quantization_config": {
20
+ "bits": 8,
21
+ "checkpoint_format": "gptq",
22
+ "desc_act": true,
23
+ "dynamic": null,
24
+ "group_size": 128,
25
+ "lm_head": false,
26
+ "meta": {
27
+ "damp_auto_increment": 0.0015,
28
+ "damp_percent": 0.005,
29
+ "quantizer": "gptqmodel:1.2.1",
30
+ "uri": "https://github.com/modelcloud/gptqmodel"
31
+ },
32
+ "quant_method": "gptq",
33
+ "static_groups": false,
34
+ "sym": true,
35
+ "true_sequential": true
36
+ },
37
+ "rms_norm_eps": 1e-05,
38
+ "rope_scaling": null,
39
+ "rope_theta": 1000000.0,
40
+ "sliding_window": null,
41
+ "tie_word_embeddings": false,
42
+ "torch_dtype": "bfloat16",
43
+ "transformers_version": "4.46.3",
44
+ "use_cache": false,
45
+ "use_sliding_window": false,
46
+ "vocab_size": 152064
47
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": 151643,
5
+ "max_new_tokens": 2048,
6
+ "transformers_version": "4.45.1"
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b7dca75d9c3085e2b1df2567e865f3d0afb53fbbaf3ae224d50cfbfa9521f2d
3
+ size 5849573272
model-00002-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d84cf8fa2fe0464b586475d8958759ece0504c20a89c84402cf7a6779effdb7
3
+ size 5814731208
model-00003-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d894b6c04b516548d8e99ea22b931b9377922fe14183cd962054a7fb034447d
3
+ size 5908929296
model-00004-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5e4393c13fe99b8a3f298739995f3e3b5d2d6966a9667532f0e724ce0d67b76
3
+ size 5814731440
model-00005-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa4df2ce37f6591aad369f0dd6de9e1f6e088c18daebc771a756ee22351ab1f1
3
+ size 5908929296
model-00006-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8b43d329d6808501db21df34cdb6239ee7a47b4e446898ff289113310b768a7
3
+ size 5814731440
model-00007-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:099572c73e11f865b5fe74c14a093d59f49c5fdf870642088d45ba26049466dd
3
+ size 5908929296
model-00008-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b546a4df0440474998775710bb3df4637175ef6a69e92f605e49503f0ebff88
3
+ size 5814731440
model-00009-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af2f6a5fa5985a6375f2cb1ff258c0628df900182be2a6f8e53e49a032d8bd93
3
+ size 5908929296
model-00010-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3fb0e2017751769e51201124b9e84547e7d6cf16e26def7da6849f312188757
3
+ size 5814731440
model-00011-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1d7d49386b000d2a733b98f5693b71d628c9f9f6433683f7ba9a4cbc98a2832
3
+ size 5908929296
model-00012-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da782193ac033c8cdc7acfbc8b32d385a225891529b3e39ef85fe961b4384035
3
+ size 5814731440
model-00013-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a846a22055049ec143ee3bef6799aea21db89ff99b6e67cd9a2e87ca45ff2c3b
3
+ size 4354429160
model-00014-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e968e8597c5d92c683e52693842dc3cebdcb7f7368eb9e25cea1a3eb46b2a89d
3
+ size 2491416704
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
quant_log.csv ADDED
@@ -0,0 +1,561 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ layer,module,loss,damp,time
2
+ 0,self_attn.k_proj,0.00000,0.00500,1.472
3
+ 0,self_attn.v_proj,0.00000,0.00500,1.277
4
+ 0,self_attn.q_proj,0.00001,0.00500,1.364
5
+ 0,self_attn.o_proj,0.00000,0.00500,1.357
6
+ 0,mlp.up_proj,0.00003,0.00500,1.540
7
+ 0,mlp.gate_proj,0.00003,0.00500,1.460
8
+ 0,mlp.down_proj,0.00000,0.00500,6.658
9
+ 1,self_attn.k_proj,0.00001,0.00500,1.317
10
+ 1,self_attn.v_proj,0.00000,0.00500,1.310
11
+ 1,self_attn.q_proj,0.00003,0.00500,1.388
12
+ 1,self_attn.o_proj,0.00000,0.00500,1.395
13
+ 1,mlp.up_proj,0.00007,0.00500,1.502
14
+ 1,mlp.gate_proj,0.00008,0.00500,1.493
15
+ 1,mlp.down_proj,0.04112,0.00500,6.721
16
+ 2,self_attn.k_proj,0.00015,0.00500,1.297
17
+ 2,self_attn.v_proj,0.00004,0.00500,1.295
18
+ 2,self_attn.q_proj,0.00063,0.00500,1.367
19
+ 2,self_attn.o_proj,0.00000,0.00500,1.365
20
+ 2,mlp.up_proj,0.00019,0.00500,1.477
21
+ 2,mlp.gate_proj,0.00020,0.00500,1.474
22
+ 2,mlp.down_proj,0.00001,0.00500,6.729
23
+ 3,self_attn.k_proj,0.00006,0.00500,1.294
24
+ 3,self_attn.v_proj,0.00003,0.00500,1.285
25
+ 3,self_attn.q_proj,0.00028,0.00500,1.357
26
+ 3,self_attn.o_proj,0.00001,0.00500,1.365
27
+ 3,mlp.up_proj,0.00041,0.00500,1.474
28
+ 3,mlp.gate_proj,0.00042,0.00500,1.476
29
+ 3,mlp.down_proj,0.00002,0.00500,6.746
30
+ 4,self_attn.k_proj,0.00018,0.00500,1.297
31
+ 4,self_attn.v_proj,0.00006,0.00500,1.287
32
+ 4,self_attn.q_proj,0.00088,0.00500,1.406
33
+ 4,self_attn.o_proj,0.00000,0.00500,1.368
34
+ 4,mlp.up_proj,0.00085,0.00500,1.478
35
+ 4,mlp.gate_proj,0.00087,0.00500,1.469
36
+ 4,mlp.down_proj,0.00002,0.00500,6.720
37
+ 5,self_attn.k_proj,0.00034,0.00500,1.290
38
+ 5,self_attn.v_proj,0.00010,0.00500,1.306
39
+ 5,self_attn.q_proj,0.00164,0.00500,1.366
40
+ 5,self_attn.o_proj,0.00000,0.00500,1.367
41
+ 5,mlp.up_proj,0.00133,0.00500,1.491
42
+ 5,mlp.gate_proj,0.00138,0.00500,1.487
43
+ 5,mlp.down_proj,0.00003,0.00500,6.747
44
+ 6,self_attn.k_proj,0.00022,0.00500,1.305
45
+ 6,self_attn.v_proj,0.00010,0.00500,1.295
46
+ 6,self_attn.q_proj,0.00095,0.00500,1.362
47
+ 6,self_attn.o_proj,0.00000,0.00500,1.380
48
+ 6,mlp.up_proj,0.00171,0.00500,1.470
49
+ 6,mlp.gate_proj,0.00177,0.00500,1.468
50
+ 6,mlp.down_proj,0.00004,0.00500,6.719
51
+ 7,self_attn.k_proj,0.00035,0.00500,1.299
52
+ 7,self_attn.v_proj,0.00013,0.00500,1.293
53
+ 7,self_attn.q_proj,0.00144,0.00500,1.390
54
+ 7,self_attn.o_proj,0.00001,0.00500,1.362
55
+ 7,mlp.up_proj,0.00220,0.00500,1.486
56
+ 7,mlp.gate_proj,0.00225,0.00500,1.487
57
+ 7,mlp.down_proj,0.00006,0.00500,6.745
58
+ 8,self_attn.k_proj,0.00026,0.00500,1.299
59
+ 8,self_attn.v_proj,0.00009,0.00500,1.290
60
+ 8,self_attn.q_proj,0.00115,0.00500,1.360
61
+ 8,self_attn.o_proj,0.00002,0.00500,1.370
62
+ 8,mlp.up_proj,0.00275,0.00500,1.478
63
+ 8,mlp.gate_proj,0.00286,0.00500,1.482
64
+ 8,mlp.down_proj,0.00008,0.00500,6.720
65
+ 9,self_attn.k_proj,0.00073,0.00500,1.303
66
+ 9,self_attn.v_proj,0.00022,0.00500,1.298
67
+ 9,self_attn.q_proj,0.00387,0.00500,1.354
68
+ 9,self_attn.o_proj,0.00002,0.00500,1.347
69
+ 9,mlp.up_proj,0.00317,0.00500,1.483
70
+ 9,mlp.gate_proj,0.00328,0.00500,1.477
71
+ 9,mlp.down_proj,0.00009,0.00500,6.742
72
+ 10,self_attn.k_proj,0.00050,0.00500,1.304
73
+ 10,self_attn.v_proj,0.00019,0.00500,1.289
74
+ 10,self_attn.q_proj,0.00223,0.00500,1.361
75
+ 10,self_attn.o_proj,0.00004,0.00500,1.374
76
+ 10,mlp.up_proj,0.00387,0.00500,1.480
77
+ 10,mlp.gate_proj,0.00402,0.00500,1.472
78
+ 10,mlp.down_proj,0.00013,0.00500,6.751
79
+ 11,self_attn.k_proj,0.00069,0.00500,1.301
80
+ 11,self_attn.v_proj,0.00025,0.00500,1.284
81
+ 11,self_attn.q_proj,0.00317,0.00500,1.361
82
+ 11,self_attn.o_proj,0.00005,0.00500,1.356
83
+ 11,mlp.up_proj,0.00457,0.00500,1.486
84
+ 11,mlp.gate_proj,0.00477,0.00500,1.484
85
+ 11,mlp.down_proj,0.00017,0.00500,6.739
86
+ 12,self_attn.k_proj,0.00099,0.00500,1.292
87
+ 12,self_attn.v_proj,0.00030,0.00500,1.275
88
+ 12,self_attn.q_proj,0.00473,0.00500,1.367
89
+ 12,self_attn.o_proj,0.00007,0.00500,1.369
90
+ 12,mlp.up_proj,0.00535,0.00500,1.489
91
+ 12,mlp.gate_proj,0.00557,0.00500,1.487
92
+ 12,mlp.down_proj,0.00022,0.00500,6.710
93
+ 13,self_attn.k_proj,0.00090,0.00500,1.305
94
+ 13,self_attn.v_proj,0.00033,0.00500,1.292
95
+ 13,self_attn.q_proj,0.00432,0.00500,1.353
96
+ 13,self_attn.o_proj,0.00007,0.00500,1.360
97
+ 13,mlp.up_proj,0.00630,0.00500,1.480
98
+ 13,mlp.gate_proj,0.00656,0.00500,1.479
99
+ 13,mlp.down_proj,0.00027,0.00500,6.759
100
+ 14,self_attn.k_proj,0.00084,0.00500,1.287
101
+ 14,self_attn.v_proj,0.00028,0.00500,1.286
102
+ 14,self_attn.q_proj,0.00387,0.00500,1.363
103
+ 14,self_attn.o_proj,0.00010,0.00500,1.369
104
+ 14,mlp.up_proj,0.00666,0.00500,1.492
105
+ 14,mlp.gate_proj,0.00693,0.00500,1.484
106
+ 14,mlp.down_proj,0.00032,0.00500,6.765
107
+ 15,self_attn.k_proj,0.00135,0.00500,1.296
108
+ 15,self_attn.v_proj,0.00059,0.00500,1.285
109
+ 15,self_attn.q_proj,0.00685,0.00500,1.355
110
+ 15,self_attn.o_proj,0.00007,0.00500,1.362
111
+ 15,mlp.up_proj,0.00828,0.00500,1.480
112
+ 15,mlp.gate_proj,0.00866,0.00500,1.473
113
+ 15,mlp.down_proj,0.00040,0.00500,6.735
114
+ 16,self_attn.k_proj,0.00136,0.00500,1.283
115
+ 16,self_attn.v_proj,0.00059,0.00500,1.287
116
+ 16,self_attn.q_proj,0.00657,0.00500,1.367
117
+ 16,self_attn.o_proj,0.00009,0.00500,1.362
118
+ 16,mlp.up_proj,0.00927,0.00500,1.490
119
+ 16,mlp.gate_proj,0.00966,0.00500,1.485
120
+ 16,mlp.down_proj,0.00047,0.00500,6.714
121
+ 17,self_attn.k_proj,0.00099,0.00500,1.298
122
+ 17,self_attn.v_proj,0.00044,0.00500,1.297
123
+ 17,self_attn.q_proj,0.00460,0.00500,1.346
124
+ 17,self_attn.o_proj,0.00012,0.00500,1.357
125
+ 17,mlp.up_proj,0.00894,0.00500,1.480
126
+ 17,mlp.gate_proj,0.00926,0.00500,1.473
127
+ 17,mlp.down_proj,0.00051,0.00500,6.719
128
+ 18,self_attn.k_proj,0.00173,0.00500,1.296
129
+ 18,self_attn.v_proj,0.00055,0.00500,1.292
130
+ 18,self_attn.q_proj,0.00837,0.00500,1.351
131
+ 18,self_attn.o_proj,0.00009,0.00500,1.351
132
+ 18,mlp.up_proj,0.01100,0.00500,1.490
133
+ 18,mlp.gate_proj,0.01145,0.00500,1.489
134
+ 18,mlp.down_proj,0.00063,0.00500,6.743
135
+ 19,self_attn.k_proj,0.00217,0.00500,1.301
136
+ 19,self_attn.v_proj,0.00072,0.00500,1.298
137
+ 19,self_attn.q_proj,0.01089,0.00500,1.362
138
+ 19,self_attn.o_proj,0.00013,0.00500,1.368
139
+ 19,mlp.up_proj,0.01276,0.00500,1.482
140
+ 19,mlp.gate_proj,0.01330,0.00500,1.474
141
+ 19,mlp.down_proj,0.00086,0.00500,6.761
142
+ 20,self_attn.k_proj,0.00099,0.00500,1.293
143
+ 20,self_attn.v_proj,0.00045,0.00500,1.278
144
+ 20,self_attn.q_proj,0.00486,0.00500,1.345
145
+ 20,self_attn.o_proj,0.00019,0.00500,1.367
146
+ 20,mlp.up_proj,0.01526,0.00500,1.492
147
+ 20,mlp.gate_proj,0.01598,0.00500,1.484
148
+ 20,mlp.down_proj,0.00112,0.00500,6.703
149
+ 21,self_attn.k_proj,0.00071,0.00500,1.306
150
+ 21,self_attn.v_proj,0.00032,0.00500,1.299
151
+ 21,self_attn.q_proj,0.00361,0.00500,1.370
152
+ 21,self_attn.o_proj,0.00026,0.00500,1.373
153
+ 21,mlp.up_proj,0.01322,0.00500,1.487
154
+ 21,mlp.gate_proj,0.01376,0.00500,1.485
155
+ 21,mlp.down_proj,0.00109,0.00500,6.736
156
+ 22,self_attn.k_proj,0.00215,0.00500,1.294
157
+ 22,self_attn.v_proj,0.00102,0.00500,1.286
158
+ 22,self_attn.q_proj,0.01186,0.00500,1.342
159
+ 22,self_attn.o_proj,0.00043,0.00500,1.358
160
+ 22,mlp.up_proj,0.01901,0.00500,1.493
161
+ 22,mlp.gate_proj,0.01974,0.00500,1.480
162
+ 22,mlp.down_proj,0.00181,0.00500,6.739
163
+ 23,self_attn.k_proj,0.00207,0.00500,1.306
164
+ 23,self_attn.v_proj,0.00115,0.00500,1.299
165
+ 23,self_attn.q_proj,0.01166,0.00500,1.355
166
+ 23,self_attn.o_proj,0.00053,0.00500,1.351
167
+ 23,mlp.up_proj,0.02099,0.00500,1.492
168
+ 23,mlp.gate_proj,0.02204,0.00500,1.480
169
+ 23,mlp.down_proj,0.00225,0.00500,6.745
170
+ 24,self_attn.k_proj,0.00179,0.00500,1.308
171
+ 24,self_attn.v_proj,0.00118,0.00500,1.299
172
+ 24,self_attn.q_proj,0.00997,0.00500,1.355
173
+ 24,self_attn.o_proj,0.00051,0.00500,1.362
174
+ 24,mlp.up_proj,0.02483,0.00500,1.486
175
+ 24,mlp.gate_proj,0.02627,0.00500,1.472
176
+ 24,mlp.down_proj,0.00262,0.00500,6.737
177
+ 25,self_attn.k_proj,0.00325,0.00500,1.295
178
+ 25,self_attn.v_proj,0.00165,0.00500,1.284
179
+ 25,self_attn.q_proj,0.01767,0.00500,1.351
180
+ 25,self_attn.o_proj,0.00053,0.00500,1.366
181
+ 25,mlp.up_proj,0.02998,0.00500,1.484
182
+ 25,mlp.gate_proj,0.03150,0.00500,1.484
183
+ 25,mlp.down_proj,0.00304,0.00500,6.721
184
+ 26,self_attn.k_proj,0.00257,0.00500,1.290
185
+ 26,self_attn.v_proj,0.00139,0.00500,1.288
186
+ 26,self_attn.q_proj,0.01364,0.00500,1.376
187
+ 26,self_attn.o_proj,0.00082,0.00500,1.353
188
+ 26,mlp.up_proj,0.03417,0.00500,1.485
189
+ 26,mlp.gate_proj,0.03583,0.00500,1.477
190
+ 26,mlp.down_proj,0.00322,0.00500,6.730
191
+ 27,self_attn.k_proj,0.00260,0.00500,1.301
192
+ 27,self_attn.v_proj,0.00150,0.00500,1.294
193
+ 27,self_attn.q_proj,0.01365,0.00500,1.377
194
+ 27,self_attn.o_proj,0.00081,0.00500,1.366
195
+ 27,mlp.up_proj,0.03782,0.00500,1.487
196
+ 27,mlp.gate_proj,0.03950,0.00500,1.477
197
+ 27,mlp.down_proj,0.00350,0.00500,6.755
198
+ 28,self_attn.k_proj,0.00258,0.00500,1.293
199
+ 28,self_attn.v_proj,0.00158,0.00500,1.294
200
+ 28,self_attn.q_proj,0.01428,0.00500,1.367
201
+ 28,self_attn.o_proj,0.00058,0.00500,1.358
202
+ 28,mlp.up_proj,0.04057,0.00500,1.483
203
+ 28,mlp.gate_proj,0.04212,0.00500,1.475
204
+ 28,mlp.down_proj,0.00398,0.00500,6.752
205
+ 29,self_attn.k_proj,0.00358,0.00500,1.301
206
+ 29,self_attn.v_proj,0.00165,0.00500,1.290
207
+ 29,self_attn.q_proj,0.01765,0.00500,1.361
208
+ 29,self_attn.o_proj,0.00106,0.00500,1.371
209
+ 29,mlp.up_proj,0.04323,0.00500,1.475
210
+ 29,mlp.gate_proj,0.04498,0.00500,1.485
211
+ 29,mlp.down_proj,0.00448,0.00500,6.724
212
+ 30,self_attn.k_proj,0.00357,0.00500,1.291
213
+ 30,self_attn.v_proj,0.00223,0.00500,1.289
214
+ 30,self_attn.q_proj,0.01985,0.00500,1.349
215
+ 30,self_attn.o_proj,0.00104,0.00500,1.355
216
+ 30,mlp.up_proj,0.04697,0.00500,1.481
217
+ 30,mlp.gate_proj,0.04954,0.00500,1.481
218
+ 30,mlp.down_proj,0.00492,0.00500,6.734
219
+ 31,self_attn.k_proj,0.00419,0.00500,1.300
220
+ 31,self_attn.v_proj,0.00277,0.00500,1.299
221
+ 31,self_attn.q_proj,0.02266,0.00500,1.358
222
+ 31,self_attn.o_proj,0.00065,0.00500,1.365
223
+ 31,mlp.up_proj,0.05145,0.00500,1.484
224
+ 31,mlp.gate_proj,0.05440,0.00500,1.480
225
+ 31,mlp.down_proj,0.00525,0.00500,6.752
226
+ 32,self_attn.k_proj,0.00472,0.00500,1.301
227
+ 32,self_attn.v_proj,0.00331,0.00500,1.295
228
+ 32,self_attn.q_proj,0.02555,0.00500,1.436
229
+ 32,self_attn.o_proj,0.00079,0.00500,1.351
230
+ 32,mlp.up_proj,0.05446,0.00500,1.483
231
+ 32,mlp.gate_proj,0.05746,0.00500,1.474
232
+ 32,mlp.down_proj,0.00554,0.00500,6.759
233
+ 33,self_attn.k_proj,0.00458,0.00500,1.301
234
+ 33,self_attn.v_proj,0.00369,0.00500,1.296
235
+ 33,self_attn.q_proj,0.02361,0.00500,1.391
236
+ 33,self_attn.o_proj,0.00085,0.00500,1.369
237
+ 33,mlp.up_proj,0.05765,0.00500,1.481
238
+ 33,mlp.gate_proj,0.06171,0.00500,1.476
239
+ 33,mlp.down_proj,0.00583,0.00500,6.755
240
+ 34,self_attn.k_proj,0.00514,0.00500,1.304
241
+ 34,self_attn.v_proj,0.00442,0.00500,1.285
242
+ 34,self_attn.q_proj,0.02823,0.00500,1.349
243
+ 34,self_attn.o_proj,0.00072,0.00500,1.359
244
+ 34,mlp.up_proj,0.06050,0.00500,1.477
245
+ 34,mlp.gate_proj,0.06491,0.00500,1.473
246
+ 34,mlp.down_proj,0.00618,0.00500,6.722
247
+ 35,self_attn.k_proj,0.00577,0.00500,1.297
248
+ 35,self_attn.v_proj,0.00509,0.00500,1.286
249
+ 35,self_attn.q_proj,0.03143,0.00500,1.355
250
+ 35,self_attn.o_proj,0.00075,0.00500,1.381
251
+ 35,mlp.up_proj,0.06311,0.00500,1.477
252
+ 35,mlp.gate_proj,0.06727,0.00500,1.479
253
+ 35,mlp.down_proj,0.00649,0.00500,6.740
254
+ 36,self_attn.k_proj,0.00619,0.00500,1.294
255
+ 36,self_attn.v_proj,0.00542,0.00500,1.332
256
+ 36,self_attn.q_proj,0.03362,0.00500,1.369
257
+ 36,self_attn.o_proj,0.00073,0.00500,1.367
258
+ 36,mlp.up_proj,0.06496,0.00500,1.469
259
+ 36,mlp.gate_proj,0.06879,0.00500,1.463
260
+ 36,mlp.down_proj,0.00653,0.00500,6.721
261
+ 37,self_attn.k_proj,0.00572,0.00500,1.289
262
+ 37,self_attn.v_proj,0.00433,0.00500,1.286
263
+ 37,self_attn.q_proj,0.02991,0.00500,1.361
264
+ 37,self_attn.o_proj,0.00080,0.00500,1.367
265
+ 37,mlp.up_proj,0.06700,0.00500,1.577
266
+ 37,mlp.gate_proj,0.07042,0.00500,1.485
267
+ 37,mlp.down_proj,0.00681,0.00500,6.734
268
+ 38,self_attn.k_proj,0.00584,0.00500,1.290
269
+ 38,self_attn.v_proj,0.00375,0.00500,1.277
270
+ 38,self_attn.q_proj,0.02912,0.00500,1.355
271
+ 38,self_attn.o_proj,0.00109,0.00500,1.366
272
+ 38,mlp.up_proj,0.06946,0.00500,1.474
273
+ 38,mlp.gate_proj,0.07300,0.00500,1.484
274
+ 38,mlp.down_proj,0.00714,0.00500,6.750
275
+ 39,self_attn.k_proj,0.00555,0.00500,1.292
276
+ 39,self_attn.v_proj,0.00415,0.00500,1.278
277
+ 39,self_attn.q_proj,0.02862,0.00500,1.355
278
+ 39,self_attn.o_proj,0.00101,0.00500,1.363
279
+ 39,mlp.up_proj,0.07173,0.00500,1.487
280
+ 39,mlp.gate_proj,0.07493,0.00500,1.488
281
+ 39,mlp.down_proj,0.00751,0.00500,6.709
282
+ 40,self_attn.k_proj,0.00538,0.00500,1.287
283
+ 40,self_attn.v_proj,0.00314,0.00500,1.277
284
+ 40,self_attn.q_proj,0.02625,0.00500,1.365
285
+ 40,self_attn.o_proj,0.00136,0.00500,1.364
286
+ 40,mlp.up_proj,0.07475,0.00500,1.490
287
+ 40,mlp.gate_proj,0.07745,0.00500,1.488
288
+ 40,mlp.down_proj,0.00822,0.00500,6.733
289
+ 41,self_attn.k_proj,0.00499,0.00500,1.292
290
+ 41,self_attn.v_proj,0.00267,0.00500,1.282
291
+ 41,self_attn.q_proj,0.02358,0.00500,1.353
292
+ 41,self_attn.o_proj,0.00164,0.00500,1.359
293
+ 41,mlp.up_proj,0.07964,0.00500,1.489
294
+ 41,mlp.gate_proj,0.08266,0.00500,1.483
295
+ 41,mlp.down_proj,0.00888,0.00500,6.727
296
+ 42,self_attn.k_proj,0.00624,0.00500,1.291
297
+ 42,self_attn.v_proj,0.00398,0.00500,1.281
298
+ 42,self_attn.q_proj,0.03266,0.00500,1.360
299
+ 42,self_attn.o_proj,0.00136,0.00500,1.358
300
+ 42,mlp.up_proj,0.08347,0.00500,1.487
301
+ 42,mlp.gate_proj,0.08600,0.00500,1.523
302
+ 42,mlp.down_proj,0.00997,0.00500,6.716
303
+ 43,self_attn.k_proj,0.00580,0.00500,1.287
304
+ 43,self_attn.v_proj,0.00344,0.00500,1.280
305
+ 43,self_attn.q_proj,0.02967,0.00500,1.357
306
+ 43,self_attn.o_proj,0.00179,0.00500,1.373
307
+ 43,mlp.up_proj,0.08684,0.00500,1.487
308
+ 43,mlp.gate_proj,0.08869,0.00500,1.491
309
+ 43,mlp.down_proj,0.01087,0.00500,6.713
310
+ 44,self_attn.k_proj,0.00573,0.00500,1.289
311
+ 44,self_attn.v_proj,0.00322,0.00500,1.279
312
+ 44,self_attn.q_proj,0.03024,0.00500,1.366
313
+ 44,self_attn.o_proj,0.00235,0.00500,1.381
314
+ 44,mlp.up_proj,0.08912,0.00500,1.485
315
+ 44,mlp.gate_proj,0.08968,0.00500,1.488
316
+ 44,mlp.down_proj,0.01268,0.00500,6.728
317
+ 45,self_attn.k_proj,0.00548,0.00500,1.295
318
+ 45,self_attn.v_proj,0.00269,0.00500,1.291
319
+ 45,self_attn.q_proj,0.02876,0.00500,1.358
320
+ 45,self_attn.o_proj,0.00269,0.00500,1.366
321
+ 45,mlp.up_proj,0.09206,0.00500,1.486
322
+ 45,mlp.gate_proj,0.09188,0.00500,1.485
323
+ 45,mlp.down_proj,0.01779,0.00500,6.722
324
+ 46,self_attn.k_proj,0.00625,0.00500,1.291
325
+ 46,self_attn.v_proj,0.00289,0.00500,1.283
326
+ 46,self_attn.q_proj,0.03244,0.00500,1.348
327
+ 46,self_attn.o_proj,0.00401,0.00500,1.360
328
+ 46,mlp.up_proj,0.09466,0.00500,1.480
329
+ 46,mlp.gate_proj,0.09347,0.00500,1.478
330
+ 46,mlp.down_proj,0.01347,0.00500,6.752
331
+ 47,self_attn.k_proj,0.00644,0.00500,1.295
332
+ 47,self_attn.v_proj,0.00352,0.00500,1.284
333
+ 47,self_attn.q_proj,0.03419,0.00500,1.351
334
+ 47,self_attn.o_proj,0.00412,0.00500,1.368
335
+ 47,mlp.up_proj,0.09487,0.00500,1.484
336
+ 47,mlp.gate_proj,0.09254,0.00500,1.477
337
+ 47,mlp.down_proj,0.01473,0.00500,6.781
338
+ 48,self_attn.k_proj,0.00628,0.00500,1.298
339
+ 48,self_attn.v_proj,0.00417,0.00500,1.287
340
+ 48,self_attn.q_proj,0.03656,0.00500,1.353
341
+ 48,self_attn.o_proj,0.00422,0.00500,1.361
342
+ 48,mlp.up_proj,0.09937,0.00500,1.483
343
+ 48,mlp.gate_proj,0.09672,0.00500,1.478
344
+ 48,mlp.down_proj,0.01623,0.00500,6.725
345
+ 49,self_attn.k_proj,0.00583,0.00500,1.298
346
+ 49,self_attn.v_proj,0.00407,0.00500,1.290
347
+ 49,self_attn.q_proj,0.03329,0.00500,1.358
348
+ 49,self_attn.o_proj,0.00411,0.00500,1.366
349
+ 49,mlp.up_proj,0.09889,0.00500,1.486
350
+ 49,mlp.gate_proj,0.09554,0.00500,1.480
351
+ 49,mlp.down_proj,0.01687,0.00500,6.763
352
+ 50,self_attn.k_proj,0.00701,0.00500,1.298
353
+ 50,self_attn.v_proj,0.00415,0.00500,1.294
354
+ 50,self_attn.q_proj,0.04122,0.00500,1.350
355
+ 50,self_attn.o_proj,0.00517,0.00500,1.353
356
+ 50,mlp.up_proj,0.09707,0.00500,1.483
357
+ 50,mlp.gate_proj,0.09265,0.00500,1.480
358
+ 50,mlp.down_proj,0.01780,0.00500,6.758
359
+ 51,self_attn.k_proj,0.00634,0.00500,1.302
360
+ 51,self_attn.v_proj,0.00402,0.00500,1.298
361
+ 51,self_attn.q_proj,0.03470,0.00500,1.363
362
+ 51,self_attn.o_proj,0.00578,0.00500,1.359
363
+ 51,mlp.up_proj,0.10056,0.00500,1.492
364
+ 51,mlp.gate_proj,0.09631,0.00500,1.483
365
+ 51,mlp.down_proj,0.01916,0.00500,6.719
366
+ 52,self_attn.k_proj,0.00588,0.00500,1.302
367
+ 52,self_attn.v_proj,0.00513,0.00500,1.292
368
+ 52,self_attn.q_proj,0.03375,0.00500,1.358
369
+ 52,self_attn.o_proj,0.00646,0.00500,1.352
370
+ 52,mlp.up_proj,0.11434,0.00500,1.493
371
+ 52,mlp.gate_proj,0.11091,0.00500,1.486
372
+ 52,mlp.down_proj,0.02019,0.00500,6.729
373
+ 53,self_attn.k_proj,0.00666,0.00500,1.293
374
+ 53,self_attn.v_proj,0.00504,0.00500,1.293
375
+ 53,self_attn.q_proj,0.03897,0.00500,1.362
376
+ 53,self_attn.o_proj,0.00735,0.00500,1.356
377
+ 53,mlp.up_proj,0.12107,0.00500,1.495
378
+ 53,mlp.gate_proj,0.11808,0.00500,1.487
379
+ 53,mlp.down_proj,0.02135,0.00500,6.731
380
+ 54,self_attn.k_proj,0.00652,0.00500,1.294
381
+ 54,self_attn.v_proj,0.00688,0.00500,1.285
382
+ 54,self_attn.q_proj,0.04141,0.00500,1.351
383
+ 54,self_attn.o_proj,0.00672,0.00500,1.361
384
+ 54,mlp.up_proj,0.11996,0.00500,1.487
385
+ 54,mlp.gate_proj,0.11612,0.00500,1.488
386
+ 54,mlp.down_proj,0.02402,0.00500,6.737
387
+ 55,self_attn.k_proj,0.00631,0.00500,1.286
388
+ 55,self_attn.v_proj,0.00684,0.00500,1.284
389
+ 55,self_attn.q_proj,0.04140,0.00500,1.355
390
+ 55,self_attn.o_proj,0.00809,0.00500,1.371
391
+ 55,mlp.up_proj,0.12516,0.00500,1.483
392
+ 55,mlp.gate_proj,0.12055,0.00500,1.471
393
+ 55,mlp.down_proj,0.02912,0.00500,6.729
394
+ 56,self_attn.k_proj,0.00661,0.00500,1.304
395
+ 56,self_attn.v_proj,0.00600,0.00500,1.292
396
+ 56,self_attn.q_proj,0.03907,0.00500,1.352
397
+ 56,self_attn.o_proj,0.01063,0.00500,1.360
398
+ 56,mlp.up_proj,0.13797,0.00500,1.478
399
+ 56,mlp.gate_proj,0.13388,0.00500,1.474
400
+ 56,mlp.down_proj,0.03271,0.00500,6.751
401
+ 57,self_attn.k_proj,0.00739,0.00500,1.302
402
+ 57,self_attn.v_proj,0.00655,0.00500,1.299
403
+ 57,self_attn.q_proj,0.04085,0.00500,1.359
404
+ 57,self_attn.o_proj,0.01045,0.00500,1.354
405
+ 57,mlp.up_proj,0.14086,0.00500,1.489
406
+ 57,mlp.gate_proj,0.13600,0.00500,1.487
407
+ 57,mlp.down_proj,0.03633,0.00500,6.739
408
+ 58,self_attn.k_proj,0.00677,0.00500,1.295
409
+ 58,self_attn.v_proj,0.00729,0.00500,1.291
410
+ 58,self_attn.q_proj,0.04273,0.00500,1.360
411
+ 58,self_attn.o_proj,0.01267,0.00500,1.360
412
+ 58,mlp.up_proj,0.14561,0.00500,1.487
413
+ 58,mlp.gate_proj,0.14010,0.00500,1.478
414
+ 58,mlp.down_proj,0.04554,0.00500,6.754
415
+ 59,self_attn.k_proj,0.00672,0.00500,1.290
416
+ 59,self_attn.v_proj,0.00769,0.00500,1.289
417
+ 59,self_attn.q_proj,0.04146,0.00500,1.360
418
+ 59,self_attn.o_proj,0.01389,0.00500,1.374
419
+ 59,mlp.up_proj,0.15160,0.00500,1.481
420
+ 59,mlp.gate_proj,0.14874,0.00500,1.480
421
+ 59,mlp.down_proj,0.05304,0.00500,6.741
422
+ 60,self_attn.k_proj,0.00752,0.00500,1.303
423
+ 60,self_attn.v_proj,0.00981,0.00500,1.292
424
+ 60,self_attn.q_proj,0.04964,0.00500,1.358
425
+ 60,self_attn.o_proj,0.01291,0.00500,1.355
426
+ 60,mlp.up_proj,0.18553,0.00500,1.487
427
+ 60,mlp.gate_proj,0.18585,0.00500,1.474
428
+ 60,mlp.down_proj,0.06171,0.00500,6.699
429
+ 61,self_attn.k_proj,0.00782,0.00500,1.296
430
+ 61,self_attn.v_proj,0.01079,0.00500,1.288
431
+ 61,self_attn.q_proj,0.04946,0.00500,1.365
432
+ 61,self_attn.o_proj,0.01420,0.00500,1.359
433
+ 61,mlp.up_proj,0.20452,0.00500,1.487
434
+ 61,mlp.gate_proj,0.20626,0.00500,1.481
435
+ 61,mlp.down_proj,0.07154,0.00500,6.759
436
+ 62,self_attn.k_proj,0.00802,0.00500,1.303
437
+ 62,self_attn.v_proj,0.01049,0.00500,1.295
438
+ 62,self_attn.q_proj,0.04958,0.00500,1.351
439
+ 62,self_attn.o_proj,0.01444,0.00500,1.364
440
+ 62,mlp.up_proj,0.23152,0.00500,1.484
441
+ 62,mlp.gate_proj,0.23855,0.00500,1.484
442
+ 62,mlp.down_proj,0.07513,0.00500,6.751
+ 63,self_attn.k_proj,0.00830,0.00500,1.299
+ 63,self_attn.v_proj,0.01104,0.00500,1.290
+ 63,self_attn.q_proj,0.05213,0.00500,1.350
+ 63,self_attn.o_proj,0.01569,0.00500,1.357
+ 63,mlp.up_proj,0.24630,0.00500,1.483
+ 63,mlp.gate_proj,0.25185,0.00500,1.481
+ 63,mlp.down_proj,0.09072,0.00500,6.706
+ 64,self_attn.k_proj,0.00874,0.00500,1.301
+ 64,self_attn.v_proj,0.01102,0.00500,1.300
+ 64,self_attn.q_proj,0.05465,0.00500,1.358
+ 64,self_attn.o_proj,0.01668,0.00500,1.349
+ 64,mlp.up_proj,0.26513,0.00500,1.488
+ 64,mlp.gate_proj,0.27326,0.00500,1.475
+ 64,mlp.down_proj,0.10649,0.00500,6.700
+ 65,self_attn.k_proj,0.00902,0.00500,1.292
+ 65,self_attn.v_proj,0.01117,0.00500,1.286
+ 65,self_attn.q_proj,0.05555,0.00500,1.359
+ 65,self_attn.o_proj,0.01772,0.00500,1.372
+ 65,mlp.up_proj,0.29791,0.00500,1.488
+ 65,mlp.gate_proj,0.30622,0.00500,1.486
+ 65,mlp.down_proj,0.12478,0.00500,6.734
+ 66,self_attn.k_proj,0.00813,0.00500,1.304
+ 66,self_attn.v_proj,0.01247,0.00500,1.291
+ 66,self_attn.q_proj,0.05379,0.00500,1.362
+ 66,self_attn.o_proj,0.01692,0.00500,1.351
+ 66,mlp.up_proj,0.34625,0.00500,1.490
+ 66,mlp.gate_proj,0.36069,0.00500,1.483
+ 66,mlp.down_proj,0.13569,0.00500,6.685
+ 67,self_attn.k_proj,0.00894,0.00500,1.297
+ 67,self_attn.v_proj,0.01497,0.00500,1.294
+ 67,self_attn.q_proj,0.05857,0.00500,1.362
+ 67,self_attn.o_proj,0.01526,0.00500,1.370
+ 67,mlp.up_proj,0.37496,0.00500,1.487
+ 67,mlp.gate_proj,0.38628,0.00500,1.487
+ 67,mlp.down_proj,0.15187,0.00500,6.726
+ 68,self_attn.k_proj,0.00917,0.00500,1.302
+ 68,self_attn.v_proj,0.01699,0.00500,1.285
+ 68,self_attn.q_proj,0.05995,0.00500,1.357
+ 68,self_attn.o_proj,0.01972,0.00500,1.353
+ 68,mlp.up_proj,0.40992,0.00500,1.478
+ 68,mlp.gate_proj,0.41818,0.00500,1.482
+ 68,mlp.down_proj,0.16788,0.00500,6.736
+ 69,self_attn.k_proj,0.00957,0.00500,1.301
+ 69,self_attn.v_proj,0.01469,0.00500,1.296
+ 69,self_attn.q_proj,0.06427,0.00500,1.357
+ 69,self_attn.o_proj,0.02322,0.00500,1.346
+ 69,mlp.up_proj,0.44610,0.00500,1.492
+ 69,mlp.gate_proj,0.44624,0.00500,1.484
+ 69,mlp.down_proj,0.19748,0.00500,6.719
+ 70,self_attn.k_proj,0.00944,0.00500,1.295
+ 70,self_attn.v_proj,0.01957,0.00500,1.287
+ 70,self_attn.q_proj,0.06459,0.00500,1.353
+ 70,self_attn.o_proj,0.02997,0.00500,1.365
+ 70,mlp.up_proj,0.50861,0.00500,1.483
+ 70,mlp.gate_proj,0.50142,0.00500,1.478
+ 70,mlp.down_proj,0.24202,0.00500,6.759
+ 71,self_attn.k_proj,0.00988,0.00500,1.312
+ 71,self_attn.v_proj,0.02102,0.00500,1.300
+ 71,self_attn.q_proj,0.06401,0.00500,1.352
+ 71,self_attn.o_proj,0.03235,0.00500,1.347
+ 71,mlp.up_proj,0.55881,0.00500,1.487
+ 71,mlp.gate_proj,0.54271,0.00500,1.479
+ 71,mlp.down_proj,0.28924,0.00500,6.696
+ 72,self_attn.k_proj,0.00921,0.00500,1.292
+ 72,self_attn.v_proj,0.02356,0.00500,1.285
+ 72,self_attn.q_proj,0.06627,0.00500,1.345
+ 72,self_attn.o_proj,0.03236,0.00500,1.359
+ 72,mlp.up_proj,0.61693,0.00500,1.477
+ 72,mlp.gate_proj,0.59146,0.00500,1.483
+ 72,mlp.down_proj,0.34195,0.00500,6.710
+ 73,self_attn.k_proj,0.00917,0.00500,1.301
+ 73,self_attn.v_proj,0.02763,0.00500,1.290
+ 73,self_attn.q_proj,0.06599,0.00500,1.348
+ 73,self_attn.o_proj,0.03502,0.00500,1.354
+ 73,mlp.up_proj,0.66426,0.00500,1.487
+ 73,mlp.gate_proj,0.62903,0.00500,1.477
+ 73,mlp.down_proj,0.39945,0.00500,6.694
+ 74,self_attn.k_proj,0.00942,0.00500,1.293
+ 74,self_attn.v_proj,0.02618,0.00500,1.290
+ 74,self_attn.q_proj,0.07704,0.00500,1.362
+ 74,self_attn.o_proj,0.04021,0.00500,1.358
+ 74,mlp.up_proj,0.70155,0.00500,1.485
+ 74,mlp.gate_proj,0.65342,0.00500,1.488
+ 74,mlp.down_proj,0.48040,0.00500,6.752
+ 75,self_attn.k_proj,0.00929,0.00500,1.296
+ 75,self_attn.v_proj,0.02936,0.00500,1.286
+ 75,self_attn.q_proj,0.07221,0.00500,1.364
+ 75,self_attn.o_proj,0.05541,0.00500,1.360
+ 75,mlp.up_proj,0.75981,0.00500,1.486
+ 75,mlp.gate_proj,0.69871,0.00500,1.477
+ 75,mlp.down_proj,0.57767,0.00500,6.702
+ 76,self_attn.k_proj,0.00983,0.00500,1.295
+ 76,self_attn.v_proj,0.04741,0.00500,1.290
+ 76,self_attn.q_proj,0.08201,0.00500,1.371
+ 76,self_attn.o_proj,0.07714,0.00500,1.362
+ 76,mlp.up_proj,0.78514,0.00500,1.514
+ 76,mlp.gate_proj,0.71541,0.00500,1.490
+ 76,mlp.down_proj,0.72642,0.00500,6.715
+ 77,self_attn.k_proj,0.00807,0.00500,1.301
+ 77,self_attn.v_proj,0.02932,0.00500,1.293
+ 77,self_attn.q_proj,0.07229,0.00500,1.439
+ 77,self_attn.o_proj,0.07672,0.00500,1.349
+ 77,mlp.up_proj,0.76874,0.00500,1.489
+ 77,mlp.gate_proj,0.70010,0.00500,1.478
+ 77,mlp.down_proj,0.89357,0.00500,6.702
+ 78,self_attn.k_proj,0.00738,0.00500,1.291
+ 78,self_attn.v_proj,0.02975,0.00500,1.281
+ 78,self_attn.q_proj,0.06349,0.00500,1.351
+ 78,self_attn.o_proj,0.09652,0.00500,1.359
+ 78,mlp.up_proj,0.61304,0.00500,1.485
+ 78,mlp.gate_proj,0.56795,0.00500,1.482
+ 78,mlp.down_proj,1.44555,0.00500,6.725
+ 79,self_attn.k_proj,0.00553,0.00500,1.303
+ 79,self_attn.v_proj,0.00989,0.00500,1.289
+ 79,self_attn.q_proj,0.04087,0.00500,1.365
+ 79,self_attn.o_proj,0.02176,0.00500,1.364
+ 79,mlp.up_proj,0.48353,0.00500,1.490
+ 79,mlp.gate_proj,0.46248,0.00500,1.488
+ 79,mlp.down_proj,1.49121,0.00500,6.710
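The rows above look like gptqmodel's per-layer quantization log, with columns for layer index, module name, quantization loss, damp value, and elapsed seconds (the column meanings are inferred; the damp column matches the `damp_percent` of 0.005 recorded in `quantize_config.json` below). A minimal sketch for surfacing the hardest-to-quantize modules from this log, assuming it is saved as a plain CSV under the hypothetical name `quant_log.csv`:

```python
import csv

# Columns inferred from the log above: layer, module, loss, damp, seconds.
# "quant_log.csv" is a hypothetical filename for the rows shown in this diff.
rows = []
with open("quant_log.csv", newline="") as f:
    for layer, module, loss, damp, secs in csv.reader(f):
        rows.append((int(layer), module, float(loss), float(damp), float(secs)))

# The loss column climbs steeply for mlp.down_proj in the final layers
# (e.g. 1.49121 at layer 79), so sorting by loss surfaces those modules.
for layer, module, loss, *_ in sorted(rows, key=lambda r: -r[2])[:5]:
    print(f"layer {layer:>2}  {module:<20} loss={loss:.5f}")
```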
quantize_config.json ADDED
@@ -0,0 +1,18 @@
+ {
+ "bits": 8,
+ "dynamic": null,
+ "group_size": 128,
+ "desc_act": true,
+ "static_groups": false,
+ "sym": true,
+ "lm_head": false,
+ "true_sequential": true,
+ "quant_method": "gptq",
+ "checkpoint_format": "gptq",
+ "meta": {
+ "quantizer": "gptqmodel:1.2.1",
+ "uri": "https://github.com/modelcloud/gptqmodel",
+ "damp_percent": 0.005,
+ "damp_auto_increment": 0.0015
+ }
+ }
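This `quantize_config.json` describes an 8-bit GPTQ checkpoint with group size 128, activation reordering (`desc_act: true`), symmetric quantization, and an unquantized `lm_head`, produced by gptqmodel 1.2.1. A minimal loading sketch, assuming the files in this commit sit in a hypothetical local directory `./model-gptq-8bit` and that the `gptqmodel` package from the `meta.uri` above is installed; exact call names can differ between gptqmodel versions:

```python
from gptqmodel import GPTQModel
from transformers import AutoTokenizer

path = "./model-gptq-8bit"  # hypothetical local path for this commit's files

# GPTQModel reads quantize_config.json to rebuild the 8-bit,
# group_size=128, desc_act=True GPTQ layers on load.
tokenizer = AutoTokenizer.from_pretrained(path)
model = GPTQModel.from_quantized(path, device="cuda:0")

inputs = tokenizer("Hello, world", return_tensors="pt").to("cuda:0")
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=32)[0]))
```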
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "eos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
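Per this map, `eos_token` and `pad_token` both resolve to `<|endoftext|>`, and the ChatML markers are registered as additional special tokens, so they encode to single token IDs instead of being split. A quick sanity check, reusing the same hypothetical path as above:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./model-gptq-8bit")  # hypothetical path

# eos and pad both map to <|endoftext|> per special_tokens_map.json.
print(tok.eos_token, tok.pad_token)

# <|im_end|> is a registered special token, so it is one ID
# (151645 per the tokenizer_config.json below).
print(tok.convert_tokens_to_ids("<|im_end|>"))
```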
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
+ {
+ "add_bos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "151643": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151644": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151645": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151646": {
+ "content": "<|object_ref_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151647": {
+ "content": "<|object_ref_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151648": {
+ "content": "<|box_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151649": {
+ "content": "<|box_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151650": {
+ "content": "<|quad_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151651": {
+ "content": "<|quad_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151652": {
+ "content": "<|vision_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151653": {
+ "content": "<|vision_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151654": {
+ "content": "<|vision_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151655": {
+ "content": "<|image_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151656": {
+ "content": "<|video_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151657": {
+ "content": "<tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151658": {
+ "content": "</tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151659": {
+ "content": "<|fim_prefix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151660": {
+ "content": "<|fim_middle|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151661": {
+ "content": "<|fim_suffix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151662": {
+ "content": "<|fim_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151663": {
+ "content": "<|repo_name|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151664": {
+ "content": "<|file_sep|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "bos_token": null,
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|endoftext|>",
+ "errors": "replace",
+ "model_max_length": 131072,
+ "pad_token": "<|endoftext|>",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null
+ }
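The `chat_template` above is the stock ChatML template: each message renders as `<|im_start|>role\ncontent<|im_end|>`, and a bare `<|im_start|>assistant\n` is appended when `add_generation_prompt` is set. A quick check of what it produces, again under the hypothetical path used above:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./model-gptq-8bit")  # hypothetical path

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hi!"},
]

# tokenize=False returns the raw prompt string instead of token IDs.
prompt = tok.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# Hi!<|im_end|>
# <|im_start|>assistant
```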
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3e0381684dd4e632872e9c6b2c43efe781af3f809bc306fd0cc6e5feb22a250
+ size 8248
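`training_args.bin` is committed as a Git LFS pointer (spec v1, 8,248 bytes); the blob itself is typically the pickled `TrainingArguments` object that the Hugging Face `Trainer` saves with `torch.save`. A sketch for inspecting it once the LFS object has been fetched (e.g. via `git lfs pull`); note that `weights_only=False` performs full unpickling, which is unsafe for untrusted files:

```python
import torch

# After `git lfs pull`, training_args.bin is typically a pickled
# TrainingArguments object; weights_only=False opts into full (unsafe)
# unpickling, so only load files from a source you trust.
args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)
print(args.learning_rate, args.num_train_epochs)
```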
vocab.json ADDED
The diff for this file is too large to render. See raw diff