djuna committed on
Commit
080e0ad
·
verified ·
1 Parent(s): 50ef1f9

Python Notebook

Browse files



@DreadPoor


@xi0v

The Python notebook is attached. Apologies for the delayed response.

Files changed (1) hide show
  1. MergeKitPlus.ipynb +279 -0
MergeKitPlus.ipynb ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "JHRpOZ5g3Flv"
7
+ },
8
+ "source": [
9
+ "# Clone Mergekit and Install the dependencies"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": null,
15
+ "metadata": {
16
+ "id": "x8548KdSbMs2"
17
+ },
18
+ "outputs": [],
19
+ "source": [
20
+ "!nvidia-smi"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "metadata": {
27
+ "id": "4alsYntU1gNU"
28
+ },
29
+ "outputs": [],
30
+ "source": [
31
+ "!pip install -qqq git+https://github.com/arcee-ai/mergekit.git"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "markdown",
36
+ "metadata": {
37
+ "id": "DtGY8BAo3alb"
38
+ },
39
+ "source": [
40
+ "# Mergekit Config"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": null,
46
+ "metadata": {
47
+ "id": "CmfbveTblP0F"
48
+ },
49
+ "outputs": [],
50
+ "source": [
51
+ "# @markdown What will your model's name be?\n",
52
+ "MODEL_NAME = 'SmolMoE' # @param {type:\"string\"}"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": null,
58
+ "metadata": {
59
+ "id": "r2-rAjH93w8x"
60
+ },
61
+ "outputs": [],
62
+ "source": [
63
+ "mergekit_yaml = \"\"\"\n",
64
+ "base_model: BEE-spoke-data/smol_llama-220M-GQA\n",
65
+ "gate_mode: random\n",
66
+ "dtype: bfloat16\n",
67
+ "experts:\n",
68
+ " - source_model: BEE-spoke-data/smol_llama-220M-GQA\n",
69
+ " - source_model: BEE-spoke-data/smol_llama-220M-GQA\n",
70
+ "\"\"\" # @param {type:\"string\"}\n",
71
+ "with open('config.yaml', 'w', encoding=\"utf-8\") as f:\n",
72
+ " f.write(mergekit_yaml)"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "markdown",
77
+ "metadata": {
78
+ "id": "WiCGZXysn_mD"
79
+ },
80
+ "source": [
81
+ "# Mergekit Runtime"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": null,
87
+ "metadata": {
88
+ "id": "0scr7Ed_4GPe"
89
+ },
90
+ "outputs": [],
91
+ "source": [
92
+ "low_cpu_ram = True # @param {type:\"boolean\"}\n",
93
+ "runtime = \"GPU\" # @param [\"CPU\", \"GPU\"]\n",
94
+ "task = \"moe\" # @param [\"merge\", \"merge-mega\", \"moe\", \"extract\"]\n",
95
+ "# @markdown ### Mergekit arguments\n",
96
+ "\n",
97
+ "trust_remote_code = False # @param {type:\"boolean\"}\n",
98
+ "clone_tensors = True # @param {type:\"boolean\"}\n",
99
+ "low_ram = True # @param {type:\"boolean\"}\n",
100
+ "out_shard_size = \"500M\" # @param {type:\"string\"}\n",
101
+ "\n",
102
+ "# @markdown ### Extract LoRA (experimental)\n",
103
+ "base_model = \"unsloth/Llama-3.2-3B-Instruct\" # @param {type:\"string\"}\n",
104
+ "finetuned_model = \"theprint/ReWiz-Llama-3.2-3B\" # @param {type:\"string\"}\n",
105
+ "extract_rank = 32 # @param {type:\"number\"}"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "markdown",
110
+ "metadata": {
111
+ "id": "QBhBgX7U52Xn"
112
+ },
113
+ "source": [
114
+ "## Run the program"
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": null,
120
+ "metadata": {
121
+ "collapsed": true,
122
+ "id": "3Y7aBJXL54GJ"
123
+ },
124
+ "outputs": [],
125
+ "source": [
126
+ "import os\n",
127
+ "import shutil\n",
128
+ "\n",
129
+ "def empty_folder(folder_path):\n",
130
+ " if os.path.exists(folder_path):\n",
131
+ " shutil.rmtree(folder_path)\n",
132
+ " os.makedirs(folder_path)\n",
133
+ "\n",
134
+ "empty_folder('merge')\n",
135
+ "empty_folder('lora')\n",
136
+ "\n",
137
+ "if task == \"merge\":\n",
138
+ " cli = \"mergekit-yaml\"\n",
139
+ "elif task == \"merge-mega\":\n",
140
+ " cli = \"mergekit-mega\"\n",
141
+ "elif task == \"moe\":\n",
142
+ " cli = \"mergekit-moe\"\n",
143
+ "elif task == \"extract\":\n",
144
+ " if base_model == \"\" or finetuned_model == \"\":\n",
145
+ " raise ValueError(\"base_model and finetuned_model cannot be empty\")\n",
146
+ " !pip install -qqq bitsandbytes\n",
147
+ " cli = f\"mergekit-extract-lora {finetuned_model} {base_model} lora --rank={extract_rank}\"\n",
148
+ "\n",
149
+ "if task in [\"merge\", \"moe\", \"merge-mega\"]:\n",
150
+ " cli += \" config.yaml merge --copy-tokenizer --allow-crimes\"\n",
151
+ " if runtime == \"GPU\":\n",
152
+ " if task in [\"merge\", \"merge-mega\"]:\n",
153
+ " cli += \" --cuda\"\n",
154
+ " elif task == \"moe\":\n",
155
+ " cli += \" --device cuda --cuda\"\n",
156
+ " else:\n",
157
+ " cli += \" --no-cuda\"\n",
158
+ "\n",
159
+ " if trust_remote_code:\n",
160
+ " cli += \" --trust-remote-code\"\n",
161
+ " if clone_tensors:\n",
162
+ " cli += \" --clone-tensors\"\n",
163
+ " if low_ram:\n",
164
+ " cli += f\" --out-shard-size {out_shard_size} --lazy-unpickle\"\n",
165
+ " if low_cpu_ram:\n",
166
+ " cli += \" --low-cpu-memory\"\n",
167
+ "print(cli)\n",
168
+ "!{cli}"
169
+ ]
170
+ },
171
+ {
172
+ "cell_type": "markdown",
173
+ "metadata": {
174
+ "id": "HyeGrtGrDn6S"
175
+ },
176
+ "source": [
177
+ "# Run Inference with the Merged Model"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": null,
183
+ "metadata": {
184
+ "id": "wpy7Ahw6hghH"
185
+ },
186
+ "outputs": [],
187
+ "source": [
188
+ "!pip install -qU transformers bitsandbytes accelerate\n",
189
+ "from transformers import AutoTokenizer, pipeline\n",
190
+ "import torch\n",
191
+ "\n",
192
+ "model = \"merge\"\n",
193
+ "\n",
194
+ "tokenizer = AutoTokenizer.from_pretrained(model)\n",
195
+ "generator = pipeline(\n",
196
+ " \"text-generation\",\n",
197
+ " model=model,\n",
198
+ " model_kwargs={\"torch_dtype\": torch.float16, \"load_in_4bit\": False},\n",
199
+ ")"
200
+ ]
201
+ },
202
+ {
203
+ "cell_type": "code",
204
+ "execution_count": null,
205
+ "metadata": {
206
+ "id": "f05D7q8wiF-5"
207
+ },
208
+ "outputs": [],
209
+ "source": [
210
+ "messages = [{\"role\": \"user\", \"content\": \"Explain what a Mixture of Experts is in less than 100 words.\"}]\n",
211
+ "prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n",
212
+ "outputs = generator(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)\n",
213
+ "print(outputs[0][\"generated_text\"])"
214
+ ]
215
+ },
216
+ {
217
+ "cell_type": "markdown",
218
+ "metadata": {},
219
+ "source": [
220
+ "# Upload to Hugging Face"
221
+ ]
222
+ },
223
+ {
224
+ "cell_type": "code",
225
+ "execution_count": null,
226
+ "metadata": {},
227
+ "outputs": [],
228
+ "source": [
229
+ "# @title ## Upload model to Hugging Face { display-mode: \"form\" }\n",
230
+ "# @markdown Enter your HF username and the name of the Colab secret that stores your [Hugging Face access token](https://huggingface.co/settings/tokens).\n",
231
+ "username = 'username' # @param {type:\"string\"}\n",
232
+ "token_env = 'hf_token' # @param {type:\"string\"}\n",
233
+ "\n",
234
+ "!pip install -qU huggingface_hub\n",
235
+ "\n",
236
+ "import yaml\n",
237
+ "\n",
238
+ "from huggingface_hub import HfApi\n",
239
+ "from google.colab import userdata\n",
240
+ "\n",
241
+ "def output_dir():\n",
242
+ " if os.path.exists('merge') and os.listdir('merge'):\n",
243
+ " return \"merge\"\n",
244
+ " if os.path.exists('lora') and os.listdir('lora'):\n",
245
+ " return \"lora\"\n",
246
+ " raise ValueError(\"Both folders are empty or do not exist.\")\n",
247
+ "\n",
248
+ "\n",
249
+ "# Defined in the secrets tab in Google Colab\n",
250
+ "api = HfApi(token=userdata.get(token_env))\n",
251
+ "try:\n",
252
+ " output_dir=output_dir()\n",
253
+ " api.create_repo(\n",
254
+ " repo_id=f\"{username}/{MODEL_NAME}\",\n",
255
+ " repo_type=\"model\",\n",
256
+ " exist_ok=True,\n",
257
+ " )\n",
258
+ " api.upload_folder(\n",
259
+ " repo_id=f\"{username}/{MODEL_NAME}\",\n",
260
+ " folder_path=output_dir,\n",
261
+ " )\n",
262
+ "except ValueError as e:\n",
263
+ " print(e)"
264
+ ]
265
+ }
266
+ ],
267
+ "metadata": {
268
+ "kernelspec": {
269
+ "display_name": "Python 3",
270
+ "name": "python3"
271
+ },
272
+ "language_info": {
273
+ "name": "python",
274
+ "version": "3.11.9"
275
+ }
276
+ },
277
+ "nbformat": 4,
278
+ "nbformat_minor": 0
279
+ }