derek-thomas HF staff committed on
Commit
b692321
·
1 Parent(s): 9ef5a14

Updating for A100 training

Browse files
Files changed (1) hide show
  1. 02-autotrain.ipynb +192 -50
02-autotrain.ipynb CHANGED
@@ -50,7 +50,7 @@
50
  {
51
  "data": {
52
  "application/vnd.jupyter.widget-view+json": {
53
- "model_id": "928f44f483504b438e0fdbd4df3d7dd5",
54
  "version_major": 2,
55
  "version_minor": 0
56
  },
@@ -111,10 +111,10 @@
111
  " },\n",
112
  " \"params\": {\n",
113
  " \"block_size\": 512,\n",
114
- " \"model_max_length\": 1500,\n",
115
  " \"epochs\": 4,\n",
116
  " \"batch_size\": 8,\n",
117
- " \"lr\": 1e-6,\n",
118
  " \"peft\": True,\n",
119
  " \"quantization\": \"int4\",\n",
120
  " \"target_modules\": \"all-linear\",\n",
@@ -191,55 +191,55 @@
191
  "output_type": "stream",
192
  "text": [
193
  "Running autotrain with config: ./autotrain_configs/conversation_RFA_gpt3_5.yml\n",
194
- "INFO | 2025-01-08 14:33:16 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_RFA_gpt3_5.yml\n",
195
- "INFO | 2025-01-08 14:33:16 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
196
- "INFO | 2025-01-08 14:33:16 | autotrain.parser:__post_init__:166 - Using backend: spaces-l4x1\n",
197
- "INFO | 2025-01-08 14:33:16 | autotrain.parser:run:224 - {'model': 'tiiuae/Falcon3-7B-Instruct', 'project_name': 'falcon-v03-poe-RFA-gpt3-5', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-falcon-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-07, 'epochs': 4, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'tokenizer', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_RFA_gpt3_5', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
198
- "INFO | 2025-01-08 14:33:23 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-RFA-gpt3-5\n",
199
  "\n",
200
  "---\n",
201
  "https://huggingface.co/spaces/derek-thomas/autotrain-falcon-v03-poe-RFA-gpt3-5\n",
202
  "---\n",
203
  "\n",
204
  "Running autotrain with config: ./autotrain_configs/conversation_RFA_falcon.yml\n",
205
- "INFO | 2025-01-08 14:33:26 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_RFA_falcon.yml\n",
206
- "INFO | 2025-01-08 14:33:26 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
207
- "INFO | 2025-01-08 14:33:26 | autotrain.parser:__post_init__:166 - Using backend: spaces-l4x1\n",
208
- "INFO | 2025-01-08 14:33:26 | autotrain.parser:run:224 - {'model': 'tiiuae/Falcon3-7B-Instruct', 'project_name': 'falcon-v03-poe-RFA-falcon', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-falcon-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-07, 'epochs': 4, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'tokenizer', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_RFA_falcon', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
209
- "INFO | 2025-01-08 14:33:32 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-RFA-falcon\n",
210
  "\n",
211
  "---\n",
212
  "https://huggingface.co/spaces/derek-thomas/autotrain-falcon-v03-poe-RFA-falcon\n",
213
  "---\n",
214
  "\n",
215
  "Running autotrain with config: ./autotrain_configs/conversation_FAR_gpt3_5.yml\n",
216
- "INFO | 2025-01-08 14:33:36 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_FAR_gpt3_5.yml\n",
217
- "INFO | 2025-01-08 14:33:36 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
218
- "INFO | 2025-01-08 14:33:36 | autotrain.parser:__post_init__:166 - Using backend: spaces-l4x1\n",
219
- "INFO | 2025-01-08 14:33:36 | autotrain.parser:run:224 - {'model': 'tiiuae/Falcon3-7B-Instruct', 'project_name': 'falcon-v03-poe-FAR-gpt3-5', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-falcon-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-07, 'epochs': 4, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'tokenizer', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FAR_gpt3_5', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
220
- "INFO | 2025-01-08 14:33:41 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-FAR-gpt3-5\n",
221
  "\n",
222
  "---\n",
223
  "https://huggingface.co/spaces/derek-thomas/autotrain-falcon-v03-poe-FAR-gpt3-5\n",
224
  "---\n",
225
  "\n",
226
  "Running autotrain with config: ./autotrain_configs/conversation_FAR_falcon.yml\n",
227
- "INFO | 2025-01-08 14:33:45 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_FAR_falcon.yml\n",
228
- "INFO | 2025-01-08 14:33:45 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
229
- "INFO | 2025-01-08 14:33:45 | autotrain.parser:__post_init__:166 - Using backend: spaces-l4x1\n",
230
- "INFO | 2025-01-08 14:33:45 | autotrain.parser:run:224 - {'model': 'tiiuae/Falcon3-7B-Instruct', 'project_name': 'falcon-v03-poe-FAR-falcon', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-falcon-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-07, 'epochs': 4, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'tokenizer', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FAR_falcon', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
231
- "INFO | 2025-01-08 14:33:51 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-FAR-falcon\n",
232
  "\n",
233
  "---\n",
234
  "https://huggingface.co/spaces/derek-thomas/autotrain-falcon-v03-poe-FAR-falcon\n",
235
  "---\n",
236
  "\n",
237
  "Running autotrain with config: ./autotrain_configs/conversation_FA.yml\n",
238
- "INFO | 2025-01-08 14:33:54 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_FA.yml\n",
239
- "INFO | 2025-01-08 14:33:54 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
240
- "INFO | 2025-01-08 14:33:54 | autotrain.parser:__post_init__:166 - Using backend: spaces-l4x1\n",
241
- "INFO | 2025-01-08 14:33:54 | autotrain.parser:run:224 - {'model': 'tiiuae/Falcon3-7B-Instruct', 'project_name': 'falcon-v03-poe-FA', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-falcon-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-07, 'epochs': 4, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'tokenizer', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FA', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
242
- "INFO | 2025-01-08 14:34:00 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-FA\n",
243
  "\n",
244
  "---\n",
245
  "https://huggingface.co/spaces/derek-thomas/autotrain-falcon-v03-poe-FA\n",
@@ -284,7 +284,7 @@
284
  },
285
  {
286
  "cell_type": "code",
287
- "execution_count": 7,
288
  "id": "adf09687-ab1e-4f1e-8bf9-317cc928467a",
289
  "metadata": {},
290
  "outputs": [],
@@ -295,7 +295,7 @@
295
  },
296
  {
297
  "cell_type": "code",
298
- "execution_count": 8,
299
  "id": "19d80d26-cda4-41fb-a125-06060c3f90ce",
300
  "metadata": {},
301
  "outputs": [
@@ -335,10 +335,82 @@
335
  ]
336
  },
337
  {
338
- "metadata": {},
339
  "cell_type": "code",
340
- "outputs": [],
341
- "execution_count": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  "source": [
343
  "for space in autotrain_spaces:\n",
344
  " confirm = input(f\"Are you sure you want to delete the space '{space}'? (y/n): \")\n",
@@ -347,35 +419,105 @@
347
  " print(f\"Deleted {space}\")\n",
348
  " else:\n",
349
  " print(f\"Skipped {space}\")\n"
350
- ],
351
- "id": "de150f086f1c72fa"
352
  },
353
  {
354
- "metadata": {},
355
  "cell_type": "markdown",
 
 
356
  "source": [
357
  "<span style=\"color:red; font-size:20px; font-weight:bold;\">\n",
358
  "ONLY RUN THIS IF YOU NEED TO RESTART FROM SCRATCH\n",
359
  "THIS WILL DELETE YOUR MODELS\n",
360
  "</span>\n"
361
- ],
362
- "id": "e6aa4788fe8b0297"
363
  },
364
  {
365
- "metadata": {},
366
  "cell_type": "code",
367
- "outputs": [],
368
- "execution_count": null,
369
- "source": [
370
- "# for model in autotrain_models:\n",
371
- "# confirm = input(f\"Are you sure you want to delete the model '{model}'? (y/n): \")\n",
372
- "# if confirm.lower() == 'y':\n",
373
- "# api.delete_repo(model, repo_type='model')\n",
374
- "# print(f\"Deleted {model}\")\n",
375
- "# else:\n",
376
- "# print(f\"Skipped {model}\")\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
  ],
378
- "id": "c887e61558785e69"
 
 
 
 
 
 
 
 
379
  },
380
  {
381
  "cell_type": "code",
 
50
  {
51
  "data": {
52
  "application/vnd.jupyter.widget-view+json": {
53
+ "model_id": "b5b27c90c4e849e6aa04587d526e108f",
54
  "version_major": 2,
55
  "version_minor": 0
56
  },
 
111
  " },\n",
112
  " \"params\": {\n",
113
  " \"block_size\": 512,\n",
114
+ " \"model_max_length\": 1750,\n",
115
  " \"epochs\": 4,\n",
116
  " \"batch_size\": 8,\n",
117
+ " \"lr\": 5e-5,\n",
118
  " \"peft\": True,\n",
119
  " \"quantization\": \"int4\",\n",
120
  " \"target_modules\": \"all-linear\",\n",
 
191
  "output_type": "stream",
192
  "text": [
193
  "Running autotrain with config: ./autotrain_configs/conversation_RFA_gpt3_5.yml\n",
194
+ "INFO | 2025-01-10 13:45:51 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_RFA_gpt3_5.yml\n",
195
+ "INFO | 2025-01-10 13:45:51 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
196
+ "INFO | 2025-01-10 13:45:51 | autotrain.parser:__post_init__:166 - Using backend: spaces-a100-large\n",
197
+ "INFO | 2025-01-10 13:45:51 | autotrain.parser:run:224 - {'model': 'tiiuae/Falcon3-7B-Instruct', 'project_name': 'falcon-v03-poe-RFA-gpt3-5', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-falcon-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1750, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 5e-05, 'epochs': 4, 'batch_size': 8, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'tokenizer', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_RFA_gpt3_5', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
198
+ "INFO | 2025-01-10 13:45:54 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-RFA-gpt3-5\n",
199
  "\n",
200
  "---\n",
201
  "https://huggingface.co/spaces/derek-thomas/autotrain-falcon-v03-poe-RFA-gpt3-5\n",
202
  "---\n",
203
  "\n",
204
  "Running autotrain with config: ./autotrain_configs/conversation_RFA_falcon.yml\n",
205
+ "INFO | 2025-01-10 13:45:58 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_RFA_falcon.yml\n",
206
+ "INFO | 2025-01-10 13:45:58 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
207
+ "INFO | 2025-01-10 13:45:58 | autotrain.parser:__post_init__:166 - Using backend: spaces-a100-large\n",
208
+ "INFO | 2025-01-10 13:45:58 | autotrain.parser:run:224 - {'model': 'tiiuae/Falcon3-7B-Instruct', 'project_name': 'falcon-v03-poe-RFA-falcon', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-falcon-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1750, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 5e-05, 'epochs': 4, 'batch_size': 8, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'tokenizer', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_RFA_falcon', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
209
+ "INFO | 2025-01-10 13:46:01 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-RFA-falcon\n",
210
  "\n",
211
  "---\n",
212
  "https://huggingface.co/spaces/derek-thomas/autotrain-falcon-v03-poe-RFA-falcon\n",
213
  "---\n",
214
  "\n",
215
  "Running autotrain with config: ./autotrain_configs/conversation_FAR_gpt3_5.yml\n",
216
+ "INFO | 2025-01-10 13:46:05 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_FAR_gpt3_5.yml\n",
217
+ "INFO | 2025-01-10 13:46:05 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
218
+ "INFO | 2025-01-10 13:46:05 | autotrain.parser:__post_init__:166 - Using backend: spaces-a100-large\n",
219
+ "INFO | 2025-01-10 13:46:05 | autotrain.parser:run:224 - {'model': 'tiiuae/Falcon3-7B-Instruct', 'project_name': 'falcon-v03-poe-FAR-gpt3-5', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-falcon-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1750, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 5e-05, 'epochs': 4, 'batch_size': 8, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'tokenizer', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FAR_gpt3_5', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
220
+ "INFO | 2025-01-10 13:46:08 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-FAR-gpt3-5\n",
221
  "\n",
222
  "---\n",
223
  "https://huggingface.co/spaces/derek-thomas/autotrain-falcon-v03-poe-FAR-gpt3-5\n",
224
  "---\n",
225
  "\n",
226
  "Running autotrain with config: ./autotrain_configs/conversation_FAR_falcon.yml\n",
227
+ "INFO | 2025-01-10 13:46:13 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_FAR_falcon.yml\n",
228
+ "INFO | 2025-01-10 13:46:13 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
229
+ "INFO | 2025-01-10 13:46:13 | autotrain.parser:__post_init__:166 - Using backend: spaces-a100-large\n",
230
+ "INFO | 2025-01-10 13:46:13 | autotrain.parser:run:224 - {'model': 'tiiuae/Falcon3-7B-Instruct', 'project_name': 'falcon-v03-poe-FAR-falcon', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-falcon-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1750, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 5e-05, 'epochs': 4, 'batch_size': 8, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'tokenizer', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FAR_falcon', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
231
+ "INFO | 2025-01-10 13:46:16 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-FAR-falcon\n",
232
  "\n",
233
  "---\n",
234
  "https://huggingface.co/spaces/derek-thomas/autotrain-falcon-v03-poe-FAR-falcon\n",
235
  "---\n",
236
  "\n",
237
  "Running autotrain with config: ./autotrain_configs/conversation_FA.yml\n",
238
+ "INFO | 2025-01-10 13:46:20 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_FA.yml\n",
239
+ "INFO | 2025-01-10 13:46:20 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
240
+ "INFO | 2025-01-10 13:46:20 | autotrain.parser:__post_init__:166 - Using backend: spaces-a100-large\n",
241
+ "INFO | 2025-01-10 13:46:20 | autotrain.parser:run:224 - {'model': 'tiiuae/Falcon3-7B-Instruct', 'project_name': 'falcon-v03-poe-FA', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-falcon-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1750, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 5e-05, 'epochs': 4, 'batch_size': 8, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'tokenizer', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FA', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
242
+ "INFO | 2025-01-10 13:46:23 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-FA\n",
243
  "\n",
244
  "---\n",
245
  "https://huggingface.co/spaces/derek-thomas/autotrain-falcon-v03-poe-FA\n",
 
284
  },
285
  {
286
  "cell_type": "code",
287
+ "execution_count": 9,
288
  "id": "adf09687-ab1e-4f1e-8bf9-317cc928467a",
289
  "metadata": {},
290
  "outputs": [],
 
295
  },
296
  {
297
  "cell_type": "code",
298
+ "execution_count": 7,
299
  "id": "19d80d26-cda4-41fb-a125-06060c3f90ce",
300
  "metadata": {},
301
  "outputs": [
 
335
  ]
336
  },
337
  {
 
338
  "cell_type": "code",
339
+ "execution_count": 10,
340
+ "id": "de150f086f1c72fa",
341
+ "metadata": {},
342
+ "outputs": [
343
+ {
344
+ "name": "stdin",
345
+ "output_type": "stream",
346
+ "text": [
347
+ "Are you sure you want to delete the space 'derek-thomas/autotrain-falcon-v03-poe-RFA-gpt3-5'? (y/n): y\n"
348
+ ]
349
+ },
350
+ {
351
+ "name": "stdout",
352
+ "output_type": "stream",
353
+ "text": [
354
+ "Deleted derek-thomas/autotrain-falcon-v03-poe-RFA-gpt3-5\n"
355
+ ]
356
+ },
357
+ {
358
+ "name": "stdin",
359
+ "output_type": "stream",
360
+ "text": [
361
+ "Are you sure you want to delete the space 'derek-thomas/autotrain-falcon-v03-poe-RFA-falcon'? (y/n): y\n"
362
+ ]
363
+ },
364
+ {
365
+ "name": "stdout",
366
+ "output_type": "stream",
367
+ "text": [
368
+ "Deleted derek-thomas/autotrain-falcon-v03-poe-RFA-falcon\n"
369
+ ]
370
+ },
371
+ {
372
+ "name": "stdin",
373
+ "output_type": "stream",
374
+ "text": [
375
+ "Are you sure you want to delete the space 'derek-thomas/autotrain-falcon-v03-poe-FAR-gpt3-5'? (y/n): y\n"
376
+ ]
377
+ },
378
+ {
379
+ "name": "stdout",
380
+ "output_type": "stream",
381
+ "text": [
382
+ "Deleted derek-thomas/autotrain-falcon-v03-poe-FAR-gpt3-5\n"
383
+ ]
384
+ },
385
+ {
386
+ "name": "stdin",
387
+ "output_type": "stream",
388
+ "text": [
389
+ "Are you sure you want to delete the space 'derek-thomas/autotrain-falcon-v03-poe-FAR-falcon'? (y/n): y\n"
390
+ ]
391
+ },
392
+ {
393
+ "name": "stdout",
394
+ "output_type": "stream",
395
+ "text": [
396
+ "Deleted derek-thomas/autotrain-falcon-v03-poe-FAR-falcon\n"
397
+ ]
398
+ },
399
+ {
400
+ "name": "stdin",
401
+ "output_type": "stream",
402
+ "text": [
403
+ "Are you sure you want to delete the space 'derek-thomas/autotrain-falcon-v03-poe-FA'? (y/n): y\n"
404
+ ]
405
+ },
406
+ {
407
+ "name": "stdout",
408
+ "output_type": "stream",
409
+ "text": [
410
+ "Deleted derek-thomas/autotrain-falcon-v03-poe-FA\n"
411
+ ]
412
+ }
413
+ ],
414
  "source": [
415
  "for space in autotrain_spaces:\n",
416
  " confirm = input(f\"Are you sure you want to delete the space '{space}'? (y/n): \")\n",
 
419
  " print(f\"Deleted {space}\")\n",
420
  " else:\n",
421
  " print(f\"Skipped {space}\")\n"
422
+ ]
 
423
  },
424
  {
 
425
  "cell_type": "markdown",
426
+ "id": "e6aa4788fe8b0297",
427
+ "metadata": {},
428
  "source": [
429
  "<span style=\"color:red; font-size:20px; font-weight:bold;\">\n",
430
  "ONLY RUN THIS IF YOU NEED TO RESTART FROM SCRATCH\n",
431
  "THIS WILL DELETE YOUR MODELS\n",
432
  "</span>\n"
433
+ ]
 
434
  },
435
  {
 
436
  "cell_type": "code",
437
+ "execution_count": 11,
438
+ "id": "c887e61558785e69",
439
+ "metadata": {},
440
+ "outputs": [
441
+ {
442
+ "name": "stdin",
443
+ "output_type": "stream",
444
+ "text": [
445
+ "Are you sure you want to delete the model 'derek-thomas/falcon-v03-poe-RFA-gpt3-5'? (y/n): y\n"
446
+ ]
447
+ },
448
+ {
449
+ "name": "stdout",
450
+ "output_type": "stream",
451
+ "text": [
452
+ "Deleted derek-thomas/falcon-v03-poe-RFA-gpt3-5\n"
453
+ ]
454
+ },
455
+ {
456
+ "name": "stdin",
457
+ "output_type": "stream",
458
+ "text": [
459
+ "Are you sure you want to delete the model 'derek-thomas/falcon-v03-poe-RFA-falcon'? (y/n): y\n"
460
+ ]
461
+ },
462
+ {
463
+ "name": "stdout",
464
+ "output_type": "stream",
465
+ "text": [
466
+ "Deleted derek-thomas/falcon-v03-poe-RFA-falcon\n"
467
+ ]
468
+ },
469
+ {
470
+ "name": "stdin",
471
+ "output_type": "stream",
472
+ "text": [
473
+ "Are you sure you want to delete the model 'derek-thomas/falcon-v03-poe-FAR-gpt3-5'? (y/n): y\n"
474
+ ]
475
+ },
476
+ {
477
+ "name": "stdout",
478
+ "output_type": "stream",
479
+ "text": [
480
+ "Deleted derek-thomas/falcon-v03-poe-FAR-gpt3-5\n"
481
+ ]
482
+ },
483
+ {
484
+ "name": "stdin",
485
+ "output_type": "stream",
486
+ "text": [
487
+ "Are you sure you want to delete the model 'derek-thomas/falcon-v03-poe-FAR-falcon'? (y/n): y\n"
488
+ ]
489
+ },
490
+ {
491
+ "name": "stdout",
492
+ "output_type": "stream",
493
+ "text": [
494
+ "Deleted derek-thomas/falcon-v03-poe-FAR-falcon\n"
495
+ ]
496
+ },
497
+ {
498
+ "name": "stdin",
499
+ "output_type": "stream",
500
+ "text": [
501
+ "Are you sure you want to delete the model 'derek-thomas/falcon-v03-poe-FA'? (y/n): y\n"
502
+ ]
503
+ },
504
+ {
505
+ "name": "stdout",
506
+ "output_type": "stream",
507
+ "text": [
508
+ "Deleted derek-thomas/falcon-v03-poe-FA\n"
509
+ ]
510
+ }
511
  ],
512
+ "source": [
513
+ "for model in autotrain_models:\n",
514
+ " confirm = input(f\"Are you sure you want to delete the model '{model}'? (y/n): \")\n",
515
+ " if confirm.lower() == 'y':\n",
516
+ " api.delete_repo(model, repo_type='model')\n",
517
+ " print(f\"Deleted {model}\")\n",
518
+ " else:\n",
519
+ " print(f\"Skipped {model}\")\n"
520
+ ]
521
  },
522
  {
523
  "cell_type": "code",