{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Collect images" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Save images from below websites with Firefox.\n", "- https://www.dreamstime.com/photos-images/corgi-butt.html\n", "- https://www.pinterest.com/I_love_Corgi/corgi-butt/" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\hiroga\\miniconda3\\envs\\fine-tuning-vgg16-bread-or-dog\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "from datasets import load_dataset\n", "\n", "stanford_dogs_dataset = load_dataset(\"Alanox/stanford-dogs\", split=\"full\", trust_remote_code=True)\n", "# OR !kaggle datasets download -d jessicali9530/stanford-dogs-dataset -p \"data\" -q" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Dataset({\n", " features: ['name', 'annotations', 'target', 'image'],\n", " num_rows: 20580\n", "})" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stanford_dogs_dataset" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from datasets import load_dataset\n", "\n", "bread_dataset = load_dataset(\"imagefolder\", data_dir=\"data/images.cv_fg0xp9w733695pvws1a4yh/data\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatasetDict({\n", " train: Dataset({\n", " features: ['image', 'label'],\n", " num_rows: 1478\n", " })\n", " validation: Dataset({\n", " features: ['image', 'label'],\n", " num_rows: 240\n", " })\n", " test: Dataset({\n", " features: ['image', 'label'],\n", " num_rows: 738\n", " })\n", "})" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bread_dataset" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Bedlington Terrier', 'Clumber', 'Bluetick', 'German Short Haired Pointer', 'Labrador Retriever', 'Bernese Mountain Dog', 'Saluki', 'German Shepherd', 'Komondor', 'Kuvasz', 'Weimaraner', 'Great Pyrenees', 'Rottweiler', 'Pekinese', 'Gordon Setter', 'Tibetan Terrier', 'Soft Coated Wheaten Terrier', 'Brittany Spaniel', 'Leonberg', 'English Foxhound', 'Collie', 'Basset', 'Wire Haired Fox Terrier', 'Norwegian Elkhound', 'Chesapeake Bay Retriever', 'Cardigan', 'Borzoi', 'Border Collie', 'Malamute', 'Australian Terrier', 'Silky Terrier', 'Affenpinscher', 'Pomeranian', 'American Staffordshire Terrier', 'Otterhound', 'Staffordshire Bullterrier', 'West Highland White Terrier', 'Boston Bull', 'Redbone', 'Irish Water Spaniel', 'Giant Schnauzer', 'Flat Coated Retriever', 'Norwich Terrier', 'Dhole', 'Airedale', 'Miniature Poodle', 'Malinois', 'Sealyham Terrier', 'Cairn', 'Eskimo Dog', 'Siberian Husky', 'Papillon', 'Greater Swiss Mountain Dog', 'Sussex Spaniel', 'African Hunting Dog', 'Pembroke', 'Dingo', 'Appenzeller', 'Irish Setter', 'Kelpie', 'Brabancon Griffon', 'Groenendael', 'Norfolk Terrier', 'Lakeland Terrier', 'Italian Greyhound', 'Great Dane', 'Yorkshire Terrier', 'Miniature Schnauzer', 'Dandie Dinmont', 'Maltese Dog', 'Border Terrier', 'Rhodesian Ridgeback', 'Blenheim Spaniel', 'Miniature Pinscher', 'Japanese 
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Bedlington Terrier', 'Clumber', 'Bluetick', 'German Short Haired Pointer', 'Labrador Retriever', 'Bernese Mountain Dog', 'Saluki', 'German Shepherd', 'Komondor', 'Kuvasz', 'Weimaraner', 'Great Pyrenees', 'Rottweiler', 'Pekinese', 'Gordon Setter', 'Tibetan Terrier', 'Soft Coated Wheaten Terrier', 'Brittany Spaniel', 'Leonberg', 'English Foxhound', 'Collie', 'Basset', 'Wire Haired Fox Terrier', 'Norwegian Elkhound', 'Chesapeake Bay Retriever', 'Cardigan', 'Borzoi', 'Border Collie', 'Malamute', 'Australian Terrier', 'Silky Terrier', 'Affenpinscher', 'Pomeranian', 'American Staffordshire Terrier', 'Otterhound', 'Staffordshire Bullterrier', 'West Highland White Terrier', 'Boston Bull', 'Redbone', 'Irish Water Spaniel', 'Giant Schnauzer', 'Flat Coated Retriever', 'Norwich Terrier', 'Dhole', 'Airedale', 'Miniature Poodle', 'Malinois', 'Sealyham Terrier', 'Cairn', 'Eskimo Dog', 'Siberian Husky', 'Papillon', 'Greater Swiss Mountain Dog', 'Sussex Spaniel', 'African Hunting Dog', 'Pembroke', 'Dingo', 'Appenzeller', 'Irish Setter', 'Kelpie', 'Brabancon Griffon', 'Groenendael', 'Norfolk Terrier', 'Lakeland Terrier', 'Italian Greyhound', 'Great Dane', 'Yorkshire Terrier', 'Miniature Schnauzer', 'Dandie Dinmont', 'Maltese Dog', 'Border Terrier', 'Rhodesian Ridgeback', 'Blenheim Spaniel', 'Miniature Pinscher', 'Japanese Spaniel', 'Afghan Hound', 'Toy Poodle', 'Old English Sheepdog', 'Doberman', 'Golden Retriever', 'Samoyed', 'Standard Schnauzer', 'Ibizan Hound', 'Mexican Hairless', 'Bouvier Des Flandres', 'Shih Tzu', 'Irish Terrier', 'Standard Poodle', 'Cocker Spaniel', 'Pug', 'Walker Hound', 'Bull Mastiff', 'Toy Terrier', 'Chihuahua', 'Beagle', 'Newfoundland', 'Black And Tan Coonhound', 'Welsh Springer Spaniel', 'Kerry Blue Terrier', 'French Bulldog', 'Tibetan Mastiff', 'English Setter', 'Boxer', 'Curly Coated Retriever', 'Irish Wolfhound', 'Shetland Sheepdog', 'Briard', 'Bloodhound', 'Saint Bernard', 'Whippet', 'Basenji', 'English Springer', 'Scotch Terrier', 'Entlebucher', 'Scottish Deerhound', 'Lhasa', 'Vizsla', 'Keeshond', 'Schipperke', 'Chow']\n" ] } ], "source": [ "unique_targets = stanford_dogs_dataset.unique('target')\n", "print(unique_targets)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "181\n" ] } ], "source": [ "# If corgis (Pembroke) alone provide several hundred images, use only those; otherwise use all dog images\n", "pembroke_count = sum(target == 'Pembroke' for target in stanford_dogs_dataset['target'])\n", "print(pembroke_count)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Add a new column 'bread_or_dog' to both datasets: 0 for bread, 1 for dog\n", "bread_dataset = bread_dataset.map(lambda example: {'bread_or_dog': 0})\n", "stanford_dogs_dataset = stanford_dogs_dataset.map(lambda example: {'bread_or_dog': 1})" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Dataset({\n", " features: ['name', 'annotations', 'target', 'image', 'bread_or_dog'],\n", " num_rows: 20580\n", "})" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stanford_dogs_dataset" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatasetDict({\n", " train: Dataset({\n", " features: ['name', 'annotations', 'target', 'image', 'bread_or_dog'],\n", " num_rows: 16464\n", " })\n", " test: Dataset({\n", " features: ['name', 'annotations', 'target', 'image', 'bread_or_dog'],\n", " num_rows: 2058\n", " })\n", " validation: Dataset({\n", " features: ['name', 'annotations', 'target', 'image', 'bread_or_dog'],\n", " num_rows: 2058\n", " })\n", "})" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from datasets import DatasetDict\n", "\n", "train_test_dataset = stanford_dogs_dataset.train_test_split(test_size=0.2)\n", "test_valid_dataset = train_test_dataset[\"test\"].train_test_split(test_size=0.5)\n", "stanford_dogs_dataset_dict = DatasetDict({\n", " \"train\": train_test_dataset[\"train\"],\n", " \"test\": test_valid_dataset[\"train\"],\n", " \"validation\": test_valid_dataset[\"test\"]\n", "})\n", "stanford_dogs_dataset_dict" ] },
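{ "cell_type": "markdown", "metadata": {}, "source": [ "Note that `train_test_split` shuffles at random, so the 80/10/10 split above is not reproducible across runs. A seeded variant is sketched below for illustration; the seed value is an assumption, not part of the original run." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Illustrative only: seed the shuffle so the split is reproducible.\n", "# The seed value (42) is arbitrary and was not used in the original run.\n", "reproducible_split = stanford_dogs_dataset.train_test_split(test_size=0.2, seed=42)" ] },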
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "DatasetDict({\n", " train: Dataset({\n", " features: ['name', 'annotations', 'target', 'image', 'bread_or_dog', 'label'],\n", " num_rows: 17942\n", " })\n", " validation: Dataset({\n", " features: ['name', 'annotations', 'target', 'image', 'bread_or_dog', 'label'],\n", " num_rows: 2298\n", " })\n", " test: Dataset({\n", " features: ['name', 'annotations', 'target', 'image', 'bread_or_dog', 'label'],\n", " num_rows: 2796\n", " })\n", "})\n" ] } ], "source": [ "from datasets import concatenate_datasets\n", "\n", "# Concatenate the dog and bread datasets for each split\n", "merged_train_dataset = concatenate_datasets([stanford_dogs_dataset_dict['train'], bread_dataset['train']])\n", "merged_validation_dataset = concatenate_datasets([stanford_dogs_dataset_dict['validation'], bread_dataset['validation']])\n", "merged_test_dataset = concatenate_datasets([stanford_dogs_dataset_dict['test'], bread_dataset['test']])\n", "merged_dataset = DatasetDict({\n", " \"train\": merged_train_dataset,\n", " \"validation\": merged_validation_dataset,\n", " \"test\": merged_test_dataset\n", "})\n", "\n", "print(merged_dataset)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Inspect model" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "VGG(\n", " (features): Sequential(\n", " (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", " (1): ReLU(inplace=True)\n", " (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", " (3): ReLU(inplace=True)\n", " (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", " (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", " (6): ReLU(inplace=True)\n", " (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", " (8): ReLU(inplace=True)\n", " (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", " (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", " (11): ReLU(inplace=True)\n", " (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", " (13): ReLU(inplace=True)\n", " (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", " (15): ReLU(inplace=True)\n", " (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", " (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", " (18): ReLU(inplace=True)\n", " (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", " (20): ReLU(inplace=True)\n", " (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", " (22): ReLU(inplace=True)\n", " (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", " (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", " (25): ReLU(inplace=True)\n", " (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", " (27): ReLU(inplace=True)\n", " (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", " (29): ReLU(inplace=True)\n", " (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", " )\n", " (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))\n", " (classifier): Sequential(\n", " (0): Linear(in_features=25088, out_features=4096, bias=True)\n", " (1): ReLU(inplace=True)\n", " (2): Dropout(p=0.5, inplace=False)\n", " (3): Linear(in_features=4096, out_features=4096, bias=True)\n", " (4): ReLU(inplace=True)\n", " (5): Dropout(p=0.5, inplace=False)\n", " (6): Linear(in_features=4096, out_features=1000, bias=True)\n", " )\n", ")\n" ] } ], "source": [ "from torchvision import models\n", "\n", "# Load the ImageNet-pretrained weights; models.vgg16() alone returns randomly\n", "# initialized weights, which would leave nothing to fine-tune.\n", "model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)\n", "print(model)" ] },
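{ "cell_type": "markdown", "metadata": {}, "source": [ "Most of VGG16's weights sit in the classifier head (roughly 124M of about 138M parameters), which matters for the per-group learning rates used later. The cell below is an illustrative tally, not part of the original notebook." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Illustrative: tally parameters in the convolutional backbone vs. the classifier head.\n", "total = sum(p.numel() for p in model.parameters())\n", "head = sum(p.numel() for p in model.classifier.parameters())\n", "print(f\"total={total:,} classifier={head:,} features={total - head:,}\")" ] },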
"name='features.5.weight'\n", "name='features.5.bias'\n", "name='features.7.weight'\n", "name='features.7.bias'\n", "name='features.10.weight'\n", "name='features.10.bias'\n", "name='features.12.weight'\n", "name='features.12.bias'\n", "name='features.14.weight'\n", "name='features.14.bias'\n", "name='features.17.weight'\n", "name='features.17.bias'\n", "name='features.19.weight'\n", "name='features.19.bias'\n", "name='features.21.weight'\n", "name='features.21.bias'\n", "name='features.24.weight'\n", "name='features.24.bias'\n", "name='features.26.weight'\n", "name='features.26.bias'\n", "name='features.28.weight'\n", "name='features.28.bias'\n", "name='classifier.0.weight'\n", "name='classifier.0.bias'\n", "name='classifier.3.weight'\n", "name='classifier.3.bias'\n", "name='classifier.6.weight'\n", "name='classifier.6.bias'\n" ] } ], "source": [ "for name, _param in model.named_parameters():\n", " print(f\"{name=}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Fine Tuning" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "import torch\n", "import wandb\n", "\n", "def train(model, criterion, optimizer, dataloaders_dict, num_epochs, device):\n", " model.to(device)\n", "\n", " for epoch in range(num_epochs):\n", " print('Epoch {}/{}'.format(epoch + 1, num_epochs))\n", " print('-------------')\n", " \n", " for phase in ['train', 'validation']:\n", " if phase == 'train':\n", " model.train()\n", " else:\n", " model.eval()\n", "\n", " epoch_loss = 0.0\n", " epoch_corrects = 0\n", " \n", " for batch in dataloaders_dict[phase]:\n", " images, labels = batch[\"image\"], batch[\"bread_or_dog\"]\n", " images, labels = images.to(device), labels.to(device)\n", "\n", " optimizer.zero_grad()\n", " \n", " # 学習時のみ勾配を計算させる設定にする\n", " with torch.set_grad_enabled(phase == 'train'):\n", " outputs = model(images)\n", " \n", " # 損失を計算\n", " loss = criterion(outputs, labels)\n", " \n", " # ラベルを予測\n", " _, preds = torch.max(outputs, 1)\n", "\n", " if phase == 'train':\n", " loss.backward()\n", " optimizer.step()\n", "\n", " # イテレーション結果の計算\n", " # lossの合計を更新\n", " # PyTorchの仕様上各バッチ内での平均のlossが計算される。\n", " # データ数を掛けることで平均から合計に変換をしている。\n", " # 損失和は「全データの損失/データ数」で計算されるため、\n", " # 平均のままだと損失和を求めることができないため。\n", " epoch_loss += loss.item() * images.size(0)\n", " \n", " # 正解数の合計を更新\n", " epoch_corrects += torch.sum(preds == labels.data)\n", "\n", " # epochごとのlossと正解率を表示\n", " epoch_loss = epoch_loss / len(dataloaders_dict[phase].dataset)\n", " epoch_acc = epoch_corrects.double() / len(dataloaders_dict[phase].dataset)\n", "\n", " log = {\n", " \"epoch\": epoch +1,\n", " \"phase\": phase,\n", " f\"{phase}_loss\": epoch_loss,\n", " f\"{phase}_acc\": epoch_acc,\n", " }\n", " print(log)\n", " wandb.log(log)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "Finishing last run (ID:nw4ysgpu) before initializing another..." ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "

Run history:
epoch            ▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
train_acc        ▁▂▂▂▅▇████
train_loss       █▆▅▃▃▂▂▁▁▁
validation_acc   ▁▁▁▄▇▇▆██▇
validation_loss  █▆▅▃▂▂▂▁▁▁

Run summary:
epoch            10
phase            validation
train_acc        0.98473
train_loss       0.04198
validation_acc   0.97998
validation_loss  0.05055
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run resplendent-festival-20 at: https://wandb.ai/hiroga/bread-or-dog/runs/nw4ysgpu
Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Find logs at: .\\wandb\\run-20240217_062447-nw4ysgpu\\logs" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Successfully finished last run (ID:nw4ysgpu). Initializing new run:
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.16.3" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in c:\\Users\\hiroga\\Documents\\GitHub\\til\\computer-science\\machine-learning\\_src\\fine-tuning-vgg16-bread-or-dog\\wandb\\run-20240217_081017-vy00m3yf" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run abundant-festival-21 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/hiroga/bread-or-dog" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/hiroga/bread-or-dog/runs/vy00m3yf" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/10\n", "-------------\n", "{'epoch': 1, 'phase': 'train', 'train_loss': 0.07768695316224603, 'train_acc': tensor(0.9642, device='cuda:0', dtype=torch.float64)}\n", "{'epoch': 1, 'phase': 'validation', 'validation_loss': 0.07376273388806584, 'validation_acc': tensor(0.9752, device='cuda:0', dtype=torch.float64)}\n", "Epoch 2/10\n", "-------------\n", "{'epoch': 2, 'phase': 'train', 'train_loss': 0.039798663440396634, 'train_acc': tensor(0.9868, device='cuda:0', dtype=torch.float64)}\n", "{'epoch': 2, 'phase': 'validation', 'validation_loss': 0.07450082683805061, 'validation_acc': tensor(0.9704, device='cuda:0', dtype=torch.float64)}\n", "Epoch 3/10\n", "-------------\n", "{'epoch': 3, 'phase': 'train', 'train_loss': 0.038719125189200225, 'train_acc': tensor(0.9869, device='cuda:0', dtype=torch.float64)}\n", "{'epoch': 3, 'phase': 'validation', 'validation_loss': 0.08719978122943582, 'validation_acc': tensor(0.9608, device='cuda:0', dtype=torch.float64)}\n", "Epoch 4/10\n", "-------------\n" ] } ], "source": [ "import torch\n", "import wandb\n", "from safetensors.torch import save_file\n", "from torchvision import transforms\n", "from torch.utils.data import DataLoader\n", "\n", "model_name = \"vgg16\"\n", "model.classifier[6] = torch.nn.Linear(in_features=4096, out_features=2)\n", "\n", "features = [param for name, param in model.named_parameters() if \"features\" in name]\n", "classifier = [param for name, param in model.named_parameters() if \"classifier.0\" in name or \"classifier.3\" in name]\n", "last_classifier = [param for name, param in model.named_parameters() if \"classifier.6\" in name]\n", "param_groups = [\n", " {'params': features, 'lr': 1e-4},\n", " {'params': classifier, 'lr': 5e-4},\n", " {'params': last_classifier, 'lr': 1e-3},\n", "]\n", "momentum = 0.9\n", "\n", "batch_size = 64\n", "\n", "# torchvision の datasets とは違い、transforms をそのままセットすれば良いわけではないので留意。\n", "composed = transforms.Compose([\n", " transforms.Resize((224, 224)), # Resize all images to 224x224\n", " transforms.ToTensor(), # Convert images to PyTorch tensors\n", "])\n", "def transform(batch):\n", " tensors = [composed(img) for img in batch['image']]\n", " return {\"image\": tensors, \"bread_or_dog\": batch[\"bread_or_dog\"]}\n", "\n", "merged_dataset['train'].set_transform(transform, [\"image\", \"bread_or_dog\"])\n", "merged_dataset['validation'].set_transform(transform, [\"image\", \"bread_or_dog\"])\n", "\n", "# Assuming that the datasets 'train' and 'validation' are available in the dataloaders_dict\n", "train_dataloader = DataLoader(merged_dataset['train'], batch_size=batch_size, shuffle=True)\n", "valid_dataloader = DataLoader(merged_dataset['validation'], batch_size=batch_size, shuffle=False)\n", "dataloaders_dict = {\n", " \"train\": train_dataloader,\n", " \"validation\": valid_dataloader\n", "}\n", "\n", "num_epochs = 10\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "\n", "wandb.init(\n", " project=\"bread-or-dog\",\n", " config={\n", " \"model_name\": model_name,\n", " \"architecture\": \"CNN\",\n", 
" \"dataset\": [\"Alanox/stanford-dogs\", \"images.cv_fg0xp9w733695pvws1a4yh\"],\n", " \"param_groups\": param_groups,\n", " \"num_epoch\": num_epochs,\n", " \"momentum\": momentum,\n", " \"device\": device\n", " }\n", ")\n", "\n", "criterion = torch.nn.CrossEntropyLoss()\n", "optimizer = torch.optim.SGD(param_groups, momentum=momentum)\n", "\n", "\n", "train(model, criterion, optimizer, dataloaders_dict, num_epochs=num_epochs, device=device)\n", "\n", "save_file(model.state_dict(), f\"models/snapshots/{model_name}_epoch{num_epochs}.safetensors\")\n", "\n", "wandb.log_artifact(model)\n", "\n", "model.to_onnx()\n", "wandb.save(\"model.onnx\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "fune-tuning-vgg16-bread-or-dog", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.1" } }, "nbformat": 4, "nbformat_minor": 2 }