Upload folder using huggingface_hub
- .gitignore +5 -0
- README.md +34 -8
- app.py +39 -0
- data/.gitkeep +0 -0
- environment.yml +0 -0
- models/model.safetensors +3 -0
- models/snapshots/.gitkeep +0 -0
- requirements.txt +3 -0
- train.ipynb +711 -0
- vgg16.py +36 -0
.gitignore
ADDED
@@ -0,0 +1,5 @@
data/*
!data/.gitkeep
models/snapshots/*
!models/snapshots/.gitkeep
wandb
README.md
CHANGED
@@ -1,12 +1,38 @@
 ---
-title:
-emoji: 👀
-colorFrom: purple
-colorTo: blue
-sdk: gradio
-sdk_version: 4.19.1
+title: bread-or-dog
 app_file: app.py
-
+sdk: gradio
+sdk_version: 4.19.0
 ---
-
+# Fine-Tuning VGG16 to detect bread or dog
+
+See <>.
+
+## Prerequisites
+
+```powershell
+conda env create -f environment.yml
+conda activate fine-tuning-vgg16-bread-or-dog
+Invoke-WebRequest -Uri https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json -OutFile ./data/imagenet-simple-labels.json
+```
+
+## Demo
+
+```powershell
+# Before Fine-Tuning
+python vgg16.py
+
+# After Fine-Tuning
+python app.py
+```
+
+## Deploy
+
+```powershell
+gradio deploy
+```
+
+## References
+
+- [Image classification with pretrained PyTorch models (VGG, ResNet, etc.) | note.nkmk.me](https://note.nkmk.me/python-pytorch-pretrained-models-image-classification/)
+- [Implementing fine-tuning with PyTorch - 機械学習ともろもろ](https://venoda.hatenablog.com/entry/2020/10/18/014516)
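The `Invoke-WebRequest` step above is PowerShell-specific; a rough cross-platform equivalent in Python (an illustrative sketch, not part of this commit) would be:

```python
from urllib.request import urlretrieve

# Fetch the ImageNet label list that vgg16.py expects under data/.
urlretrieve(
    "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json",
    "data/imagenet-simple-labels.json",
)
```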
app.py
ADDED
@@ -0,0 +1,39 @@
import gradio as gr
import torch
from PIL import Image
from safetensors import safe_open
from torchvision import models, transforms

labels = ["bread", "dog"]

model = models.vgg16(pretrained=True)

model.classifier[6] = torch.nn.Linear(in_features=4096, out_features=2)

model_save_path = "models/vgg16_epoch10.safetensors"
tensors = {}
with safe_open(model_save_path, framework="pt", device="cpu") as f:
    for key in f.keys():
        tensors[key] = f.get_tensor(key)

model.load_state_dict(tensors, strict=False)
model.eval()

preprocess = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize all images to 224x224
    transforms.ToTensor(),  # Convert images to PyTorch tensors
])

def classify_image(input_image: Image):
    img_t = preprocess(input_image)
    batch_t = torch.unsqueeze(img_t, 0)

    with torch.no_grad():
        output = model(batch_t)

    probabilities = torch.nn.functional.softmax(output, dim=1)
    label_to_prob = {labels[i]: prob for i, prob in enumerate(probabilities[0])}
    return label_to_prob

demo = gr.Interface(fn=classify_image, inputs=gr.Image(type='pil'), outputs='label')
demo.launch()
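The loading block in app.py copies tensors one key at a time via `safe_open`; `safetensors.torch.load_file` does the same thing in a single call. A minimal sketch, assuming the same checkpoint path as above:

```python
from safetensors.torch import load_file

# Equivalent to the safe_open loop above: read the whole state dict at once.
tensors = load_file("models/vgg16_epoch10.safetensors", device="cpu")
model.load_state_dict(tensors, strict=False)
```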
data/.gitkeep
ADDED
File without changes
environment.yml
ADDED
Binary file (20.1 kB)
models/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f6cf14bb076755b13494dcb1556be809b5adfe4254748799b8acff762f4336bf
size 537077840
models/snapshots/.gitkeep
ADDED
File without changes
requirements.txt
ADDED
@@ -0,0 +1,3 @@
gradio
safetensors
torchvision
train.ipynb
ADDED
@@ -0,0 +1,711 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Collect images"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save images from the websites below with Firefox.\n",
    "- https://www.dreamstime.com/photos-images/corgi-butt.html\n",
    "- https://www.pinterest.com/I_love_Corgi/corgi-butt/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\hiroga\\miniconda3\\envs\\fine-tuning-vgg16-bread-or-dog\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "from datasets import load_dataset\n",
    "\n",
    "stanford_dogs_dataset = load_dataset(\"Alanox/stanford-dogs\", split=\"full\", trust_remote_code=True)\n",
    "# OR !kaggle datasets download -d jessicali9530/stanford-dogs-dataset -p \"data\" -q"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Dataset({\n",
       "    features: ['name', 'annotations', 'target', 'image'],\n",
       "    num_rows: 20580\n",
       "})"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stanford_dogs_dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datasets import load_dataset\n",
    "\n",
    "bread_dataset = load_dataset(\"imagefolder\", data_dir=\"data/images.cv_fg0xp9w733695pvws1a4yh/data\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatasetDict({\n",
       "    train: Dataset({\n",
       "        features: ['image', 'label'],\n",
       "        num_rows: 1478\n",
       "    })\n",
       "    validation: Dataset({\n",
       "        features: ['image', 'label'],\n",
       "        num_rows: 240\n",
       "    })\n",
       "    test: Dataset({\n",
       "        features: ['image', 'label'],\n",
       "        num_rows: 738\n",
       "    })\n",
       "})"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bread_dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Bedlington Terrier', 'Clumber', 'Bluetick', 'German Short Haired Pointer', 'Labrador Retriever', 'Bernese Mountain Dog', 'Saluki', 'German Shepherd', 'Komondor', 'Kuvasz', 'Weimaraner', 'Great Pyrenees', 'Rottweiler', 'Pekinese', 'Gordon Setter', 'Tibetan Terrier', 'Soft Coated Wheaten Terrier', 'Brittany Spaniel', 'Leonberg', 'English Foxhound', 'Collie', 'Basset', 'Wire Haired Fox Terrier', 'Norwegian Elkhound', 'Chesapeake Bay Retriever', 'Cardigan', 'Borzoi', 'Border Collie', 'Malamute', 'Australian Terrier', 'Silky Terrier', 'Affenpinscher', 'Pomeranian', 'American Staffordshire Terrier', 'Otterhound', 'Staffordshire Bullterrier', 'West Highland White Terrier', 'Boston Bull', 'Redbone', 'Irish Water Spaniel', 'Giant Schnauzer', 'Flat Coated Retriever', 'Norwich Terrier', 'Dhole', 'Airedale', 'Miniature Poodle', 'Malinois', 'Sealyham Terrier', 'Cairn', 'Eskimo Dog', 'Siberian Husky', 'Papillon', 'Greater Swiss Mountain Dog', 'Sussex Spaniel', 'African Hunting Dog', 'Pembroke', 'Dingo', 'Appenzeller', 'Irish Setter', 'Kelpie', 'Brabancon Griffon', 'Groenendael', 'Norfolk Terrier', 'Lakeland Terrier', 'Italian Greyhound', 'Great Dane', 'Yorkshire Terrier', 'Miniature Schnauzer', 'Dandie Dinmont', 'Maltese Dog', 'Border Terrier', 'Rhodesian Ridgeback', 'Blenheim Spaniel', 'Miniature Pinscher', 'Japanese Spaniel', 'Afghan Hound', 'Toy Poodle', 'Old English Sheepdog', 'Doberman', 'Golden Retriever', 'Samoyed', 'Standard Schnauzer', 'Ibizan Hound', 'Mexican Hairless', 'Bouvier Des Flandres', 'Shih Tzu', 'Irish Terrier', 'Standard Poodle', 'Cocker Spaniel', 'Pug', 'Walker Hound', 'Bull Mastiff', 'Toy Terrier', 'Chihuahua', 'Beagle', 'Newfoundland', 'Black And Tan Coonhound', 'Welsh Springer Spaniel', 'Kerry Blue Terrier', 'French Bulldog', 'Tibetan Mastiff', 'English Setter', 'Boxer', 'Curly Coated Retriever', 'Irish Wolfhound', 'Shetland Sheepdog', 'Briard', 'Bloodhound', 'Saint Bernard', 'Whippet', 'Basenji', 'English Springer', 'Scotch Terrier', 'Entlebucher', 'Scottish Deerhound', 'Lhasa', 'Vizsla', 'Keeshond', 'Schipperke', 'Chow']\n"
     ]
    }
   ],
   "source": [
    "unique_targets = stanford_dogs_dataset.unique('target')\n",
    "print(unique_targets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "181\n"
     ]
    }
   ],
   "source": [
    "# If there are several hundred corgi (Pembroke) images on their own, use only those; otherwise use all of the dog images\n",
    "pembroke_count = sum(target == 'Pembroke' for target in stanford_dogs_dataset['target'])\n",
    "print(pembroke_count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add a new column 'bread_or_dog' to the stanford_dogs_dataset and bread_dataset\n",
    "bread_dataset = bread_dataset.map(lambda example: {'bread_or_dog': 0})\n",
    "stanford_dogs_dataset = stanford_dogs_dataset.map(lambda example: {'bread_or_dog': 1})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Dataset({\n",
       "    features: ['name', 'annotations', 'target', 'image', 'bread_or_dog'],\n",
       "    num_rows: 20580\n",
       "})"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stanford_dogs_dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatasetDict({\n",
       "    train: Dataset({\n",
       "        features: ['name', 'annotations', 'target', 'image', 'bread_or_dog'],\n",
       "        num_rows: 16464\n",
       "    })\n",
       "    test: Dataset({\n",
       "        features: ['name', 'annotations', 'target', 'image', 'bread_or_dog'],\n",
       "        num_rows: 2058\n",
       "    })\n",
       "    validation: Dataset({\n",
       "        features: ['name', 'annotations', 'target', 'image', 'bread_or_dog'],\n",
       "        num_rows: 2058\n",
       "    })\n",
       "})"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from datasets import DatasetDict\n",
    "\n",
    "train_test_dataset = stanford_dogs_dataset.train_test_split(test_size=0.2)\n",
    "test_valid_dataset = train_test_dataset[\"test\"].train_test_split(test_size=0.5)\n",
    "stanford_dogs_dataset_dict = DatasetDict({\n",
    "    \"train\": train_test_dataset[\"train\"],\n",
    "    \"test\": test_valid_dataset[\"train\"],\n",
    "    \"validation\": test_valid_dataset[\"test\"]\n",
    "})\n",
    "stanford_dogs_dataset_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DatasetDict({\n",
      "    train: Dataset({\n",
      "        features: ['name', 'annotations', 'target', 'image', 'bread_or_dog', 'label'],\n",
      "        num_rows: 17942\n",
      "    })\n",
      "    validation: Dataset({\n",
      "        features: ['name', 'annotations', 'target', 'image', 'bread_or_dog', 'label'],\n",
      "        num_rows: 2298\n",
      "    })\n",
      "    test: Dataset({\n",
      "        features: ['name', 'annotations', 'target', 'image', 'bread_or_dog', 'label'],\n",
      "        num_rows: 2796\n",
      "    })\n",
      "})\n"
     ]
    }
   ],
   "source": [
    "from datasets import concatenate_datasets\n",
    "\n",
    "# Concatenate the datasets for each split\n",
    "merged_train_dataset = concatenate_datasets([stanford_dogs_dataset_dict['train'], bread_dataset['train']])\n",
    "merged_validation_dataset = concatenate_datasets([stanford_dogs_dataset_dict['validation'], bread_dataset['validation']])\n",
    "merged_test_dataset = concatenate_datasets([stanford_dogs_dataset_dict['test'], bread_dataset['test']])\n",
    "merged_dataset = DatasetDict({\n",
    "    \"train\": merged_train_dataset,\n",
    "    \"validation\": merged_validation_dataset,\n",
    "    \"test\": merged_test_dataset\n",
    "})\n",
    "\n",
    "print(merged_dataset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inspect model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "VGG(\n",
      "  (features): Sequential(\n",
      "    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (1): ReLU(inplace=True)\n",
      "    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (3): ReLU(inplace=True)\n",
      "    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
      "    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (6): ReLU(inplace=True)\n",
      "    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (8): ReLU(inplace=True)\n",
      "    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
      "    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (11): ReLU(inplace=True)\n",
      "    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (13): ReLU(inplace=True)\n",
      "    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (15): ReLU(inplace=True)\n",
      "    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
      "    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (18): ReLU(inplace=True)\n",
      "    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (20): ReLU(inplace=True)\n",
      "    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (22): ReLU(inplace=True)\n",
      "    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
      "    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (25): ReLU(inplace=True)\n",
      "    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (27): ReLU(inplace=True)\n",
      "    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n",
      "    (29): ReLU(inplace=True)\n",
      "    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
      "  )\n",
      "  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))\n",
      "  (classifier): Sequential(\n",
      "    (0): Linear(in_features=25088, out_features=4096, bias=True)\n",
      "    (1): ReLU(inplace=True)\n",
      "    (2): Dropout(p=0.5, inplace=False)\n",
      "    (3): Linear(in_features=4096, out_features=4096, bias=True)\n",
      "    (4): ReLU(inplace=True)\n",
      "    (5): Dropout(p=0.5, inplace=False)\n",
      "    (6): Linear(in_features=4096, out_features=1000, bias=True)\n",
      "  )\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "from torchvision import models\n",
    "\n",
    "model = models.vgg16()\n",
    "print(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "name='features.0.weight'\n",
      "name='features.0.bias'\n",
      "name='features.2.weight'\n",
      "name='features.2.bias'\n",
      "name='features.5.weight'\n",
      "name='features.5.bias'\n",
      "name='features.7.weight'\n",
      "name='features.7.bias'\n",
      "name='features.10.weight'\n",
      "name='features.10.bias'\n",
      "name='features.12.weight'\n",
      "name='features.12.bias'\n",
      "name='features.14.weight'\n",
      "name='features.14.bias'\n",
      "name='features.17.weight'\n",
      "name='features.17.bias'\n",
      "name='features.19.weight'\n",
      "name='features.19.bias'\n",
      "name='features.21.weight'\n",
      "name='features.21.bias'\n",
      "name='features.24.weight'\n",
      "name='features.24.bias'\n",
      "name='features.26.weight'\n",
      "name='features.26.bias'\n",
      "name='features.28.weight'\n",
      "name='features.28.bias'\n",
      "name='classifier.0.weight'\n",
      "name='classifier.0.bias'\n",
      "name='classifier.3.weight'\n",
      "name='classifier.3.bias'\n",
      "name='classifier.6.weight'\n",
      "name='classifier.6.bias'\n"
     ]
    }
   ],
   "source": [
    "for name, _param in model.named_parameters():\n",
    "    print(f\"{name=}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fine Tuning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import wandb\n",
    "\n",
    "def train(model, criterion, optimizer, dataloaders_dict, num_epochs, device):\n",
    "    model.to(device)\n",
    "\n",
    "    for epoch in range(num_epochs):\n",
    "        print('Epoch {}/{}'.format(epoch + 1, num_epochs))\n",
    "        print('-------------')\n",
    "\n",
    "        for phase in ['train', 'validation']:\n",
    "            if phase == 'train':\n",
    "                model.train()\n",
    "            else:\n",
    "                model.eval()\n",
    "\n",
    "            epoch_loss = 0.0\n",
    "            epoch_corrects = 0\n",
    "\n",
    "            for batch in dataloaders_dict[phase]:\n",
    "                images, labels = batch[\"image\"], batch[\"bread_or_dog\"]\n",
    "                images, labels = images.to(device), labels.to(device)\n",
    "\n",
    "                optimizer.zero_grad()\n",
    "\n",
    "                # Compute gradients only during the training phase\n",
    "                with torch.set_grad_enabled(phase == 'train'):\n",
    "                    outputs = model(images)\n",
    "\n",
    "                    # Compute the loss\n",
    "                    loss = criterion(outputs, labels)\n",
    "\n",
    "                    # Predict the labels\n",
    "                    _, preds = torch.max(outputs, 1)\n",
    "\n",
    "                    if phase == 'train':\n",
    "                        loss.backward()\n",
    "                        optimizer.step()\n",
    "\n",
    "                    # Accumulate the results of this iteration.\n",
    "                    # Update the running loss total:\n",
    "                    # PyTorch returns the mean loss over each batch,\n",
    "                    # so multiply by the batch size to convert that mean back into a sum.\n",
    "                    # The epoch loss is \"total loss over all samples / number of samples\",\n",
    "                    # which cannot be recovered from per-batch means alone.\n",
    "                    epoch_loss += loss.item() * images.size(0)\n",
    "\n",
    "                    # Update the running count of correct predictions\n",
    "                    epoch_corrects += torch.sum(preds == labels.data)\n",
    "\n",
    "            # Report the loss and accuracy for each epoch\n",
    "            epoch_loss = epoch_loss / len(dataloaders_dict[phase].dataset)\n",
    "            epoch_acc = epoch_corrects.double() / len(dataloaders_dict[phase].dataset)\n",
    "\n",
    "            log = {\n",
    "                \"epoch\": epoch + 1,\n",
    "                \"phase\": phase,\n",
    "                f\"{phase}_loss\": epoch_loss,\n",
    "                f\"{phase}_acc\": epoch_acc,\n",
    "            }\n",
    "            print(log)\n",
    "            wandb.log(log)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "Finishing last run (ID:nw4ysgpu) before initializing another..."
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<style>\n",
       "    table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
       "    .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
       "    .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
       "    </style>\n",
       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>epoch</td><td>▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██</td></tr><tr><td>train_acc</td><td>▁▂▂▂▅▇████</td></tr><tr><td>train_loss</td><td>█▆▅▃▃▂▂▁▁▁</td></tr><tr><td>validation_acc</td><td>▁▁▁▄▇▇▆██▇</td></tr><tr><td>validation_loss</td><td>█▆▅▃▂▂▂▁▁▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>epoch</td><td>10</td></tr><tr><td>phase</td><td>validation</td></tr><tr><td>train_acc</td><td>0.98473</td></tr><tr><td>train_loss</td><td>0.04198</td></tr><tr><td>validation_acc</td><td>0.97998</td></tr><tr><td>validation_loss</td><td>0.05055</td></tr></table><br/></div></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run <strong style=\"color:#cdcd00\">resplendent-festival-20</strong> at: <a href='https://wandb.ai/hiroga/bread-or-dog/runs/nw4ysgpu' target=\"_blank\">https://wandb.ai/hiroga/bread-or-dog/runs/nw4ysgpu</a><br/>Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Find logs at: <code>.\\wandb\\run-20240217_062447-nw4ysgpu\\logs</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Successfully finished last run (ID:nw4ysgpu). Initializing new run:<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Tracking run with wandb version 0.16.3"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Run data is saved locally in <code>c:\\Users\\hiroga\\Documents\\GitHub\\til\\computer-science\\machine-learning\\_src\\fine-tuning-vgg16-bread-or-dog\\wandb\\run-20240217_081017-vy00m3yf</code>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Syncing run <strong><a href='https://wandb.ai/hiroga/bread-or-dog/runs/vy00m3yf' target=\"_blank\">abundant-festival-21</a></strong> to <a href='https://wandb.ai/hiroga/bread-or-dog' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View project at <a href='https://wandb.ai/hiroga/bread-or-dog' target=\"_blank\">https://wandb.ai/hiroga/bread-or-dog</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       " View run at <a href='https://wandb.ai/hiroga/bread-or-dog/runs/vy00m3yf' target=\"_blank\">https://wandb.ai/hiroga/bread-or-dog/runs/vy00m3yf</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/10\n",
      "-------------\n",
      "{'epoch': 1, 'phase': 'train', 'train_loss': 0.07768695316224603, 'train_acc': tensor(0.9642, device='cuda:0', dtype=torch.float64)}\n",
      "{'epoch': 1, 'phase': 'validation', 'validation_loss': 0.07376273388806584, 'validation_acc': tensor(0.9752, device='cuda:0', dtype=torch.float64)}\n",
      "Epoch 2/10\n",
      "-------------\n",
      "{'epoch': 2, 'phase': 'train', 'train_loss': 0.039798663440396634, 'train_acc': tensor(0.9868, device='cuda:0', dtype=torch.float64)}\n",
      "{'epoch': 2, 'phase': 'validation', 'validation_loss': 0.07450082683805061, 'validation_acc': tensor(0.9704, device='cuda:0', dtype=torch.float64)}\n",
      "Epoch 3/10\n",
      "-------------\n",
      "{'epoch': 3, 'phase': 'train', 'train_loss': 0.038719125189200225, 'train_acc': tensor(0.9869, device='cuda:0', dtype=torch.float64)}\n",
      "{'epoch': 3, 'phase': 'validation', 'validation_loss': 0.08719978122943582, 'validation_acc': tensor(0.9608, device='cuda:0', dtype=torch.float64)}\n",
      "Epoch 4/10\n",
      "-------------\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "import wandb\n",
    "from safetensors.torch import save_file\n",
    "from torchvision import transforms\n",
    "from torch.utils.data import DataLoader\n",
    "\n",
    "model_name = \"vgg16\"\n",
    "model.classifier[6] = torch.nn.Linear(in_features=4096, out_features=2)\n",
    "\n",
    "features = [param for name, param in model.named_parameters() if \"features\" in name]\n",
    "classifier = [param for name, param in model.named_parameters() if \"classifier.0\" in name or \"classifier.3\" in name]\n",
    "last_classifier = [param for name, param in model.named_parameters() if \"classifier.6\" in name]\n",
    "param_groups = [\n",
    "    {'params': features, 'lr': 1e-4},\n",
    "    {'params': classifier, 'lr': 5e-4},\n",
    "    {'params': last_classifier, 'lr': 1e-3},\n",
    "]\n",
    "momentum = 0.9\n",
    "\n",
    "batch_size = 64\n",
    "\n",
    "# Note: unlike torchvision datasets, the transforms cannot simply be assigned here; they are applied through set_transform below.\n",
    "composed = transforms.Compose([\n",
    "    transforms.Resize((224, 224)),  # Resize all images to 224x224\n",
    "    transforms.ToTensor(),  # Convert images to PyTorch tensors\n",
    "])\n",
    "def transform(batch):\n",
    "    tensors = [composed(img) for img in batch['image']]\n",
    "    return {\"image\": tensors, \"bread_or_dog\": batch[\"bread_or_dog\"]}\n",
    "\n",
    "merged_dataset['train'].set_transform(transform, [\"image\", \"bread_or_dog\"])\n",
    "merged_dataset['validation'].set_transform(transform, [\"image\", \"bread_or_dog\"])\n",
    "\n",
    "# Assuming that the datasets 'train' and 'validation' are available in the dataloaders_dict\n",
    "train_dataloader = DataLoader(merged_dataset['train'], batch_size=batch_size, shuffle=True)\n",
    "valid_dataloader = DataLoader(merged_dataset['validation'], batch_size=batch_size, shuffle=False)\n",
    "dataloaders_dict = {\n",
    "    \"train\": train_dataloader,\n",
    "    \"validation\": valid_dataloader\n",
    "}\n",
    "\n",
    "num_epochs = 10\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "wandb.init(\n",
    "    project=\"bread-or-dog\",\n",
    "    config={\n",
    "        \"model_name\": model_name,\n",
    "        \"architecture\": \"CNN\",\n",
    "        \"dataset\": [\"Alanox/stanford-dogs\", \"images.cv_fg0xp9w733695pvws1a4yh\"],\n",
    "        \"param_groups\": param_groups,\n",
    "        \"num_epoch\": num_epochs,\n",
    "        \"momentum\": momentum,\n",
    "        \"device\": device\n",
    "    }\n",
    ")\n",
    "\n",
    "criterion = torch.nn.CrossEntropyLoss()\n",
    "optimizer = torch.optim.SGD(param_groups, momentum=momentum)\n",
    "\n",
    "\n",
    "train(model, criterion, optimizer, dataloaders_dict, num_epochs=num_epochs, device=device)\n",
    "\n",
    "save_file(model.state_dict(), f\"models/snapshots/{model_name}_epoch{num_epochs}.safetensors\")\n",
    "\n",
    "wandb.log_artifact(model)\n",
    "\n",
    "model.to_onnx()\n",
    "wandb.save(\"model.onnx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "fune-tuning-vgg16-bread-or-dog",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
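A note on the last lines of the training cell: `model.to_onnx()` is not a method of a plain torchvision `nn.Module` (it resembles the PyTorch Lightning API). A minimal sketch of that export step using core PyTorch, assuming `model` and `device` from the cell above and the 224x224 input size used throughout:

```python
import torch
import wandb

# Trace the fine-tuned network with a dummy batch and export it to ONNX.
dummy_input = torch.randn(1, 3, 224, 224, device=device)
torch.onnx.export(model, dummy_input, "model.onnx")

# Upload the exported file alongside the W&B run.
wandb.save("model.onnx")
```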
vgg16.py
ADDED
@@ -0,0 +1,36 @@
import gradio as gr
import json
import torch
from PIL import Image
from torchvision import models, transforms

with open("data/imagenet-simple-labels.json") as f:
    labels = json.load(f)

model = models.vgg16(pretrained=True)
model.eval()  # Set to inference mode

preprocess = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)


def classify_image(input_image: Image):
    img_t = preprocess(input_image)
    batch_t = torch.unsqueeze(img_t, 0)

    with torch.no_grad():
        output = model(batch_t)

    probabilities = torch.nn.functional.softmax(output, dim=1)
    label_to_prob = {labels[i]: prob for i, prob in enumerate(probabilities[0])}
    return label_to_prob


demo = gr.Interface(fn=classify_image, inputs=gr.Image(type="pil"), outputs="label")
demo.launch()
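As a rough usage sketch outside the Gradio UI (the image path below is hypothetical), the `classify_image` function defined in vgg16.py can also be called directly:

```python
from PIL import Image

# Hypothetical local test image; replace with any RGB image on disk.
img = Image.open("data/sample.jpg").convert("RGB")
probs = classify_image(img)

# probs maps each ImageNet label to a probability tensor; report the top one.
best = max(probs, key=probs.get)
print(best, float(probs[best]))
```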