## Collect images

Save images from below websites with Firefox.
- https://www.dreamstime.com/photos-images/corgi-butt.html
- https://www.pinterest.com/I_love_Corgi/corgi-butt/

In [1]:
from datasets import load_dataset

stanford_dogs_dataset = load_dataset("Alanox/stanford-dogs", split="full", trust_remote_code=True)
# OR !kaggle datasets download -d jessicali9530/stanford-dogs-dataset -p "data" -q

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
stanford_dogs_dataset

Dataset({
    features: ['name', 'annotations', 'target', 'image'],
    num_rows: 20580
})

In [3]:
from datasets import load_dataset

bread_dataset = load_dataset("imagefolder", data_dir="data/images.cv_fg0xp9w733695pvws1a4yh/data")

In [4]:
bread_dataset

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 1478
    })
    validation: Dataset({
        features: ['image', 'label'],
        num_rows: 240
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 738
    })
})

In [5]:
unique_targets = stanford_dogs_dataset.unique('target')
print(unique_targets)

['Bedlington Terrier', 'Clumber', 'Bluetick', 'German Short Haired Pointer', 'Labrador Retriever', 'Bernese Mountain Dog', 'Saluki', 'German Shepherd', 'Komondor', 'Kuvasz', 'Weimaraner', 'Great Pyrenees', 'Rottweiler', 'Pekinese', 'Gordon Setter', 'Tibetan Terrier', 'Soft Coated Wheaten Terrier', 'Brittany Spaniel', 'Leonberg', 'English Foxhound', 'Collie', 'Basset', 'Wire Haired Fox Terrier', 'Norwegian Elkhound', 'Chesapeake Bay Retriever', 'Cardigan', 'Borzoi', 'Border Collie', 'Malamute', 'Australian Terrier', 'Silky Terrier', 'Affenpinscher', 'Pomeranian', 'American Staffordshire Terrier', 'Otterhound', 'Staffordshire Bullterrier', 'West Highland White Terrier', 'Boston Bull', 'Redbone', 'Irish Water Spaniel', 'Giant Schnauzer', 'Flat Coated Retriever', 'Norwich Terrier', 'Dhole', 'Airedale', 'Miniature Poodle', 'Malinois', 'Sealyham Terrier', 'Cairn', 'Eskimo Dog', 'Siberian Husky', 'Papillon', 'Greater Swiss Mountain Dog', 'Sussex Spaniel', 'African Hunting Dog', 'Pembroke', 'D

In [6]:
# もしコーギー(Pembroke)だけで数百件あればそれを使い、なければ犬の画像すべてを使う
pembroke_count = sum(target == 'Pembroke' for target in stanford_dogs_dataset['target'])
print(pembroke_count)

181


In [7]:
# Add a new column 'dog_or_bread' to the stanford_dogs_dataset and bread_dataset
bread_dataset = bread_dataset.map(lambda example: {'bread_or_dog': 0})
stanford_dogs_dataset = stanford_dogs_dataset.map(lambda example: {'bread_or_dog': 1})

In [8]:
stanford_dogs_dataset

Dataset({
    features: ['name', 'annotations', 'target', 'image', 'bread_or_dog'],
    num_rows: 20580
})

In [9]:
from datasets import DatasetDict

train_test_dataset = stanford_dogs_dataset.train_test_split(test_size=0.2)
test_valid_dataset = train_test_dataset["test"].train_test_split(test_size=0.5)
stanford_dogs_dataset_dict = DatasetDict({
    "train": train_test_dataset["train"],
    "test": test_valid_dataset["train"],
    "validation": test_valid_dataset["test"]
})
stanford_dogs_dataset_dict

DatasetDict({
    train: Dataset({
        features: ['name', 'annotations', 'target', 'image', 'bread_or_dog'],
        num_rows: 16464
    })
    test: Dataset({
        features: ['name', 'annotations', 'target', 'image', 'bread_or_dog'],
        num_rows: 2058
    })
    validation: Dataset({
        features: ['name', 'annotations', 'target', 'image', 'bread_or_dog'],
        num_rows: 2058
    })
})

In [10]:
from datasets import concatenate_datasets

# Concatenate the datasets for each split
merged_train_dataset = concatenate_datasets([stanford_dogs_dataset_dict['train'], bread_dataset['train']])
merged_validation_dataset = concatenate_datasets([stanford_dogs_dataset_dict['validation'], bread_dataset['validation']])
merged_test_dataset = concatenate_datasets([stanford_dogs_dataset_dict['test'], bread_dataset['test']])
merged_dataset = DatasetDict({
    "train": merged_train_dataset,
    "validation": merged_validation_dataset,
    "test": merged_test_dataset
})

print(merged_dataset)

DatasetDict({
    train: Dataset({
        features: ['name', 'annotations', 'target', 'image', 'bread_or_dog', 'label'],
        num_rows: 17942
    })
    validation: Dataset({
        features: ['name', 'annotations', 'target', 'image', 'bread_or_dog', 'label'],
        num_rows: 2298
    })
    test: Dataset({
        features: ['name', 'annotations', 'target', 'image', 'bread_or_dog', 'label'],
        num_rows: 2796
    })
})


## Inspect model

In [11]:
from torchvision import models

model = models.vgg16()
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [12]:
for name, _param in model.named_parameters():
    print(f"{name=}")

name='features.0.weight'
name='features.0.bias'
name='features.2.weight'
name='features.2.bias'
name='features.5.weight'
name='features.5.bias'
name='features.7.weight'
name='features.7.bias'
name='features.10.weight'
name='features.10.bias'
name='features.12.weight'
name='features.12.bias'
name='features.14.weight'
name='features.14.bias'
name='features.17.weight'
name='features.17.bias'
name='features.19.weight'
name='features.19.bias'
name='features.21.weight'
name='features.21.bias'
name='features.24.weight'
name='features.24.bias'
name='features.26.weight'
name='features.26.bias'
name='features.28.weight'
name='features.28.bias'
name='classifier.0.weight'
name='classifier.0.bias'
name='classifier.3.weight'
name='classifier.3.bias'
name='classifier.6.weight'
name='classifier.6.bias'


## Fine Tuning

In [18]:
import torch
import wandb

def train(model, criterion, optimizer, dataloaders_dict, num_epochs, device):
    model.to(device)

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-------------')
        
        for phase in ['train', 'validation']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            epoch_loss = 0.0
            epoch_corrects = 0
            
            for batch in dataloaders_dict[phase]:
                images, labels = batch["image"], batch["bread_or_dog"]
                images, labels = images.to(device), labels.to(device)

                optimizer.zero_grad()
                
                # 学習時のみ勾配を計算させる設定にする
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(images)
                    
                    # 損失を計算
                    loss = criterion(outputs, labels)
                    
                    # ラベルを予測
                    _, preds = torch.max(outputs, 1)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                    # イテレーション結果の計算
                    # lossの合計を更新
                    # PyTorchの仕様上各バッチ内での平均のlossが計算される。
                    # データ数を掛けることで平均から合計に変換をしている。
                    # 損失和は「全データの損失/データ数」で計算されるため、
                    # 平均のままだと損失和を求めることができないため。
                    epoch_loss += loss.item() * images.size(0)
                    
                    # 正解数の合計を更新
                    epoch_corrects += torch.sum(preds == labels.data)

            # epochごとのlossと正解率を表示
            epoch_loss = epoch_loss / len(dataloaders_dict[phase].dataset)
            epoch_acc = epoch_corrects.double() / len(dataloaders_dict[phase].dataset)

            log = {
                "epoch": epoch +1,
                "phase": phase,
                f"{phase}_loss": epoch_loss,
                f"{phase}_acc": epoch_acc,
            }
            print(log)
            wandb.log(log)

In [20]:
import torch
import wandb
from safetensors.torch import save_file
from torchvision import transforms
from torch.utils.data import DataLoader

model_name = "vgg16"
model.classifier[6] = torch.nn.Linear(in_features=4096, out_features=2)

features = [param for name, param in model.named_parameters() if "features" in name]
classifier = [param for name, param in model.named_parameters() if "classifier.0" in name or "classifier.3" in name]
last_classifier = [param for name, param in model.named_parameters() if "classifier.6" in name]
param_groups = [
    {'params': features, 'lr': 1e-4},
    {'params': classifier, 'lr': 5e-4},
    {'params': last_classifier, 'lr': 1e-3},
]
momentum = 0.9

batch_size = 64

# torchvision の datasets とは違い、transforms をそのままセットすれば良いわけではないので留意。
composed = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize all images to 224x224
    transforms.ToTensor(),  # Convert images to PyTorch tensors
])
def transform(batch):
    tensors = [composed(img) for img in batch['image']]
    return {"image": tensors, "bread_or_dog": batch["bread_or_dog"]}

merged_dataset['train'].set_transform(transform, ["image", "bread_or_dog"])
merged_dataset['validation'].set_transform(transform, ["image", "bread_or_dog"])

# Assuming that the datasets 'train' and 'validation' are available in the dataloaders_dict
train_dataloader = DataLoader(merged_dataset['train'], batch_size=batch_size, shuffle=True)
valid_dataloader = DataLoader(merged_dataset['validation'], batch_size=batch_size, shuffle=False)
dataloaders_dict = {
    "train": train_dataloader,
    "validation": valid_dataloader
}

num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

wandb.init(
    project="bread-or-dog",
    config={
        "model_name": model_name,
        "architecture": "CNN",
        "dataset": ["Alanox/stanford-dogs", "images.cv_fg0xp9w733695pvws1a4yh"],
        "param_groups": param_groups,
        "num_epoch": num_epochs,
        "momentum": momentum,
        "device": device
    }
)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(param_groups, momentum=momentum)


train(model, criterion, optimizer, dataloaders_dict, num_epochs=num_epochs, device=device)

save_file(model.state_dict(), f"models/snapshots/{model_name}_epoch{num_epochs}.safetensors")

wandb.log_artifact(model)

model.to_onnx()
wandb.save("model.onnx")

0,1
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
train_acc,▁▂▂▂▅▇████
train_loss,█▆▅▃▃▂▂▁▁▁
validation_acc,▁▁▁▄▇▇▆██▇
validation_loss,█▆▅▃▂▂▂▁▁▁

0,1
epoch,10
phase,validation
train_acc,0.98473
train_loss,0.04198
validation_acc,0.97998
validation_loss,0.05055


Epoch 1/10
-------------
{'epoch': 1, 'phase': 'train', 'train_loss': 0.07768695316224603, 'train_acc': tensor(0.9642, device='cuda:0', dtype=torch.float64)}
{'epoch': 1, 'phase': 'validation', 'validation_loss': 0.07376273388806584, 'validation_acc': tensor(0.9752, device='cuda:0', dtype=torch.float64)}
Epoch 2/10
-------------
{'epoch': 2, 'phase': 'train', 'train_loss': 0.039798663440396634, 'train_acc': tensor(0.9868, device='cuda:0', dtype=torch.float64)}
{'epoch': 2, 'phase': 'validation', 'validation_loss': 0.07450082683805061, 'validation_acc': tensor(0.9704, device='cuda:0', dtype=torch.float64)}
Epoch 3/10
-------------
{'epoch': 3, 'phase': 'train', 'train_loss': 0.038719125189200225, 'train_acc': tensor(0.9869, device='cuda:0', dtype=torch.float64)}
{'epoch': 3, 'phase': 'validation', 'validation_loss': 0.08719978122943582, 'validation_acc': tensor(0.9608, device='cuda:0', dtype=torch.float64)}
Epoch 4/10
-------------
