Upload 26 files
- .gitignore +5 -0
- LICENSE +21 -0
- README.md +62 -12
- app.py +560 -0
- documentation.md +39 -0
- models/__init__.py +18 -0
- models/densenet.py +107 -0
- models/dla.py +135 -0
- models/dla_simple.py +128 -0
- models/dpn.py +98 -0
- models/efficientnet.py +175 -0
- models/googlenet.py +107 -0
- models/lenet.py +23 -0
- models/mobilenet.py +61 -0
- models/mobilenetv2.py +86 -0
- models/pnasnet.py +125 -0
- models/preact_resnet.py +118 -0
- models/regnet.py +155 -0
- models/resnet.py +132 -0
- models/resnext.py +95 -0
- models/senet.py +121 -0
- models/shufflenet.py +109 -0
- models/shufflenetv2.py +162 -0
- models/vgg.py +47 -0
- requirements.txt +0 -0
- utils.py +127 -0
.gitignore
ADDED
@@ -0,0 +1,5 @@
+models/__pycache__/
+__pycache__/
+wandb/
+data/
+.DS_Store
LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 liukuang
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
README.md
CHANGED
@@ -1,12 +1,62 @@
+# Train CIFAR10 with PyTorch
+
+I'm playing with [PyTorch](http://pytorch.org/) on the CIFAR10 dataset.
+
+## Prerequisites
+- Python 3.6+
+- PyTorch 1.0+
+
+- Install the dependencies from `requirements.txt` by running `pip install -r requirements.txt`
+- Update `requirements.txt` with `pip list --format=freeze > requirements.txt`
+
+## Dependencies for Weights and Biases
+Install Weights and Biases using the command below:
+`pip install wandb`
+
+Afterwards, run this command to log in to Weights and Biases (a GitHub account can be used):
+`wandb login`
+
+Paste your API key into the terminal when prompted.
+
+Additionally, change this line in `main.py`:
+`wandb.init(entity="balica15", project="tutorial")`
+
+Here, "balica15" should be replaced with the username you used to log in to Weights and Biases.
+
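For reference, a minimal sketch of what that call looks like once the entity is swapped in; `your-username` is a placeholder for whatever account you authenticated as, and the logged metrics are illustrative:

```python
import wandb

# Log in once with `wandb login`, then initialize a run under your own entity.
wandb.init(entity="your-username", project="tutorial")

# Anything logged during training shows up on the W&B dashboard for that run.
wandb.log({"epoch": 1, "loss": 1.83, "acc": 0.41})
wandb.finish()
```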
+## Training
+```
+# Start training with:
+python main.py
+
+# You can manually resume the training with:
+python main.py --resume --lr=0.01
+```
+
+## Features to Add
+| Name | Feature |
+| ----------------------------- | ----------- |
+| Evelyn Atkins and Ethan White | Input and Error Protection |
+| Keiane Balicanta | TorchVision Model Dropdown |
+| Henry Conde | Weights and Biases API |
+| Matthew Gerace | Iteration and Batch Size Sliders |
+| Luke Wilkins | Image Classification |
+
+
+## Accuracy
+| Model | Acc. |
+| ----------------- | ----------- |
+| [VGG16](https://arxiv.org/abs/1409.1556) | 92.64% |
+| [ResNet18](https://arxiv.org/abs/1512.03385) | 93.02% |
+| [ResNet50](https://arxiv.org/abs/1512.03385) | 93.62% |
+| [ResNet101](https://arxiv.org/abs/1512.03385) | 93.75% |
+| [RegNetX_200MF](https://arxiv.org/abs/2003.13678) | 94.24% |
+| [RegNetY_400MF](https://arxiv.org/abs/2003.13678) | 94.29% |
+| [MobileNetV2](https://arxiv.org/abs/1801.04381) | 94.43% |
+| [ResNeXt29(32x4d)](https://arxiv.org/abs/1611.05431) | 94.73% |
+| [ResNeXt29(2x64d)](https://arxiv.org/abs/1611.05431) | 94.82% |
+| [SimpleDLA](https://arxiv.org/abs/1707.06484) | 94.89% |
+| [DenseNet121](https://arxiv.org/abs/1608.06993) | 95.04% |
+| [PreActResNet18](https://arxiv.org/abs/1603.05027) | 95.11% |
+| [DPN92](https://arxiv.org/abs/1707.01629) | 95.16% |
+| [DLA](https://arxiv.org/pdf/1707.06484.pdf) | 95.47% |
+
app.py
ADDED
@@ -0,0 +1,560 @@
+###### Train CIFAR10 with PyTorch. ######
+
+### IMPORT DEPENDENCIES
+
+from torch.utils.data import DataLoader
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torch.nn.functional as F
+import torch.backends.cudnn as cudnn
+import gradio as gr
+import wandb
+import math
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+import torchvision
+import torchvision.transforms as transforms
+import torchvision.models as models
+import torch.optim.lr_scheduler as lr_scheduler
+import os
+import argparse
+import torchattacks
+
+from models import *
+
+from tqdm import tqdm
+from PIL import Image
+
+# from utils import progress_bar
+
+# CSS theme styling
+theme = gr.themes.Base(
+    font=[gr.themes.GoogleFont('Montserrat'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
+    primary_hue="emerald",
+    secondary_hue="emerald",
+    neutral_hue="zinc"
+).set(
+    body_text_color='*neutral_950',
+    body_text_color_subdued='*neutral_950',
+    block_shadow='*shadow_drop_lg',
+    button_shadow='*shadow_drop_lg',
+    block_title_text_color='*neutral_950',
+    block_title_text_weight='500',
+    slider_color='*secondary_600'
+)
+
+def normalize(img):
+    # Rescale pixel values into [0, 1] for display.
+    min_im = np.min(img)
+    np_img = img - min_im
+    max_im = np.max(np_img)
+    np_img /= max_im
+    return np_img
+
+def imshow(img, fig_name="test_input.png"):
+    try:
+        img = img.clone().detach().cpu().numpy()
+    except AttributeError:  # raised when img is already a numpy array
+        print('img already numpy')
+
+    plt.imshow(normalize(np.transpose(img, (1, 2, 0))))
+    plt.savefig(fig_name)
+    print(f'Figure saved as {fig_name}')
+    return fig_name
+
+def class_names(class_num, class_list):  # converts the raw number label to text
+    if (class_num < 0) or (class_num >= 10):  # out-of-range label (was `and`, which could never be true)
+        gr.Warning("Class List Error")
+        return
+    return class_list[class_num]
+
+
+### MAIN FUNCTION
+best_acc = 0
+def main(drop_type, epochs_sldr, train_sldr, test_sldr, learning_rate, optimizer, sigma_sldr, adv_attack, username, scheduler):
+
+    ## Input protection
+    if not drop_type:
+        gr.Warning("Please select a model from the dropdown.")
+        return
+    if not username:
+        gr.Warning("Please enter a WandB username.")
+        return
+    if epochs_sldr % 1 != 0:
+        gr.Warning("Number of epochs must be an integer.")
+        return
+    if train_sldr % 1 != 0:
+        gr.Warning("Training batch size must be an integer.")
+        return
+    if test_sldr % 1 != 0:
+        gr.Warning("Testing batch size must be an integer.")
+        return
+
+    num_epochs = int(epochs_sldr)
+    global learn_batch
+    learn_batch = int(train_sldr)
+    global test_batch
+    test_batch = int(test_sldr)
+    learning_rate = float(learning_rate)
+    optimizer_choose = str(optimizer)
+    sigma = float(sigma_sldr)
+    attack = str(adv_attack)
+    scheduler_choose = str(scheduler)
+
+    # The W&B entity comes from the username textbox in the interface
+    wandb.init(entity=username, project="model-training")
+
+    parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
+    parser.add_argument('--lr', default=0.1, type=float, help='learning rate')
+    parser.add_argument('--resume', '-r', action='store_true',
+                        help='resume from checkpoint')
+    args = parser.parse_args()
+
+    if torch.cuda.is_available():
+        device = 'cuda'
+        gr.Info("Cuda detected - running on Cuda")
+    elif torch.backends.mps.is_available():
+        device = 'mps'
+        gr.Info("MPS detected - running on Metal")
+    else:
+        device = 'cpu'
+        gr.Info("No GPU Detected - running on CPU")
+
+    start_epoch = 0  # start from epoch 0 or last checkpoint epoch
+
+    ## Data
+    try:
+        print('==> Preparing data..')
+        transform_train = transforms.Compose([
+            transforms.RandomCrop(32, padding=4),
+            transforms.RandomHorizontalFlip(),
+            transforms.ToTensor(),
+            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
+        ])
+
+        transform_test = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
+        ])
+
+        trainset = torchvision.datasets.CIFAR10(
+            root='./data', train=True, download=True, transform=transform_train)
+        trainloader = DataLoader(
+            trainset, batch_size=learn_batch, shuffle=True, num_workers=2)
+
+        testset = torchvision.datasets.CIFAR10(
+            root='./data', train=False, download=True, transform=transform_test)
+        testloader = DataLoader(
+            testset, batch_size=test_batch, shuffle=True, num_workers=2)
+
+        classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
+    except Exception as e:
+        print(f"Error: {e}")
+        gr.Warning(f"Data Loading Error: {e}")
+
+    ## Model
+    try:
+        print('==> Building model..')
+        net = models_dict.get(drop_type, None)
+
+        # Lists of model names whose heads use either a classifier or an fc attribute
+        classifier_models = ['ConvNext_Small', 'ConvNext_Base', 'ConvNext_Large', 'DenseNet', 'EfficientNet_B0', 'MobileNetV2',
+                             'MaxVit', 'MnasNet0_5', 'SqueezeNet', 'VGG19']
+        fc_models = ['GoogLeNet', 'InceptionNetV3', 'RegNet_X_400MF', 'ResNet18', 'ShuffleNet_V2_X0_5']
+
+        # Check the dropdown choice (the string, not the model object) for an fc or
+        # classifier head, and replace it with a fresh 10-class linear layer
+        if drop_type in classifier_models:
+            num_ftrs = net.classifier[-1].in_features
+            net.classifier[-1] = torch.nn.Linear(num_ftrs, len(classes))
+        elif drop_type in fc_models:
+            num_ftrs = net.fc.in_features
+            net.fc = torch.nn.Linear(num_ftrs, len(classes))
+
+        net = net.to(device)
+
+    except Exception as e:
+        print(f"Error: {e}")
+        gr.Warning(f"Model Building Error: {e}")
+
+    # if args.resume:
+    #     # Load checkpoint.
+    #     print('==> Resuming from checkpoint..')
+    #     assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
+    #     checkpoint = torch.load('./checkpoint/ckpt.pth')
+    #     net.load_state_dict(checkpoint['net'])
+    #     best_acc = checkpoint['acc']
+    #     start_epoch = checkpoint['epoch']
+
+    SGDopt = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)
+    Adamopt = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=5e-4)
+
+    criterion = nn.CrossEntropyLoss()
+
+    if optimizer_choose == "SGD":
+        optimizer = SGDopt
+    elif optimizer_choose == "Adam":
+        optimizer = Adamopt
+    print(f'optimizer: {optimizer}')
+
+    # scheduler = lr_scheduler.LinearLR(optimizer, start_factor=learning_rate, end_factor=0.0001, total_iters=10)
+    if scheduler_choose == "CosineAnnealingLR":
+        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
+    elif scheduler_choose == "ReduceLROnPlateau":
+        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=5)
+    elif scheduler_choose == "StepLR":
+        scheduler = lr_scheduler.StepLR(optimizer, step_size=30)
+    print(f'scheduler: {scheduler_choose}')
+
+    img_labels = []       # labels for generated images
+    raw_image_list = []   # generated images
+    img_list1 = []        # combined (image, label) tuples
+    img_list2 = []        # gaussian noise images
+    img_list3 = []        # adversarial attack images
+
+    # The following lists are used when generating all images in an epoch instead of 10:
+    full_img_labels = []
+    full_raw_image_list = []
+    full_img_list1 = []
+
+    adv_num = 1  # adversarial image number, for file naming
+    global gaussian_num
+    gaussian_num = 1  # gaussian noise image number, for file naming
+
+    for epoch in range(start_epoch, start_epoch + num_epochs):  # use the integer epoch count
+        if sigma == 0:
+            train(epoch, net, trainloader, device, optimizer, criterion, sigma)
+        else:
+            gaussian_fig = train(epoch, net, trainloader, device, optimizer, criterion, sigma)
+        acc, predicted = test(epoch, net, testloader, device, criterion)
+
+        if scheduler_choose == "ReduceLROnPlateau":
+            scheduler.step(metrics=acc)
+        elif scheduler_choose != "None":
+            scheduler.step()
+
+        if (((epoch-1) % 10 == 0) or (epoch == 0)) and (epoch != 1):  # generate images every 10 epochs (and the 0th epoch)
+            dataiter = iter(testloader)
+            imgs, labels = next(dataiter)
+            normalized_imgs = (imgs-imgs.min())/(imgs.max()-imgs.min())
+            atk = torchattacks.PGD(net, eps=0.00015, alpha=0.0000000000000001, steps=7)
+            if attack == "Yes":
+                if normalized_imgs is None:
+                    print("error occurred")
+                else:
+                    print(torch.std(normalized_imgs))
+                    atk.set_normalization_used(mean=torch.mean(normalized_imgs, axis=[0, 2, 3]), std=torch.std(normalized_imgs, axis=[0, 2, 3])/1.125)
+                    adv_images = atk(imgs, labels)
+                    fig_name = imshow(adv_images[0], fig_name=f'figures/adversarial_attack{adv_num}.png')
+                    attack_fig = Image.open(fig_name)
+                    for i in range(1):  # generate 1 image per epoch
+                        img_list3.append(attack_fig)
+                    adv_num = adv_num + 1
+            for i in range(10):  # generate 10 images per epoch
+                gradio_imgs = transforms.functional.to_pil_image(normalized_imgs[i])
+                raw_image_list.append(gradio_imgs)
+                predicted_text = class_names(predicted[i].item(), classes)
+                actual_text = class_names(labels[i].item(), classes)
+                label_text = f'Epoch: {epoch} | Predicted: {predicted_text} | Actual: {actual_text}'
+                img_labels.append(label_text)
+            for i in range(test_batch):  # generate all images per epoch
+                full_gradio_imgs = transforms.functional.to_pil_image(normalized_imgs[i])
+                full_raw_image_list.append(full_gradio_imgs)
+                full_predicted_text = class_names(predicted[i].item(), classes)
+                full_actual_text = class_names(labels[i].item(), classes)
+                full_label_text = f'Epoch: {epoch} | Predicted: {full_predicted_text} | Actual: {full_actual_text}'
+                full_img_labels.append(full_label_text)
+            for i in range(len(raw_image_list)):
+                img_tuple = (raw_image_list[i], img_labels[i])
+                img_list1.append(img_tuple)
+            for i in range(len(full_raw_image_list)):
+                full_img_tuple = (full_raw_image_list[i], full_img_labels[i])
+                full_img_list1.append(full_img_tuple)
+            if sigma != 0:
+                for i in range(1):  # generate 1 image per epoch
+                    img_list2.append(gaussian_fig)
+                gaussian_num = gaussian_num + 1
+    if (sigma == 0) and (attack == "No"):
+        return str(acc)+"%", img_list1, full_img_list1, None, None
+    elif (sigma != 0) and (attack == "No"):
+        return str(acc)+"%", img_list1, full_img_list1, img_list2, None
+    elif (sigma == 0) and (attack == "Yes"):
+        return str(acc)+"%", img_list1, full_img_list1, None, img_list3
+    else:
+        return str(acc)+"%", img_list1, full_img_list1, img_list2, img_list3
+
+
+
+### TRAINING
+def train(epoch, net, trainloader, device, optimizer, criterion, sigma, progress=gr.Progress()):
+    try:
+        print('\nEpoch: %d' % epoch)
+        net.train()
+        train_loss = 0
+        correct = 0
+        total = 0
+
+        iter_float = 50000/learn_batch  # CIFAR-10 has 50,000 training images
+        iterations = math.ceil(iter_float)
+        iter_prog = 0
+
+        for batch_idx, (inputs, targets) in tqdm(enumerate(trainloader)):
+            if sigma == 0:
+                inputs, targets = inputs.to(device), targets.to(device)
+                optimizer.zero_grad()
+                outputs = net(inputs)
+            else:
+                noise = np.random.normal(0, sigma, inputs.shape)
+                inputs = inputs + torch.tensor(noise, dtype=inputs.dtype)  # match dtypes so the add succeeds
+                inputs, targets = inputs.to(device), targets.to(device)
+                optimizer.zero_grad()
+                outputs = net(inputs)
+                n_inputs = inputs.clone().detach().cpu().numpy()
+                if batch_idx % 99 == 0:
+                    fig_name = imshow(n_inputs[0], fig_name=f'figures/gaussian_noise{gaussian_num}.png')
+                    gaussian_fig = Image.open(fig_name)
+
+            loss = criterion(outputs, targets)
+            loss.backward()
+            optimizer.step()
+
+            train_loss += loss.item()
+            _, predicted = outputs.max(1)
+            total += targets.size(0)
+            correct += predicted.eq(targets).sum().item()
+
+            iter_prog = iter_prog + 1  # increment the progress counter
+            progress(iter_prog/iterations, desc=f"Training Epoch {epoch}", total=iterations)
+
+
+            # progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
+            #              % (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
+
+    except Exception as e:
+        print(f"Error: {e}")
+        gr.Warning(f"Training Error: {e}")
+    if sigma != 0:
+        return gaussian_fig
+
+
+### TESTING
+
+def test(epoch, net, testloader, device, criterion, progress=gr.Progress()):
+    try:
+        net.eval()
+        test_loss = 0
+        correct = 0
+        total = 0
+
+        iter_float = 10000/test_batch  # CIFAR-10 has 10,000 test images
+        iterations = math.ceil(iter_float)
+        iter_prog = 0
+
+        with torch.no_grad():
+            for batch_idx, (inputs, targets) in tqdm(enumerate(testloader)):
+                inputs, targets = inputs.to(device), targets.to(device)
+                outputs = net(inputs)
+                loss = criterion(outputs, targets)
+
+                test_loss += loss.item()
+                _, predicted = outputs.max(1)
+                total += targets.size(0)
+                correct += predicted.eq(targets).sum().item()
+
+                iter_prog = iter_prog + 1  # increment the progress counter
+                progress(iter_prog/iterations, desc=f"Testing Epoch {epoch}", total=iterations)
+
+        wandb.log({'epoch': epoch+1, 'loss': test_loss})
+        wandb.log({"acc": correct/total})
+
+        # progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
+        #              % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
+
+        # Save checkpoint.
+        global best_acc
+        global acc
+        acc = 100.*correct/total
+        print(acc)
+        if acc > best_acc:
+            best_acc = acc
+            return best_acc, predicted
+        else:
+            return acc, predicted
+        # if acc > best_acc:
+        #     print('Saving..')
+        #     state = {
+        #         'net': net.state_dict(),
+        #         'acc': acc,
+        #         'epoch': epoch,
+        #     }
+        #     if not os.path.isdir('checkpoint'):
+        #         os.mkdir('checkpoint')
+        #     torch.save(state, './checkpoint/ckpt.pth')
+        #     best_acc = acc
+
+    except Exception as e:
+        print(f"Error: {e}")
+        gr.Warning(f"Testing Error: {e}")
+
+
+models_dict = {
+    # "AlexNet": models.AlexNet(weights=models.AlexNet_Weights.DEFAULT),
+    # "ConvNext_Small": models.convnext_small(weights=models.ConvNeXt_Small_Weights.DEFAULT),
+    # "ConvNext_Base": models.convnext_base(weights=models.ConvNeXt_Base_Weights.DEFAULT),
+    # "ConvNext_Large": models.convnext_large(weights=models.ConvNeXt_Large_Weights.DEFAULT),
+    "DenseNet": models.densenet121(weights=models.DenseNet121_Weights.DEFAULT),
+    # "EfficientNet_B0": models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT),
+    # "GoogLeNet": models.googlenet(weights=models.GoogLeNet_Weights.DEFAULT),
+    # "InceptionNetV3": models.inception_v3(weights=models.Inception_V3_Weights.DEFAULT),
+    # "MaxVit": models.maxvit_t(weights=models.MaxVit_T_Weights.DEFAULT),
+    # "MnasNet0_5": models.mnasnet0_5(weights=models.MNASNet0_5_Weights.DEFAULT),
+    # "MobileNetV2": models.mobilenet_v2(weights=models.MobileNet_V2_Weights.DEFAULT),
+    "ResNet18": models.resnet18(weights=models.ResNet18_Weights.DEFAULT),
+    "ResNet50": models.resnet50(weights=models.ResNet50_Weights.DEFAULT),
+    # "RegNet_X_400MF": models.regnet_x_400mf(weights=models.RegNet_X_400MF_Weights.DEFAULT),
+    # "ShuffleNet_V2_X0_5": models.shufflenet_v2_x0_5(weights=models.ShuffleNet_V2_X0_5_Weights.DEFAULT),
+    # "SqueezeNet": models.squeezenet1_0(weights=models.SqueezeNet1_0_Weights.DEFAULT),
+    "VGG19": models.vgg19(weights=models.VGG19_Weights.DEFAULT)
+}
+
+# Store dictionary keys into a list for dropdown menu choices
+names = list(models_dict.keys())
+
+# Optimizer names
+optimizers = ["SGD", "Adam"]
+
+# Scheduler names
+schedulers = ["None", "CosineAnnealingLR", "ReduceLROnPlateau", "StepLR"]
+
+### GRADIO APP INTERFACE
+
+def togglepicsettings(choice):
+    yes = gr.Gallery(visible=True)
+    no = gr.Gallery(visible=False)
+    if choice == "Yes":
+        return yes, no
+    else:
+        return no, yes
+
+def settings(choice):
+    if choice == "Advanced":
+        advanced = [
+            gr.Slider(visible=True),
+            gr.Slider(visible=True),
+            gr.Slider(visible=True),
+            gr.Dropdown(visible=True),
+            gr.Dropdown(visible=True),
+            gr.Radio(visible=True)
+        ]
+        return advanced
+    else:
+        basic = [
+            gr.Slider(visible=False),
+            gr.Slider(visible=False),
+            gr.Slider(visible=False),
+            gr.Dropdown(visible=False),
+            gr.Dropdown(visible=False),
+            gr.Radio(visible=False)
+        ]
+        return basic
+
+def attacks(choice):
+    if choice == "Yes":
+        yes = [
+            gr.Markdown(visible=True),
+            gr.Radio(visible=True),
+            gr.Radio(visible=True)
+        ]
+        return yes
+    if choice == "No":
+        no = [
+            gr.Markdown(visible=False),
+            gr.Radio(visible=False),
+            gr.Radio(visible=False)
+        ]
+        return no
+
+def gaussian(choice):
+    if choice == "Yes":
+        yes = [
+            gr.Slider(visible=True),
+            gr.Gallery(visible=True),
+        ]
+        return yes
+    else:
+        no = [
+            gr.Slider(visible=False),
+            gr.Gallery(visible=False),
+        ]
+        return no
+def adversarial(choice):
+    if choice == "Yes":
+        yes = gr.Gallery(visible=True)
+        return yes
+    else:
+        no = gr.Gallery(visible=False)
+        return no  # this return was missing, so choosing "No" never hid the gallery
+
+## Main app for functionality
+with gr.Blocks(css=".caption-label {display:none}") as functionApp:
+    with gr.Row():
+        gr.Markdown("# CIFAR-10 Model Training GUI")
+    with gr.Row():
+        gr.Markdown("## Parameters")
+    with gr.Row():
+        inp = gr.Dropdown(choices=names, label="Training Model", value="ResNet18", info="Choose one of 13 common models provided in the dropdown to use for training.")
+        username = gr.Textbox(label="Weights and Biases", info="Enter your username or team name from the Weights and Biases API.")
+        epochs_sldr = gr.Slider(label="Number of Epochs", minimum=1, maximum=100, step=1, value=1, info="How many times the model will see the entire dataset during training.")
+        with gr.Column():
+            setting_radio = gr.Radio(["Basic", "Advanced"], label="Settings", value="Basic")
+            btn = gr.Button("Run")
+    with gr.Row():
+        train_sldr = gr.Slider(visible=False, label="Training Batch Size", minimum=1, maximum=1000, step=1, value=128, info="The number of training samples processed before the model's internal parameters are updated.")
+        test_sldr = gr.Slider(visible=False, label="Testing Batch Size", minimum=1, maximum=1000, step=1, value=100, info="The number of testing samples processed at once during the evaluation phase.")
+        learning_rate_sldr = gr.Slider(visible=False, label="Learning Rate", minimum=0.0001, maximum=0.1, step=0.0001, value=0.001, info="The learning rate of the optimization program.")
+        optimizer = gr.Dropdown(visible=False, label="Optimizer", choices=optimizers, value="SGD", info="The optimization algorithm used to minimize the loss function during training.")
+        scheduler = gr.Dropdown(visible=False, label="Scheduler", choices=schedulers, value="CosineAnnealingLR", info="The scheduler used to iteratively alter the learning rate.")
+        use_attacks = gr.Radio(["Yes", "No"], visible=False, label="Use Attacking Methods?", value="No")
+        setting_radio.change(fn=settings, inputs=setting_radio, outputs=[train_sldr, test_sldr, learning_rate_sldr, optimizer, scheduler, use_attacks])
+    with gr.Row():
+        attack_method = gr.Markdown("## Attacking Methods", visible=False)
+    with gr.Row():
+        use_sigma = gr.Radio(["Yes", "No"], visible=False, label="Use Gaussian Noise?", value="No")
+        sigma_sldr = gr.Slider(visible=False, label="Gaussian Noise", minimum=0, maximum=1, value=0, step=0.1, info="The sigma value of the gaussian noise equation. A value of 0 disables gaussian noise.")
+        adv_attack = gr.Radio(["Yes", "No"], visible=False, label="Use Adversarial Attacks?", value="No")
+    with gr.Row():
+        gr.Markdown("## Training Results")
+    with gr.Row():
+        accuracy = gr.Textbox(label="Accuracy", info="The validation accuracy of the trained model (accuracy evaluated on testing data).")
+        with gr.Column():
+            showpics = gr.Radio(["Yes", "No"], visible=True, label="Show all pictures?", value="No")
+            pics = gr.Gallery(preview=False, selected_index=0, object_fit='contain', label="Testing Images")
+            allpics = gr.Gallery(preview=True, selected_index=0, object_fit='contain', label="Full Testing Images", visible=False)
+            showpics.change(fn=togglepicsettings, inputs=[showpics], outputs=[allpics, pics])
+    with gr.Row():
+        gaussian_pics = gr.Gallery(visible=False, preview=False, selected_index=0, object_fit='contain', label="Gaussian Noise")
+        attack_pics = gr.Gallery(visible=False, preview=False, selected_index=0, object_fit='contain', label="Adversarial Attack")
+    use_attacks.change(fn=attacks, inputs=use_attacks, outputs=[attack_method, use_sigma, adv_attack])
+    use_sigma.change(fn=gaussian, inputs=use_sigma, outputs=[sigma_sldr, gaussian_pics])
+    adv_attack.change(fn=adversarial, inputs=adv_attack, outputs=attack_pics)
+    btn.click(fn=main, inputs=[inp, epochs_sldr, train_sldr, test_sldr, learning_rate_sldr, optimizer, sigma_sldr, adv_attack, username, scheduler], outputs=[accuracy, pics, allpics, gaussian_pics, attack_pics])
+
+## Documentation app (implemented as second tab)
+
+markdown_file_path = 'documentation.md'
+with open(markdown_file_path, 'r') as file:
+    markdown_content = file.read()
+
+with gr.Blocks() as documentationApp:
+    with gr.Row():
+        gr.Markdown("# CIFAR-10 Training Interface Documentation")
+    with gr.Row():
+        gr.Markdown(markdown_content)  # can be collapsed in VS Code to hide the paragraphs from view; VS Code can also wrap text
+
+### LAUNCH APP
+
+if __name__ == '__main__':
+    mainApp = gr.TabbedInterface([functionApp, documentationApp], ["Welcome", "Documentation"], theme=theme)
+    mainApp.queue()
+    mainApp.launch()
documentation.md
ADDED
@@ -0,0 +1,39 @@
+# Overview
+This interface facilitates training deep learning models on the CIFAR-10 dataset using PyTorch. Users can select from a
+variety of models, set training parameters, and initiate training to evaluate model performance. Here's more about it:
+# Training Parameters
+In the training parameters section, users can customize the training process by adjusting several settings, sorted into basic and advanced. These parameters collectively allow users to tailor the training process to meet specific computational constraints and performance goals.
+## Basic Settings
+### Model Selection:
+In the model selection section, users have the option to choose from a variety of predefined models, each with its unique architecture and set of parameters. The available models are tailored for different computational capabilities and objectives, thereby offering a diverse range of options for training on the CIFAR-10 dataset. By providing a selection of models, this interface facilitates a more flexible and tailored approach to exploring and understanding the performance of different neural network architectures on the CIFAR-10 dataset. Users can easily switch between models to observe how each performs and to find the one that best meets their requirements.
+### Weights and Biases:
+A username is required for the Weights and Biases (wandb) website to save graphs of accuracy and loss. If you do not have a wandb account, input "balica15".
+### Number of Epochs:
+The number of epochs controls how many times the entire training dataset is passed forward and backward through the neural network.
+### Run:
+Runs the program. Once pressed, the selected model begins training on the CIFAR-10 dataset using the specified training parameters. The training process includes both forward and backward passes through the network, optimizing the model's weights to minimize the loss function.
+## Advanced Settings
+### Training and Testing Batch Sizes:
+The training and testing batch sizes determine the number of samples that will be propagated through the network at one time, affecting the speed and memory usage of the training process.
+### Learning Rate:
+The starting learning rate for the optimizer. The learning-rate settings inside the optional schedulers cannot be edited, as they were chosen specifically to maximize accuracy.
+### Optimizer:
+The optimizer selection allows users to choose between different optimization algorithms, namely SGD (Stochastic Gradient Descent) or Adam, which have distinct behaviors and performance characteristics.
+### Scheduler:
+The scheduler selection allows users to choose how the learning rate changes over the course of the run. There are four options: None, CosineAnnealingLR, ReduceLROnPlateau, and StepLR.
+- None: No scheduler. The learning rate remains constant for the entire run.
+- CosineAnnealingLR: The learning rate of each parameter group is determined using a cosine annealing schedule.
+- ReduceLROnPlateau: The learning rate is reduced when a monitored metric stops improving over a certain interval. In this case, if the accuracy stops improving for five epochs straight, the program lowers the learning rate.
+- StepLR: The learning rate decreases at a set rate over a set interval. In this case, every 30 epochs the learning rate decreases by a factor of 0.1.
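As a rough sketch of how these options map onto PyTorch's `torch.optim.lr_scheduler` module (the `T_max`, `patience`, and `step_size` values match the ones hard-coded in `app.py`; the one-layer model is a stand-in):

```python
import torch
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

net = torch.nn.Linear(10, 10)  # stand-in model for illustration
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)

# The three non-trivial scheduler choices the interface offers:
cosine = lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)
plateau = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=5)
step = lr_scheduler.StepLR(optimizer, step_size=30)  # decays lr by 0.1 every 30 steps

# Once per epoch: ReduceLROnPlateau steps on a monitored metric,
# while the others step unconditionally.
accuracy = 92.5
plateau.step(accuracy)
cosine.step()
step.step()
```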
+### Attacking Methods:
+Enabling attacking methods reveals two more settings: gaussian noise and adversarial attack.
+
+When gaussian noise is enabled, the user can choose the sigma value of the gaussian noise, controlling how strongly it perturbs the images the model trains on. A sigma value of 0 disables gaussian noise, even if the setting is enabled.
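A minimal sketch of the noise injection itself, mirroring the logic in `app.py`'s training loop; the batch here is a stand-in and `sigma` plays the role of the slider value:

```python
import numpy as np
import torch

def add_gaussian_noise(inputs: torch.Tensor, sigma: float) -> torch.Tensor:
    """Perturb a batch of images with zero-mean gaussian noise of std sigma."""
    if sigma == 0:
        return inputs  # sigma of 0 leaves the batch untouched
    noise = np.random.normal(0, sigma, inputs.shape)
    return inputs + torch.tensor(noise, dtype=inputs.dtype)

batch = torch.rand(4, 3, 32, 32)       # stand-in CIFAR-10-shaped batch
noisy = add_gaussian_noise(batch, 0.3)
```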
+
+The adversarial attack is a toggle that applies almost unnoticeable perturbations to the pictures the model is looking at, which can cause incorrect predictions.
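The attack behind this toggle is PGD (Projected Gradient Descent) from the `torchattacks` package, the same call `app.py` makes; a minimal sketch with the eps/alpha/steps values taken from the app, and a stand-in classifier:

```python
import torch
import torchattacks

# `net` would be the trained classifier; a stand-in suffices to show the call.
net = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(3 * 32 * 32, 10))

imgs = torch.rand(4, 3, 32, 32)        # batch of test images in [0, 1]
labels = torch.randint(0, 10, (4,))    # their ground-truth classes

# PGD perturbs each image within an eps-ball, moving alpha per step.
atk = torchattacks.PGD(net, eps=0.00015, alpha=1e-16, steps=7)
adv_imgs = atk(imgs, labels)           # perturbed, nearly unnoticeable images
```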
+# Training Results
+Upon completion of training across the defined number of epochs, the interface evaluates the model on the test dataset and displays the achieved accuracy, 10 testing pictures per every 10 epochs, the gaussian noise on an image (if enabled), and the adversarial attack result on an image (if enabled).
+# Warnings
+Any warnings during training will be displayed in a yellow popup at the top right of the interface.
+# Data
+The CIFAR-10 dataset used in this interface comprises 60,000 32x32 color images spread across 10 different classes, with a training set of 50,000 images and a testing set of 10,000 images. Before training, the dataset undergoes specific transformations such as random cropping and normalization to augment the data and standardize the pixel values, respectively. These preprocessing steps help in enhancing the model's ability to learn and generalize well from the data. The interface automatically handles the downloading and preparation of the CIFAR-10 dataset, making it effortless for users to start training models without worrying about data management.
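For concreteness, a sketch of the preprocessing pipeline described above, using the same transforms and CIFAR-10 normalization statistics that appear in `app.py` (the batch size shown is the app's default):

```python
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Augment and normalize training images; test images are only normalized.
mean, std = (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Downloads to ./data on first use; 50,000 train / 10,000 test images.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
```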
models/__init__.py
ADDED
@@ -0,0 +1,18 @@
+from .vgg import *
+from .dpn import *
+from .lenet import *
+from .senet import *
+from .pnasnet import *
+from .densenet import *
+from .googlenet import *
+from .shufflenet import *
+from .shufflenetv2 import *
+from .resnet import *
+from .resnext import *
+from .preact_resnet import *
+from .mobilenet import *
+from .mobilenetv2 import *
+from .efficientnet import *
+from .regnet import *
+from .dla_simple import *
+from .dla import *
models/densenet.py
ADDED
@@ -0,0 +1,107 @@
+'''DenseNet in PyTorch.'''
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class Bottleneck(nn.Module):
+    def __init__(self, in_planes, growth_rate):
+        super(Bottleneck, self).__init__()
+        self.bn1 = nn.BatchNorm2d(in_planes)
+        self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(4*growth_rate)
+        self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)
+
+    def forward(self, x):
+        out = self.conv1(F.relu(self.bn1(x)))
+        out = self.conv2(F.relu(self.bn2(out)))
+        out = torch.cat([out,x], 1)
+        return out
+
+
+class Transition(nn.Module):
+    def __init__(self, in_planes, out_planes):
+        super(Transition, self).__init__()
+        self.bn = nn.BatchNorm2d(in_planes)
+        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)
+
+    def forward(self, x):
+        out = self.conv(F.relu(self.bn(x)))
+        out = F.avg_pool2d(out, 2)
+        return out
+
+
+class DenseNet(nn.Module):
+    def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
+        super(DenseNet, self).__init__()
+        self.growth_rate = growth_rate
+
+        num_planes = 2*growth_rate
+        self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)
+
+        self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
+        num_planes += nblocks[0]*growth_rate
+        out_planes = int(math.floor(num_planes*reduction))
+        self.trans1 = Transition(num_planes, out_planes)
+        num_planes = out_planes
+
+        self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
+        num_planes += nblocks[1]*growth_rate
+        out_planes = int(math.floor(num_planes*reduction))
+        self.trans2 = Transition(num_planes, out_planes)
+        num_planes = out_planes
+
+        self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
+        num_planes += nblocks[2]*growth_rate
+        out_planes = int(math.floor(num_planes*reduction))
+        self.trans3 = Transition(num_planes, out_planes)
+        num_planes = out_planes
+
+        self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
+        num_planes += nblocks[3]*growth_rate
+
+        self.bn = nn.BatchNorm2d(num_planes)
+        self.linear = nn.Linear(num_planes, num_classes)
+
+    def _make_dense_layers(self, block, in_planes, nblock):
+        layers = []
+        for i in range(nblock):
+            layers.append(block(in_planes, self.growth_rate))
+            in_planes += self.growth_rate
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        out = self.conv1(x)
+        out = self.trans1(self.dense1(out))
+        out = self.trans2(self.dense2(out))
+        out = self.trans3(self.dense3(out))
+        out = self.dense4(out)
+        out = F.avg_pool2d(F.relu(self.bn(out)), 4)
+        out = out.view(out.size(0), -1)
+        out = self.linear(out)
+        return out
+
+def DenseNet121():
+    return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)
+
+def DenseNet169():
+    return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32)
+
+def DenseNet201():
+    return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32)
+
+def DenseNet161():
+    return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48)
+
+def densenet_cifar():
+    return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12)
+
+def test():
+    net = densenet_cifar()
+    x = torch.randn(1,3,32,32)
+    y = net(x)
+    print(y)
+
+# test()
models/dla.py
ADDED
@@ -0,0 +1,135 @@
+'''DLA in PyTorch.
+
+Reference:
+    Deep Layer Aggregation. https://arxiv.org/abs/1707.06484
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class BasicBlock(nn.Module):
+    expansion = 1
+
+    def __init__(self, in_planes, planes, stride=1):
+        super(BasicBlock, self).__init__()
+        self.conv1 = nn.Conv2d(
+            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+
+        self.shortcut = nn.Sequential()
+        if stride != 1 or in_planes != self.expansion*planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, self.expansion*planes,
+                          kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(self.expansion*planes)
+            )
+
+    def forward(self, x):
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+        out += self.shortcut(x)
+        out = F.relu(out)
+        return out
+
+
+class Root(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size=1):
+        super(Root, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels, out_channels, kernel_size,
+            stride=1, padding=(kernel_size - 1) // 2, bias=False)
+        self.bn = nn.BatchNorm2d(out_channels)
+
+    def forward(self, xs):
+        x = torch.cat(xs, 1)
+        out = F.relu(self.bn(self.conv(x)))
+        return out
+
+
+class Tree(nn.Module):
+    def __init__(self, block, in_channels, out_channels, level=1, stride=1):
+        super(Tree, self).__init__()
+        self.level = level
+        if level == 1:
+            self.root = Root(2*out_channels, out_channels)
+            self.left_node = block(in_channels, out_channels, stride=stride)
+            self.right_node = block(out_channels, out_channels, stride=1)
+        else:
+            self.root = Root((level+2)*out_channels, out_channels)
+            for i in reversed(range(1, level)):
+                subtree = Tree(block, in_channels, out_channels,
+                               level=i, stride=stride)
+                self.__setattr__('level_%d' % i, subtree)
+            self.prev_root = block(in_channels, out_channels, stride=stride)
+            self.left_node = block(out_channels, out_channels, stride=1)
+            self.right_node = block(out_channels, out_channels, stride=1)
+
+    def forward(self, x):
+        xs = [self.prev_root(x)] if self.level > 1 else []
+        for i in reversed(range(1, self.level)):
+            level_i = self.__getattr__('level_%d' % i)
+            x = level_i(x)
+            xs.append(x)
+        x = self.left_node(x)
+        xs.append(x)
+        x = self.right_node(x)
+        xs.append(x)
+        out = self.root(xs)
+        return out
+
+
+class DLA(nn.Module):
+    def __init__(self, block=BasicBlock, num_classes=10):
+        super(DLA, self).__init__()
+        self.base = nn.Sequential(
+            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
+            nn.BatchNorm2d(16),
+            nn.ReLU(True)
+        )
+
+        self.layer1 = nn.Sequential(
+            nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
+            nn.BatchNorm2d(16),
+            nn.ReLU(True)
+        )
+
+        self.layer2 = nn.Sequential(
+            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
+            nn.BatchNorm2d(32),
+            nn.ReLU(True)
+        )
+
+        self.layer3 = Tree(block, 32, 64, level=1, stride=1)
+        self.layer4 = Tree(block, 64, 128, level=2, stride=2)
+        self.layer5 = Tree(block, 128, 256, level=2, stride=2)
+        self.layer6 = Tree(block, 256, 512, level=1, stride=2)
+        self.linear = nn.Linear(512, num_classes)
+
+    def forward(self, x):
+        out = self.base(x)
+        out = self.layer1(out)
+        out = self.layer2(out)
+        out = self.layer3(out)
+        out = self.layer4(out)
+        out = self.layer5(out)
+        out = self.layer6(out)
+        out = F.avg_pool2d(out, 4)
+        out = out.view(out.size(0), -1)
+        out = self.linear(out)
+        return out
+
+
+def test():
+    net = DLA()
+    print(net)
+    x = torch.randn(1, 3, 32, 32)
+    y = net(x)
+    print(y.size())
+
+
+if __name__ == '__main__':
+    test()
models/dla_simple.py
ADDED
@@ -0,0 +1,128 @@
+'''Simplified version of DLA in PyTorch.
+
+Note this implementation is not identical to the original paper version.
+But it seems to work fine.
+
+See dla.py for the original paper version.
+
+Reference:
+    Deep Layer Aggregation. https://arxiv.org/abs/1707.06484
+'''
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class BasicBlock(nn.Module):
+    expansion = 1
+
+    def __init__(self, in_planes, planes, stride=1):
+        super(BasicBlock, self).__init__()
+        self.conv1 = nn.Conv2d(
+            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+
+        self.shortcut = nn.Sequential()
+        if stride != 1 or in_planes != self.expansion*planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_planes, self.expansion*planes,
+                          kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(self.expansion*planes)
+            )
+
+    def forward(self, x):
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+        out += self.shortcut(x)
+        out = F.relu(out)
+        return out
+
+
+class Root(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size=1):
+        super(Root, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels, out_channels, kernel_size,
+            stride=1, padding=(kernel_size - 1) // 2, bias=False)
+        self.bn = nn.BatchNorm2d(out_channels)
+
+    def forward(self, xs):
+        x = torch.cat(xs, 1)
+        out = F.relu(self.bn(self.conv(x)))
+        return out
+
+
+class Tree(nn.Module):
+    def __init__(self, block, in_channels, out_channels, level=1, stride=1):
+        super(Tree, self).__init__()
+        self.root = Root(2*out_channels, out_channels)
+        if level == 1:
+            self.left_tree = block(in_channels, out_channels, stride=stride)
+            self.right_tree = block(out_channels, out_channels, stride=1)
+        else:
+            self.left_tree = Tree(block, in_channels,
+                                  out_channels, level=level-1, stride=stride)
+            self.right_tree = Tree(block, out_channels,
+                                   out_channels, level=level-1, stride=1)
+
+    def forward(self, x):
+        out1 = self.left_tree(x)
+        out2 = self.right_tree(out1)
+        out = self.root([out1, out2])
+        return out
+
+
+class SimpleDLA(nn.Module):
+    def __init__(self, block=BasicBlock, num_classes=10):
+        super(SimpleDLA, self).__init__()
+        self.base = nn.Sequential(
+            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
+            nn.BatchNorm2d(16),
+            nn.ReLU(True)
+        )
+
+        self.layer1 = nn.Sequential(
+            nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
+            nn.BatchNorm2d(16),
+            nn.ReLU(True)
+        )
+
+        self.layer2 = nn.Sequential(
+            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
+            nn.BatchNorm2d(32),
+            nn.ReLU(True)
+        )
+
+        self.layer3 = Tree(block, 32, 64, level=1, stride=1)
+        self.layer4 = Tree(block, 64, 128, level=2, stride=2)
+        self.layer5 = Tree(block, 128, 256, level=2, stride=2)
+        self.layer6 = Tree(block, 256, 512, level=1, stride=2)
+        self.linear = nn.Linear(512, num_classes)
+
+    def forward(self, x):
+        out = self.base(x)
+        out = self.layer1(out)
+        out = self.layer2(out)
+        out = self.layer3(out)
+        out = self.layer4(out)
+        out = self.layer5(out)
+        out = self.layer6(out)
+        out = F.avg_pool2d(out, 4)
+        out = out.view(out.size(0), -1)
+        out = self.linear(out)
+        return out
+
+
+def test():
+    net = SimpleDLA()
+    print(net)
+    x = torch.randn(1, 3, 32, 32)
+    y = net(x)
+    print(y.size())
+
+
+if __name__ == '__main__':
+    test()
models/dpn.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
'''Dual Path Networks in PyTorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Bottleneck(nn.Module):
    def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer):
        super(Bottleneck, self).__init__()
        self.out_planes = out_planes
        self.dense_depth = dense_depth

        self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False)
        self.bn2 = nn.BatchNorm2d(in_planes)
        self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes+dense_depth)

        self.shortcut = nn.Sequential()
        if first_layer:
            self.shortcut = nn.Sequential(
                nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes+dense_depth)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        x = self.shortcut(x)
        d = self.out_planes
        out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1)
        out = F.relu(out)
        return out


class DPN(nn.Module):
    def __init__(self, cfg):
        super(DPN, self).__init__()
        in_planes, out_planes = cfg['in_planes'], cfg['out_planes']
        num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth']

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.last_planes = 64
        self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1)
        self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2)
        self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2)
        self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2)
        self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10)

    def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for i,stride in enumerate(strides):
            layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0))
            self.last_planes = out_planes + (i+2) * dense_depth
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def DPN26():
    cfg = {
        'in_planes': (96,192,384,768),
        'out_planes': (256,512,1024,2048),
        'num_blocks': (2,2,2,2),
        'dense_depth': (16,32,24,128)
    }
    return DPN(cfg)

def DPN92():
    cfg = {
        'in_planes': (96,192,384,768),
        'out_planes': (256,512,1024,2048),
        'num_blocks': (3,4,20,3),
        'dense_depth': (16,32,24,128)
    }
    return DPN(cfg)


def test():
    net = DPN92()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y)

# test()
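The merge at the end of `Bottleneck.forward` is what makes the network "dual path": the first `out_planes` channels of the two tensors are added like a ResNet shortcut, while the trailing `dense_depth` channels are concatenated like a DenseNet transition, so every block widens the tensor by `dense_depth`. A minimal sketch of the channel arithmetic, with made-up tensor sizes:

# Illustrative only: channel bookkeeping of the dual-path merge.
import torch
d = 256                            # out_planes: the residual part
x = torch.randn(1, 272, 8, 8)      # shortcut output: 256 + 16 channels
out = torch.randn(1, 272, 8, 8)    # conv branch output: 256 + 16 channels
merged = torch.cat([x[:, :d] + out[:, :d], x[:, d:], out[:, d:]], 1)
print(merged.shape)                # torch.Size([1, 288, 8, 8]) = 256 + 2*16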
models/efficientnet.py
ADDED
@@ -0,0 +1,175 @@
'''EfficientNet in PyTorch.

Paper: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks".

Reference: https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


def swish(x):
    return x * x.sigmoid()


def drop_connect(x, drop_ratio):
    keep_ratio = 1.0 - drop_ratio
    mask = torch.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
    mask.bernoulli_(keep_ratio)
    x.div_(keep_ratio)
    x.mul_(mask)
    return x


class SE(nn.Module):
    '''Squeeze-and-Excitation block with Swish.'''

    def __init__(self, in_channels, se_channels):
        super(SE, self).__init__()
        self.se1 = nn.Conv2d(in_channels, se_channels,
                             kernel_size=1, bias=True)
        self.se2 = nn.Conv2d(se_channels, in_channels,
                             kernel_size=1, bias=True)

    def forward(self, x):
        out = F.adaptive_avg_pool2d(x, (1, 1))
        out = swish(self.se1(out))
        out = self.se2(out).sigmoid()
        out = x * out
        return out


class Block(nn.Module):
    '''expansion + depthwise + pointwise + squeeze-excitation'''

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 expand_ratio=1,
                 se_ratio=0.,
                 drop_rate=0.):
        super(Block, self).__init__()
        self.stride = stride
        self.drop_rate = drop_rate
        self.expand_ratio = expand_ratio

        # Expansion
        channels = expand_ratio * in_channels
        self.conv1 = nn.Conv2d(in_channels, channels,
                               kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)

        # Depthwise conv
        self.conv2 = nn.Conv2d(channels, channels,
                               kernel_size=kernel_size, stride=stride,
                               padding=(1 if kernel_size == 3 else 2),
                               groups=channels, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)

        # SE layers
        se_channels = int(in_channels * se_ratio)
        self.se = SE(channels, se_channels)

        # Output
        self.conv3 = nn.Conv2d(channels, out_channels,
                               kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        # Skip connection if in and out shapes are the same (MV-V2 style)
        self.has_skip = (stride == 1) and (in_channels == out_channels)

    def forward(self, x):
        out = x if self.expand_ratio == 1 else swish(self.bn1(self.conv1(x)))
        out = swish(self.bn2(self.conv2(out)))
        out = self.se(out)
        out = self.bn3(self.conv3(out))
        if self.has_skip:
            if self.training and self.drop_rate > 0:
                out = drop_connect(out, self.drop_rate)
            out = out + x
        return out


class EfficientNet(nn.Module):
    def __init__(self, cfg, num_classes=10):
        super(EfficientNet, self).__init__()
        self.cfg = cfg
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_channels=32)
        self.linear = nn.Linear(cfg['out_channels'][-1], num_classes)

    def _make_layers(self, in_channels):
        layers = []
        cfg = [self.cfg[k] for k in ['expansion', 'out_channels', 'num_blocks',
                                     'kernel_size', 'stride']]
        b = 0
        blocks = sum(self.cfg['num_blocks'])
        for expansion, out_channels, num_blocks, kernel_size, stride in zip(*cfg):
            strides = [stride] + [1] * (num_blocks - 1)
            for stride in strides:
                drop_rate = self.cfg['drop_connect_rate'] * b / blocks
                layers.append(
                    Block(in_channels,
                          out_channels,
                          kernel_size,
                          stride,
                          expansion,
                          se_ratio=0.25,
                          drop_rate=drop_rate))
                in_channels = out_channels
                b += 1  # bug fix: b was never incremented, so drop_rate stayed 0 for every block
        return nn.Sequential(*layers)

    def forward(self, x):
        out = swish(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        dropout_rate = self.cfg['dropout_rate']
        if self.training and dropout_rate > 0:
            out = F.dropout(out, p=dropout_rate)
        out = self.linear(out)
        return out


def EfficientNetB0():
    cfg = {
        'num_blocks': [1, 2, 2, 3, 3, 4, 1],
        'expansion': [1, 6, 6, 6, 6, 6, 6],
        'out_channels': [16, 24, 40, 80, 112, 192, 320],
        'kernel_size': [3, 3, 5, 3, 5, 5, 3],
        'stride': [1, 2, 2, 2, 1, 2, 1],
        'dropout_rate': 0.2,
        'drop_connect_rate': 0.2,
    }
    return EfficientNet(cfg)


def test():
    net = EfficientNetB0()
    x = torch.randn(2, 3, 32, 32)
    y = net(x)
    print(y.shape)


if __name__ == '__main__':
    test()
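`drop_connect` is per-example stochastic depth: each sample's residual branch is zeroed with probability `drop_ratio`, and the survivors are scaled by `1/keep_ratio` so the expected activation is unchanged and no rescaling is needed at evaluation time. A quick numerical check of that invariant:

# Illustrative only: scaling by 1/keep keeps the mean activation constant.
import torch
x = torch.ones(100000, 1, 1, 1)
keep = 0.8
mask = torch.empty_like(x).bernoulli_(keep)
print((x / keep * mask).mean())   # ~1.0 even though 20% of samples are zeroed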
models/googlenet.py
ADDED
@@ -0,0 +1,107 @@
'''GoogLeNet with PyTorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Inception(nn.Module):
    def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes):
        super(Inception, self).__init__()
        # 1x1 conv branch
        self.b1 = nn.Sequential(
            nn.Conv2d(in_planes, n1x1, kernel_size=1),
            nn.BatchNorm2d(n1x1),
            nn.ReLU(True),
        )

        # 1x1 conv -> 3x3 conv branch
        self.b2 = nn.Sequential(
            nn.Conv2d(in_planes, n3x3red, kernel_size=1),
            nn.BatchNorm2d(n3x3red),
            nn.ReLU(True),
            nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1),
            nn.BatchNorm2d(n3x3),
            nn.ReLU(True),
        )

        # 1x1 conv -> 5x5 conv branch (the 5x5 conv is factorized into two stacked 3x3 convs)
        self.b3 = nn.Sequential(
            nn.Conv2d(in_planes, n5x5red, kernel_size=1),
            nn.BatchNorm2d(n5x5red),
            nn.ReLU(True),
            nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1),
            nn.BatchNorm2d(n5x5),
            nn.ReLU(True),
            nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1),
            nn.BatchNorm2d(n5x5),
            nn.ReLU(True),
        )

        # 3x3 pool -> 1x1 conv branch
        self.b4 = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            nn.Conv2d(in_planes, pool_planes, kernel_size=1),
            nn.BatchNorm2d(pool_planes),
            nn.ReLU(True),
        )

    def forward(self, x):
        y1 = self.b1(x)
        y2 = self.b2(x)
        y3 = self.b3(x)
        y4 = self.b4(x)
        return torch.cat([y1,y2,y3,y4], 1)


class GoogLeNet(nn.Module):
    def __init__(self):
        super(GoogLeNet, self).__init__()
        self.pre_layers = nn.Sequential(
            nn.Conv2d(3, 192, kernel_size=3, padding=1),
            nn.BatchNorm2d(192),
            nn.ReLU(True),
        )

        self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
        self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)

        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)

        self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
        self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
        self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
        self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
        self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)

        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)

        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.linear = nn.Linear(1024, 10)

    def forward(self, x):
        out = self.pre_layers(x)
        out = self.a3(out)
        out = self.b3(out)
        out = self.maxpool(out)
        out = self.a4(out)
        out = self.b4(out)
        out = self.c4(out)
        out = self.d4(out)
        out = self.e4(out)
        out = self.maxpool(out)
        out = self.a5(out)
        out = self.b5(out)
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def test():
    net = GoogLeNet()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y.size())

# test()
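Each `Inception` module concatenates its four branches along the channel dimension, so its output width is `n1x1 + n3x3 + n5x5 + pool_planes`, and that sum is exactly the `in_planes` of the next module (a3 feeds b3, and so on). Checking a3:

# Illustrative only: a3's output width becomes b3's input width.
n1x1, n3x3, n5x5, pool_planes = 64, 128, 32, 32
print(n1x1 + n3x3 + n5x5 + pool_planes)   # 256, matching Inception(256, ...) for b3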
models/lenet.py
ADDED
@@ -0,0 +1,23 @@
'''LeNet in PyTorch.'''
import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out
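The `16*5*5` in `fc1` is forced by the input size: two unpadded 5x5 convolutions and two 2x2 poolings shrink a 32x32 CIFAR image to 5x5 over 16 channels. The bookkeeping:

# Illustrative only: spatial sizes through LeNet for a 32x32 input.
s = 32
s = s - 4    # conv1: 5x5 kernel, no padding -> 28
s = s // 2   # max_pool2d(2)                 -> 14
s = s - 4    # conv2: 5x5 kernel, no padding -> 10
s = s // 2   # max_pool2d(2)                 -> 5
print(16 * s * s)   # 400 = in_features of fc1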
models/mobilenet.py
ADDED
@@ -0,0 +1,61 @@
'''MobileNet in PyTorch.

See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications"
for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Block(nn.Module):
    '''Depthwise conv + Pointwise conv'''
    def __init__(self, in_planes, out_planes, stride=1):
        super(Block, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False)
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        return out


class MobileNet(nn.Module):
    # (128,2) means conv planes=128, conv stride=2, by default conv stride=1
    cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024]

    def __init__(self, num_classes=10):
        super(MobileNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.linear = nn.Linear(1024, num_classes)

    def _make_layers(self, in_planes):
        layers = []
        for x in self.cfg:
            out_planes = x if isinstance(x, int) else x[0]
            stride = 1 if isinstance(x, int) else x[1]
            layers.append(Block(in_planes, out_planes, stride))
            in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.avg_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def test():
    net = MobileNet()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y.size())

# test()
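The point of `Block` is the factorization: a depthwise 3x3 conv followed by a pointwise 1x1 conv replaces one dense 3x3 conv at a fraction of the weights. A rough count for one layer:

# Illustrative only: weights of a dense 3x3 conv vs. the depthwise-separable pair.
cin, cout, k = 32, 64, 3
dense = cin * cout * k * k              # 18432
separable = cin * k * k + cin * cout    # 2336 = depthwise + pointwise
print(round(dense / separable, 1))      # ~7.9x fewer parameters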
models/mobilenetv2.py
ADDED
@@ -0,0 +1,86 @@
'''MobileNetV2 in PyTorch.

See the paper "Inverted Residuals and Linear Bottlenecks:
Mobile Networks for Classification, Detection and Segmentation" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Block(nn.Module):
    '''expand + depthwise + pointwise'''
    def __init__(self, in_planes, out_planes, expansion, stride):
        super(Block, self).__init__()
        self.stride = stride

        planes = expansion * in_planes
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.shortcut = nn.Sequential()
        if stride == 1 and in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out = out + self.shortcut(x) if self.stride==1 else out
        return out


class MobileNetV2(nn.Module):
    # (expansion, out_planes, num_blocks, stride)
    cfg = [(1, 16, 1, 1),
           (6, 24, 2, 1),  # NOTE: change stride 2 -> 1 for CIFAR10
           (6, 32, 3, 2),
           (6, 64, 4, 2),
           (6, 96, 3, 1),
           (6, 160, 3, 2),
           (6, 320, 1, 1)]

    def __init__(self, num_classes=10):
        super(MobileNetV2, self).__init__()
        # NOTE: change conv1 stride 2 -> 1 for CIFAR10
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)
        self.linear = nn.Linear(1280, num_classes)

    def _make_layers(self, in_planes):
        layers = []
        for expansion, out_planes, num_blocks, stride in self.cfg:
            strides = [stride] + [1]*(num_blocks-1)
            for stride in strides:
                layers.append(Block(in_planes, out_planes, expansion, stride))
                in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.relu(self.bn2(self.conv2(out)))
        # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def test():
    net = MobileNetV2()
    x = torch.randn(2,3,32,32)
    y = net(x)
    print(y.size())

# test()
models/pnasnet.py
ADDED
@@ -0,0 +1,125 @@
'''PNASNet in PyTorch.

Paper: Progressive Neural Architecture Search
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class SepConv(nn.Module):
    '''Separable Convolution.'''
    def __init__(self, in_planes, out_planes, kernel_size, stride):
        super(SepConv, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, out_planes,
                               kernel_size, stride,
                               padding=(kernel_size-1)//2,
                               bias=False, groups=in_planes)
        self.bn1 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        return self.bn1(self.conv1(x))


class CellA(nn.Module):
    def __init__(self, in_planes, out_planes, stride=1):
        super(CellA, self).__init__()
        self.stride = stride
        self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
        if stride==2:
            self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
            self.bn1 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        y1 = self.sep_conv1(x)
        y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
        if self.stride==2:
            y2 = self.bn1(self.conv1(y2))
        return F.relu(y1+y2)


class CellB(nn.Module):
    def __init__(self, in_planes, out_planes, stride=1):
        super(CellB, self).__init__()
        self.stride = stride
        # Left branch
        self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride)
        self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride)
        # Right branch
        self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride)
        if stride==2:
            self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
            self.bn1 = nn.BatchNorm2d(out_planes)
        # Reduce channels
        self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        # Left branch
        y1 = self.sep_conv1(x)
        y2 = self.sep_conv2(x)
        # Right branch
        y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1)
        if self.stride==2:
            y3 = self.bn1(self.conv1(y3))
        y4 = self.sep_conv3(x)
        # Concat & reduce channels
        b1 = F.relu(y1+y2)
        b2 = F.relu(y3+y4)
        y = torch.cat([b1,b2], 1)
        return F.relu(self.bn2(self.conv2(y)))


class PNASNet(nn.Module):
    def __init__(self, cell_type, num_cells, num_planes):
        super(PNASNet, self).__init__()
        self.in_planes = num_planes
        self.cell_type = cell_type

        self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(num_planes)

        # was hard-coded to num_cells=6; use the constructor argument instead
        self.layer1 = self._make_layer(num_planes, num_cells=num_cells)
        self.layer2 = self._downsample(num_planes*2)
        self.layer3 = self._make_layer(num_planes*2, num_cells=num_cells)
        self.layer4 = self._downsample(num_planes*4)
        self.layer5 = self._make_layer(num_planes*4, num_cells=num_cells)

        self.linear = nn.Linear(num_planes*4, 10)

    def _make_layer(self, planes, num_cells):
        layers = []
        for _ in range(num_cells):
            layers.append(self.cell_type(self.in_planes, planes, stride=1))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def _downsample(self, planes):
        layer = self.cell_type(self.in_planes, planes, stride=2)
        self.in_planes = planes
        return layer

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = F.avg_pool2d(out, 8)
        out = self.linear(out.view(out.size(0), -1))
        return out


def PNASNetA():
    return PNASNet(CellA, num_cells=6, num_planes=44)

def PNASNetB():
    return PNASNet(CellB, num_cells=6, num_planes=32)


def test():
    net = PNASNetB()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y)

# test()
models/preact_resnet.py
ADDED
@@ -0,0 +1,118 @@
'''Pre-activation ResNet in PyTorch.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Identity Mappings in Deep Residual Networks. arXiv:1603.05027
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class PreActBlock(nn.Module):
    '''Pre-activation version of the BasicBlock.'''
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)

        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out += shortcut
        return out


class PreActBottleneck(nn.Module):
    '''Pre-activation version of the original Bottleneck module.'''
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(PreActBottleneck, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False)

        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))
        out = self.conv3(F.relu(self.bn3(out)))
        out += shortcut
        return out


class PreActResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(PreActResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def PreActResNet18():
    return PreActResNet(PreActBlock, [2,2,2,2])

def PreActResNet34():
    return PreActResNet(PreActBlock, [3,4,6,3])

def PreActResNet50():
    return PreActResNet(PreActBottleneck, [3,4,6,3])

def PreActResNet101():
    return PreActResNet(PreActBottleneck, [3,4,23,3])

def PreActResNet152():
    return PreActResNet(PreActBottleneck, [3,8,36,3])


def test():
    net = PreActResNet18()
    y = net(torch.randn(1,3,32,32))
    print(y.size())

# test()
models/regnet.py
ADDED
@@ -0,0 +1,155 @@
'''RegNet in PyTorch.

Paper: "Designing Network Design Spaces".

Reference: https://github.com/facebookresearch/pycls
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class SE(nn.Module):
    '''Squeeze-and-Excitation block.'''

    def __init__(self, in_planes, se_planes):
        super(SE, self).__init__()
        self.se1 = nn.Conv2d(in_planes, se_planes, kernel_size=1, bias=True)
        self.se2 = nn.Conv2d(se_planes, in_planes, kernel_size=1, bias=True)

    def forward(self, x):
        out = F.adaptive_avg_pool2d(x, (1, 1))
        out = F.relu(self.se1(out))
        out = self.se2(out).sigmoid()
        out = x * out
        return out


class Block(nn.Module):
    def __init__(self, w_in, w_out, stride, group_width, bottleneck_ratio, se_ratio):
        super(Block, self).__init__()
        # 1x1
        w_b = int(round(w_out * bottleneck_ratio))
        self.conv1 = nn.Conv2d(w_in, w_b, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(w_b)
        # 3x3
        num_groups = w_b // group_width
        self.conv2 = nn.Conv2d(w_b, w_b, kernel_size=3,
                               stride=stride, padding=1, groups=num_groups, bias=False)
        self.bn2 = nn.BatchNorm2d(w_b)
        # se
        self.with_se = se_ratio > 0
        if self.with_se:
            w_se = int(round(w_in * se_ratio))
            self.se = SE(w_b, w_se)
        # 1x1
        self.conv3 = nn.Conv2d(w_b, w_out, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(w_out)

        self.shortcut = nn.Sequential()
        if stride != 1 or w_in != w_out:
            self.shortcut = nn.Sequential(
                nn.Conv2d(w_in, w_out,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(w_out)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        if self.with_se:
            out = self.se(out)
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class RegNet(nn.Module):
    def __init__(self, cfg, num_classes=10):
        super(RegNet, self).__init__()
        self.cfg = cfg
        self.in_planes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(0)
        self.layer2 = self._make_layer(1)
        self.layer3 = self._make_layer(2)
        self.layer4 = self._make_layer(3)
        self.linear = nn.Linear(self.cfg['widths'][-1], num_classes)

    def _make_layer(self, idx):
        depth = self.cfg['depths'][idx]
        width = self.cfg['widths'][idx]
        stride = self.cfg['strides'][idx]
        group_width = self.cfg['group_width']
        bottleneck_ratio = self.cfg['bottleneck_ratio']
        se_ratio = self.cfg['se_ratio']

        layers = []
        for i in range(depth):
            s = stride if i == 0 else 1
            layers.append(Block(self.in_planes, width,
                                s, group_width, bottleneck_ratio, se_ratio))
            self.in_planes = width
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.adaptive_avg_pool2d(out, (1, 1))
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def RegNetX_200MF():
    cfg = {
        'depths': [1, 1, 4, 7],
        'widths': [24, 56, 152, 368],
        'strides': [1, 1, 2, 2],
        'group_width': 8,
        'bottleneck_ratio': 1,
        'se_ratio': 0,
    }
    return RegNet(cfg)


def RegNetX_400MF():
    cfg = {
        'depths': [1, 2, 7, 12],
        'widths': [32, 64, 160, 384],
        'strides': [1, 1, 2, 2],
        'group_width': 16,
        'bottleneck_ratio': 1,
        'se_ratio': 0,
    }
    return RegNet(cfg)


def RegNetY_400MF():
    cfg = {
        'depths': [1, 2, 7, 12],
        'widths': [32, 64, 160, 384],
        'strides': [1, 1, 2, 2],
        'group_width': 16,
        'bottleneck_ratio': 1,
        'se_ratio': 0.25,
    }
    return RegNet(cfg)


def test():
    net = RegNetX_200MF()
    print(net)
    x = torch.randn(2, 3, 32, 32)
    y = net(x)
    print(y.shape)


if __name__ == '__main__':
    test()
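`RegNet` here is entirely config-driven: a variant is just per-stage depths, widths and strides plus a group width, bottleneck ratio and SE ratio. A hypothetical custom variant (the cfg values below are invented for illustration; each width must be divisible by `group_width`):

# Illustrative only: instantiating a made-up variant through the same cfg dict.
cfg = {
    'depths': [1, 1, 2, 2],
    'widths': [32, 64, 128, 256],
    'strides': [1, 1, 2, 2],
    'group_width': 8,
    'bottleneck_ratio': 1,
    'se_ratio': 0.25,   # > 0 enables the SE block inside every Block
}
net = RegNet(cfg)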
models/resnet.py
ADDED
@@ -0,0 +1,132 @@
'''ResNet in PyTorch.

For Pre-activation ResNet, see 'preact_resnet.py'.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion*planes,
                               kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])


def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])


def ResNet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])


def ResNet101():
    return ResNet(Bottleneck, [3, 4, 23, 3])


def ResNet152():
    return ResNet(Bottleneck, [3, 8, 36, 3])


def test():
    net = ResNet18()
    y = net(torch.randn(1, 3, 32, 32))
    print(y.size())

# test()
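The hard-coded `F.avg_pool2d(out, 4)` works because of the CIFAR-sized stem: a stride-1 conv1 plus three stride-2 stages take a 32x32 input down to 4x4. Tracing it:

# Illustrative only: spatial size after each stage for a 32x32 input.
s = 32
for stride in [1, 1, 2, 2, 2]:   # conv1, layer1, layer2, layer3, layer4
    s //= stride
print(s)   # 4, hence the 4x4 average pool before the classifier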
models/resnext.py
ADDED
@@ -0,0 +1,95 @@
'''ResNeXt in PyTorch.

See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class Block(nn.Module):
    '''Grouped convolution block.'''
    expansion = 2

    def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
        super(Block, self).__init__()
        group_width = cardinality * bottleneck_width
        self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(group_width)
        self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)
        self.bn2 = nn.BatchNorm2d(group_width)
        self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*group_width)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*group_width:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*group_width)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNeXt(nn.Module):
    def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10):
        super(ResNeXt, self).__init__()
        self.cardinality = cardinality
        self.bottleneck_width = bottleneck_width
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(num_blocks[0], 1)
        self.layer2 = self._make_layer(num_blocks[1], 2)
        self.layer3 = self._make_layer(num_blocks[2], 2)
        # self.layer4 = self._make_layer(num_blocks[3], 2)
        self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes)

    def _make_layer(self, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride))
            self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
        # Double bottleneck_width after each stage.
        self.bottleneck_width *= 2
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # out = self.layer4(out)
        out = F.avg_pool2d(out, 8)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNeXt29_2x64d():
    return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64)

def ResNeXt29_4x64d():
    return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64)

def ResNeXt29_8x64d():
    return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64)

def ResNeXt29_32x4d():
    return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4)

def test_resnext():
    net = ResNeXt29_2x64d()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y.size())

# test_resnext()
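Grouped convolution is where ResNeXt saves its parameters: splitting a 3x3 conv into `cardinality` independent groups divides the weight count by the cardinality, which is what pays for the wider bottlenecks. For one layer:

# Illustrative only: 3x3 conv weights, dense vs. grouped at equal width.
width, k, cardinality = 128, 3, 32
dense = width * width * k * k                                # 147456
grouped = cardinality * (width // cardinality) ** 2 * k * k  # 4608
print(dense // grouped)                                      # 32 = cardinality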
models/senet.py
ADDED
@@ -0,0 +1,121 @@
'''SENet in PyTorch.

SENet is the winner of ImageNet-2017.
Paper: "Squeeze-and-Excitation Networks" (arXiv:1709.01507).
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

        # SE layers
        self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)  # Use nn.Conv2d instead of nn.Linear
        self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Squeeze
        w = F.avg_pool2d(out, out.size(2))
        w = F.relu(self.fc1(w))
        w = torch.sigmoid(self.fc2(w))  # torch.sigmoid: F.sigmoid is deprecated
        # Excitation
        out = out * w  # broadcasts [N,C,1,1] over [N,C,H,W]

        out += self.shortcut(x)
        out = F.relu(out)
        return out


class PreActBlock(nn.Module):
    def __init__(self, in_planes, planes, stride=1):
        super(PreActBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)

        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False)
            )

        # SE layers
        self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1)
        self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1)

    def forward(self, x):
        out = F.relu(self.bn1(x))
        shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x
        out = self.conv1(out)
        out = self.conv2(F.relu(self.bn2(out)))

        # Squeeze
        w = F.avg_pool2d(out, out.size(2))
        w = F.relu(self.fc1(w))
        w = torch.sigmoid(self.fc2(w))
        # Excitation
        out = out * w

        out += shortcut
        return out


class SENet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(SENet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def SENet18():
    return SENet(PreActBlock, [2,2,2,2])


def test():
    net = SENet18()
    y = net(torch.randn(1,3,32,32))
    print(y.size())

# test()
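The squeeze-excitation path produces one gate in (0,1) per channel and rescales the whole feature map by broadcasting; the two 1x1 convs only decide the gate values. Stripped to its core:

# Illustrative only: SE recalibration with the fc layers omitted.
import torch
import torch.nn.functional as F
x = torch.randn(2, 64, 8, 8)
w = F.avg_pool2d(x, x.size(2))   # squeeze to [2, 64, 1, 1]
w = torch.sigmoid(w)             # one gate per channel
print((x * w).shape)             # broadcasts back to torch.Size([2, 64, 8, 8])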
models/shufflenet.py
ADDED
@@ -0,0 +1,109 @@
'''ShuffleNet in PyTorch.

See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details.
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class ShuffleBlock(nn.Module):
    def __init__(self, groups):
        super(ShuffleBlock, self).__init__()
        self.groups = groups

    def forward(self, x):
        '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]'''
        N,C,H,W = x.size()
        g = self.groups
        return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).reshape(N,C,H,W)


class Bottleneck(nn.Module):
    def __init__(self, in_planes, out_planes, stride, groups):
        super(Bottleneck, self).__init__()
        self.stride = stride

        mid_planes = out_planes // 4  # integer division: channel counts must be ints (out_planes/4 breaks on Python 3)
        g = 1 if in_planes==24 else groups
        self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_planes)
        self.shuffle1 = ShuffleBlock(groups=g)
        self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_planes)
        self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.shortcut = nn.Sequential()
        if stride == 2:
            self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.shuffle1(out)
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        res = self.shortcut(x)
        out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res)
        return out


class ShuffleNet(nn.Module):
    def __init__(self, cfg):
        super(ShuffleNet, self).__init__()
        out_planes = cfg['out_planes']
        num_blocks = cfg['num_blocks']
        groups = cfg['groups']

        self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(24)
        self.in_planes = 24
        self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
        self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
        self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
        self.linear = nn.Linear(out_planes[2], 10)

    def _make_layer(self, out_planes, num_blocks, groups):
        layers = []
        for i in range(num_blocks):
            stride = 2 if i == 0 else 1
            cat_planes = self.in_planes if i == 0 else 0
            layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups))
            self.in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ShuffleNetG2():
    cfg = {
        'out_planes': [200,400,800],
        'num_blocks': [4,8,4],
        'groups': 2
    }
    return ShuffleNet(cfg)

def ShuffleNetG3():
    cfg = {
        'out_planes': [240,480,960],
        'num_blocks': [4,8,4],
        'groups': 3
    }
    return ShuffleNet(cfg)


def test():
    net = ShuffleNetG2()
    x = torch.randn(1,3,32,32)
    y = net(x)
    print(y)

# test()
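`ShuffleBlock` exists because stacked grouped 1x1 convs would otherwise never mix information across groups; the view/permute/reshape interleaves channels between groups at no parameter cost. On a toy tensor:

# Illustrative only: channel shuffle with 6 channels and 2 groups.
import torch
x = torch.arange(6).view(1, 6, 1, 1)   # channels [0, 1, 2, 3, 4, 5]
g = 2
y = x.view(1, g, 6 // g, 1, 1).permute(0, 2, 1, 3, 4).reshape(1, 6, 1, 1)
print(y.view(-1).tolist())             # [0, 3, 1, 4, 2, 5]: groups interleaved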
models/shufflenetv2.py
ADDED
@@ -0,0 +1,162 @@
1 |
+
'''ShuffleNetV2 in PyTorch.
|
2 |
+
|
3 |
+
See the paper "ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design" for more details.
|
4 |
+
'''
|
5 |
+
import torch
|
6 |
+
import torch.nn as nn
|
7 |
+
import torch.nn.functional as F
|
8 |
+
|
9 |
+
|
10 |
+
class ShuffleBlock(nn.Module):
|
11 |
+
def __init__(self, groups=2):
|
12 |
+
super(ShuffleBlock, self).__init__()
|
13 |
+
self.groups = groups
|
14 |
+
|
15 |
+
def forward(self, x):
|
16 |
+
'''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]'''
|
17 |
+
N, C, H, W = x.size()
|
18 |
+
g = self.groups
|
19 |
+
return x.view(N, g, C//g, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W)
|
20 |
+
|
21 |
+
|
22 |
+
class SplitBlock(nn.Module):
|
23 |
+
def __init__(self, ratio):
|
24 |
+
super(SplitBlock, self).__init__()
|
25 |
+
self.ratio = ratio
|
26 |
+
|
27 |
+
def forward(self, x):
|
28 |
+
c = int(x.size(1) * self.ratio)
|
29 |
+
return x[:, :c, :, :], x[:, c:, :, :]
|
30 |
+
|
31 |
+
|
32 |
+
class BasicBlock(nn.Module):
|
33 |
+
def __init__(self, in_channels, split_ratio=0.5):
|
34 |
+
super(BasicBlock, self).__init__()
|
35 |
+
self.split = SplitBlock(split_ratio)
|
36 |
+
in_channels = int(in_channels * split_ratio)
|
37 |
+
self.conv1 = nn.Conv2d(in_channels, in_channels,
|
38 |
+
kernel_size=1, bias=False)
|
39 |
+
self.bn1 = nn.BatchNorm2d(in_channels)
|
40 |
+
self.conv2 = nn.Conv2d(in_channels, in_channels,
|
41 |
+
kernel_size=3, stride=1, padding=1, groups=in_channels, bias=False)
|
42 |
+
self.bn2 = nn.BatchNorm2d(in_channels)
|
43 |
+
self.conv3 = nn.Conv2d(in_channels, in_channels,
|
44 |
+
kernel_size=1, bias=False)
|
45 |
+
self.bn3 = nn.BatchNorm2d(in_channels)
|
46 |
+
self.shuffle = ShuffleBlock()
|
47 |
+
|
48 |
+
def forward(self, x):
|
49 |
+
x1, x2 = self.split(x)
|
50 |
+
out = F.relu(self.bn1(self.conv1(x2)))
|
51 |
+
out = self.bn2(self.conv2(out))
|
52 |
+
out = F.relu(self.bn3(self.conv3(out)))
|
53 |
+
out = torch.cat([x1, out], 1)
|
54 |
+
out = self.shuffle(out)
|
55 |
+
return out
|
56 |
+
|
57 |
+
|
58 |
+
class DownBlock(nn.Module):
|
59 |
+
def __init__(self, in_channels, out_channels):
|
60 |
+
super(DownBlock, self).__init__()
|
61 |
+
mid_channels = out_channels // 2
|
62 |
+
# left
|
63 |
+
self.conv1 = nn.Conv2d(in_channels, in_channels,
|
64 |
+
kernel_size=3, stride=2, padding=1, groups=in_channels, bias=False)
|
65 |
+
self.bn1 = nn.BatchNorm2d(in_channels)
|
66 |
+
self.conv2 = nn.Conv2d(in_channels, mid_channels,
|
67 |
+
kernel_size=1, bias=False)
|
68 |
+
self.bn2 = nn.BatchNorm2d(mid_channels)
|
69 |
+
# right
|
70 |
+
self.conv3 = nn.Conv2d(in_channels, mid_channels,
|
71 |
+
kernel_size=1, bias=False)
|
72 |
+
self.bn3 = nn.BatchNorm2d(mid_channels)
|
73 |
+
self.conv4 = nn.Conv2d(mid_channels, mid_channels,
|
74 |
+
kernel_size=3, stride=2, padding=1, groups=mid_channels, bias=False)
|
75 |
+
self.bn4 = nn.BatchNorm2d(mid_channels)
|
76 |
+
self.conv5 = nn.Conv2d(mid_channels, mid_channels,
|
77 |
+
kernel_size=1, bias=False)
|
78 |
+
self.bn5 = nn.BatchNorm2d(mid_channels)
|
79 |
+
|
80 |
+
self.shuffle = ShuffleBlock()
|
81 |
+
|
82 |
+
def forward(self, x):
|
83 |
+
# left
|
84 |
+
out1 = self.bn1(self.conv1(x))
|
85 |
+
out1 = F.relu(self.bn2(self.conv2(out1)))
|
86 |
+
# right
|
87 |
+
out2 = F.relu(self.bn3(self.conv3(x)))
|
88 |
+
out2 = self.bn4(self.conv4(out2))
|
89 |
+
out2 = F.relu(self.bn5(self.conv5(out2)))
|
90 |
+
# concat
|
91 |
+
out = torch.cat([out1, out2], 1)
|
92 |
+
out = self.shuffle(out)
|
93 |
+
return out
|
94 |
+
|
95 |
+
|
96 |
+
class ShuffleNetV2(nn.Module):
|
97 |
+
def __init__(self, net_size):
|
98 |
+
super(ShuffleNetV2, self).__init__()
|
99 |
+
out_channels = configs[net_size]['out_channels']
|
100 |
+
num_blocks = configs[net_size]['num_blocks']
|
101 |
+
|
102 |
+
self.conv1 = nn.Conv2d(3, 24, kernel_size=3,
|
103 |
+
stride=1, padding=1, bias=False)
|
104 |
+
self.bn1 = nn.BatchNorm2d(24)
|
105 |
+
self.in_channels = 24
|
106 |
+
self.layer1 = self._make_layer(out_channels[0], num_blocks[0])
|
107 |
+
self.layer2 = self._make_layer(out_channels[1], num_blocks[1])
|
108 |
+
self.layer3 = self._make_layer(out_channels[2], num_blocks[2])
|
109 |
+
self.conv2 = nn.Conv2d(out_channels[2], out_channels[3],
|
110 |
+
kernel_size=1, stride=1, padding=0, bias=False)
|
111 |
+
self.bn2 = nn.BatchNorm2d(out_channels[3])
|
112 |
+
self.linear = nn.Linear(out_channels[3], 10)
|
113 |
+
|
114 |
+
def _make_layer(self, out_channels, num_blocks):
|
115 |
+
layers = [DownBlock(self.in_channels, out_channels)]
|
116 |
+
for i in range(num_blocks):
|
117 |
+
layers.append(BasicBlock(out_channels))
|
118 |
+
self.in_channels = out_channels
|
119 |
+
return nn.Sequential(*layers)
|
120 |
+
|
121 |
+
def forward(self, x):
|
122 |
+
out = F.relu(self.bn1(self.conv1(x)))
|
123 |
+
# out = F.max_pool2d(out, 3, stride=2, padding=1)
|
124 |
+
out = self.layer1(out)
|
125 |
+
out = self.layer2(out)
|
126 |
+
out = self.layer3(out)
|
127 |
+
out = F.relu(self.bn2(self.conv2(out)))
|
128 |
+
out = F.avg_pool2d(out, 4)
|
129 |
+
out = out.view(out.size(0), -1)
|
130 |
+
out = self.linear(out)
|
131 |
+
return out
|
132 |
+
|
133 |
+
|
134 |
+
configs = {
|
135 |
+
0.5: {
|
136 |
+
'out_channels': (48, 96, 192, 1024),
|
137 |
+
'num_blocks': (3, 7, 3)
|
138 |
+
},
|
139 |
+
|
140 |
+
1: {
|
141 |
+
'out_channels': (116, 232, 464, 1024),
|
142 |
+
'num_blocks': (3, 7, 3)
|
143 |
+
},
|
144 |
+
1.5: {
|
145 |
+
'out_channels': (176, 352, 704, 1024),
|
146 |
+
'num_blocks': (3, 7, 3)
|
147 |
+
},
|
148 |
+
2: {
|
149 |
+
'out_channels': (224, 488, 976, 2048),
|
150 |
+
'num_blocks': (3, 7, 3)
|
151 |
+
}
|
152 |
+
}
|
153 |
+
|
154 |
+
|
155 |
+
def test():
|
156 |
+
net = ShuffleNetV2(net_size=0.5)
|
157 |
+
x = torch.randn(3, 3, 32, 32)
|
158 |
+
y = net(x)
|
159 |
+
print(y.shape)
|
160 |
+
|
161 |
+
|
162 |
+
# test()
|
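A quick sanity check of the two things this file provides: the channel shuffle and the CIFAR10-sized forward pass. The snippet below is an illustrative sketch, assuming it is run from the repository root so that `models.shufflenetv2` is importable:

# Sketch: with groups=2 the shuffle interleaves the two halves of the
# channel axis, which is what lets information cross the split/concat
# branches of BasicBlock.
import torch
from models.shufflenetv2 import ShuffleBlock, ShuffleNetV2

x = torch.arange(8.).reshape(1, 8, 1, 1)              # channels numbered 0..7
print(ShuffleBlock(groups=2)(x).flatten().tolist())   # [0.0, 4.0, 1.0, 5.0, 2.0, 6.0, 3.0, 7.0]

net = ShuffleNetV2(net_size=0.5)                      # keys of `configs` select the width
print(net(torch.randn(2, 3, 32, 32)).shape)           # torch.Size([2, 10])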
models/vgg.py
ADDED
@@ -0,0 +1,47 @@
'''VGG11/13/16/19 in PyTorch.'''
import torch
import torch.nn as nn


cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)


def test():
    net = VGG('VGG11')
    x = torch.randn(2, 3, 32, 32)
    y = net(x)
    print(y.size())


# test()
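Each integer in a `cfg` list adds a Conv-BN-ReLU block and each `'M'` halves the spatial size; five `'M'` entries take a 32x32 CIFAR10 image down to 1x1, so the classifier always sees a flat 512-vector. A minimal usage sketch, assuming it is run from the repository root:

# Sketch: every variant ends in the same 512 -> 10 classifier.
import torch
from models.vgg import VGG

for name in ('VGG11', 'VGG16', 'VGG19'):
    y = VGG(name)(torch.randn(2, 3, 32, 32))
    print(name, y.size())    # torch.Size([2, 10]) for every variant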
requirements.txt
ADDED
Binary file (8.01 kB).
utils.py
ADDED
@@ -0,0 +1,127 @@
'''Some helper functions for PyTorch, including:
    - get_mean_and_std: calculate the mean and std value of dataset.
    - init_params: net parameter initialization.
    - progress_bar: progress bar mimic xlua.progress.
'''
import os
import sys
import time
import math

import torch
import torch.nn as nn
import torch.nn.init as init


def get_mean_and_std(dataset):
    '''Compute the mean and std value of dataset.'''
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)
    mean = torch.zeros(3)
    std = torch.zeros(3)
    print('==> Computing mean and std..')
    for inputs, targets in dataloader:
        for i in range(3):
            mean[i] += inputs[:,i,:,:].mean()
            std[i] += inputs[:,i,:,:].std()
    mean.div_(len(dataset))
    std.div_(len(dataset))
    return mean, std


def init_params(net):
    '''Init layer parameters.'''
    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight, mode='fan_out')
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            init.constant_(m.weight, 1)
            init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.normal_(m.weight, std=1e-3)
            if m.bias is not None:
                init.constant_(m.bias, 0)
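
# Usage sketch (illustrative): both helpers above are one-shot calls made
# before training starts, e.g.
#
#   import torchvision, torchvision.transforms as transforms
#   trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
#       download=True, transform=transforms.ToTensor())
#   mean, std = get_mean_and_std(trainset)
#   # approximately (0.4914, 0.4822, 0.4465) and (0.2023, 0.1994, 0.2010) for CIFAR10
#
#   init_params(net)   # Kaiming/MSR-init a freshly constructed model
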
_, term_width = os.popen('stty size', 'r').read().split()
term_width = int(term_width)

print(f"Term Width: {term_width}")

TOTAL_BAR_LENGTH = 65.
last_time = time.time()
begin_time = last_time


def progress_bar(current, total, msg=None):
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # Reset for new bar.

    cur_len = int(TOTAL_BAR_LENGTH*current/total)
    rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1

    sys.stdout.write(' [')
    for i in range(cur_len):
        sys.stdout.write('=')
    sys.stdout.write('>')
    for i in range(rest_len):
        sys.stdout.write('.')
    sys.stdout.write(']')

    cur_time = time.time()
    step_time = cur_time - last_time
    last_time = cur_time
    tot_time = cur_time - begin_time

    L = []
    L.append(' Step: %s' % format_time(step_time))
    L.append(' | Tot: %s' % format_time(tot_time))
    if msg:
        L.append(' | ' + msg)

    msg = ''.join(L)
    sys.stdout.write(msg)
    for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3):
        sys.stdout.write(' ')

    # Go back to the center of the bar.
    for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2):
        sys.stdout.write('\b')
    sys.stdout.write(' %d/%d ' % (current+1, total))

    if current < total-1:
        sys.stdout.write('\r')
    else:
        sys.stdout.write('\n')
    sys.stdout.flush()


def format_time(seconds):
    days = int(seconds / 3600/24)
    seconds = seconds - days*3600*24
    hours = int(seconds / 3600)
    seconds = seconds - hours*3600
    minutes = int(seconds / 60)
    seconds = seconds - minutes*60
    secondsf = int(seconds)
    seconds = seconds - secondsf
    millis = int(seconds*1000)

    f = ''
    i = 1
    if days > 0:
        f += str(days) + 'D'
        i += 1
    if hours > 0 and i <= 2:
        f += str(hours) + 'h'
        i += 1
    if minutes > 0 and i <= 2:
        f += str(minutes) + 'm'
        i += 1
    if secondsf > 0 and i <= 2:
        f += str(secondsf) + 's'
        i += 1
    if millis > 0 and i <= 2:
        f += str(millis) + 'ms'
        i += 1
    if f == '':
        f = '0ms'
    return f
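`progress_bar` is meant to be called once per batch with the running metrics, and `format_time` trims a duration to its two largest units. A minimal sketch, assuming a real terminal (the `stty size` call above needs a tty); the loss message is illustrative:

# Sketch: drive the bar from a dummy loop and format a duration.
import time
from utils import progress_bar, format_time

total = 10
for i in range(total):
    time.sleep(0.05)    # stand-in for one training step
    progress_bar(i, total, msg='Loss: %.3f' % (1.0 / (i + 1)))

print(format_time(3725.5))    # '1h2m' -- only the two largest units are kept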