subatomicseer committed
Commit 7999e5a · 1 Parent(s): fb09963

Initial Commit

.gitignore ADDED
@@ -0,0 +1,3 @@
+ .idea/
+ __pycache__/
+ *.pth
AdaIN.py ADDED
@@ -0,0 +1,73 @@
+ import torch
+ import torch.nn as nn
+ from Network import vgg19, decoder
+ from utils import adaptive_instance_normalization
+
+ class AdaINNet(nn.Module):
+     """
+     AdaIN Style Transfer Network
+
+     Args:
+         vgg_weight: pretrained vgg19 weights
+     """
+     def __init__(self, vgg_weight):
+         super().__init__()
+         self.encoder = vgg19(vgg_weight)
+
+         # Drop the encoder layers after relu4_1
+         self.encoder = nn.Sequential(*list(self.encoder.children())[:22])
+
+         # No optimization for the encoder
+         for parameter in self.encoder.parameters():
+             parameter.requires_grad = False
+
+         self.decoder = decoder()
+
+         self.mseloss = nn.MSELoss()
+
+     """
+     Computes the style loss between two feature maps.
+
+     Args:
+         x (torch.FloatTensor): content image features
+         y (torch.FloatTensor): style image features
+
+     Return:
+         Sum of the MSE between the channel-wise means of x and y and the MSE between their channel-wise standard deviations
+     """
+     def _style_loss(self, x, y):
+         return self.mseloss(torch.mean(x, dim=[2, 3]), torch.mean(y, dim=[2, 3])) + \
+             self.mseloss(torch.std(x, dim=[2, 3]), torch.std(y, dim=[2, 3]))
+
+     def forward(self, content, style, alpha=1.0):
+         # Generate image features
+         content_enc = self.encoder(content)
+         style_enc = self.encoder(style)
+
+         # Perform style transfer in feature space
+         transfer_enc = adaptive_instance_normalization(content_enc, style_enc)
+
+         # Generate output image
+         out = self.decoder(transfer_enc)
+
+         # vgg19 layer relu1_1
+         style_relu11 = self.encoder[:3](style)
+         out_relu11 = self.encoder[:3](out)
+
+         # vgg19 layer relu2_1
+         style_relu21 = self.encoder[3:8](style_relu11)
+         out_relu21 = self.encoder[3:8](out_relu11)
+
+         # vgg19 layer relu3_1
+         style_relu31 = self.encoder[8:13](style_relu21)
+         out_relu31 = self.encoder[8:13](out_relu21)
+
+         # vgg19 layer relu4_1
+         out_enc = self.encoder[13:](out_relu31)
+
+         # Calculate losses
+         content_loss = self.mseloss(out_enc, transfer_enc)
+         style_loss = self._style_loss(out_relu11, style_relu11) + self._style_loss(out_relu21, style_relu21) + \
+             self._style_loss(out_relu31, style_relu31) + self._style_loss(out_enc, style_enc)
+
+         return content_loss, style_loss
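
A minimal sketch of how AdaINNet would typically be driven during training, assuming the pretrained VGG-19 state dict is available locally; the batch tensors, the weight path, and the style-loss weight below are placeholders, not part of this commit:

import torch
from AdaIN import AdaINNet

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

vgg_weights = torch.load('vgg.pth', map_location=device)   # assumed local path to the VGG-19 weights
model = AdaINNet(vgg_weights).to(device)

content = torch.rand(4, 3, 256, 256, device=device)        # placeholder content batch
style = torch.rand(4, 3, 256, 256, device=device)          # placeholder style batch

content_loss, style_loss = model(content, style)
loss = content_loss + 10.0 * style_loss                    # illustrative style-loss weight
loss.backward()                                             # only the decoder receives gradients; the encoder is frozen

Since the encoder parameters are frozen in __init__, an optimizer would only need to be attached to model.decoder.parameters().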
Network.py ADDED
@@ -0,0 +1,73 @@
+ import torch.nn as nn
+
+ vgg19_cfg = [3, 64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M", 512, 512, 512, 512, "M", 512, 512, 512, 512, "M"]
+ decoder_cfg = [512, 256, "U", 256, 256, 256, 128, "U", 128, 64, "U", 64, 3]
+
+ def vgg19(weights=None):
+     """
+     Build the vgg19 network. Load weights if they are given.
+
+     Args:
+         weights (dict): vgg19 pretrained weights
+
+     Return:
+         layers (nn.Sequential): vgg19 layers
+     """
+
+     modules = make_block(vgg19_cfg)
+     modules = [nn.Conv2d(3, 3, kernel_size=1)] + list(modules.children())
+     layers = nn.Sequential(*modules)
+
+     if weights:
+         layers.load_state_dict(weights)
+
+     return layers
+
+
+ def decoder(weights=None):
+     """
+     Build the decoder network. Load weights if they are given.
+
+     Args:
+         weights (dict): decoder pretrained weights
+
+     Return:
+         layers (nn.Sequential): decoder layers
+     """
+
+     modules = make_block(decoder_cfg)
+     layers = nn.Sequential(*list(modules.children())[:-1])  # no ReLU after the last layer
+
+     if weights:
+         layers.load_state_dict(weights)
+
+     return layers
+
+
+ def make_block(config):
+     """
+     Helper function for building blocks of convolutional layers.
+
+     Args:
+         config (list): List of layer configs:
+             "M" - Max pooling layer.
+             "U" - Upsampling layer.
+             i (int) - Convolutional layer (i filters) plus ReLU activation.
+     Return:
+         layers (nn.Sequential): block layers
+     """
+     layers = []
+     in_channels = config[0]
+
+     for c in config[1:]:
+         if c == "M":
+             layers.append(nn.MaxPool2d(kernel_size=2, stride=2, padding=0))
+         elif c == "U":
+             layers.append(nn.Upsample(scale_factor=2, mode='nearest'))
+         else:
+             assert isinstance(c, int)
+             layers.append(nn.Conv2d(in_channels, c, kernel_size=3, padding=1))
+             layers.append(nn.ReLU(inplace=True))
+             in_channels = c
+
+     return nn.Sequential(*layers)
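
The slices that AdaIN.py takes over this encoder ([:3], [3:8], [8:13], [13:22]) depend on the module ordering produced above; a quick sketch for inspecting it (no pretrained weights are needed just to list the layers):

from Network import vgg19, decoder

encoder = vgg19()   # weights argument is optional; omitted here
for idx, layer in enumerate(list(encoder.children())[:22]):
    print(idx, layer)

# With the 1x1 Conv2d prepended at index 0, the ReLU outputs land at
# index 2 (relu1_1), 7 (relu2_1), 12 (relu3_1) and 21 (relu4_1),
# which is why AdaIN.py truncates the encoder with [:22].

print(decoder())    # roughly mirrors the encoder up to relu4_1, with Upsample in place of MaxPool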
README.md CHANGED
@@ -1,8 +1,8 @@
  ---
- title: 2022 AdaIN Pytorch Demo
- emoji:
- colorFrom: blue
- colorTo: gray
+ title: AdaIN
+ emoji: 📚
+ colorFrom: red
+ colorTo: indigo
  sdk: streamlit
  sdk_version: 1.2.0
  app_file: app.py
app.py ADDED
@@ -0,0 +1,95 @@
+ import os
+ import streamlit as st
+ import gdown
+ from packaging.version import Version
+
+ from infer_func import convert
+
+ ROOT = os.path.dirname(os.path.abspath(__file__))
+
+ EXAMPLES = {
+     'content': {
+         'Brad Pitt': ROOT + '/examples/content/brad_pitt.jpg'
+     },
+     'style': {
+         'Flower of Life': ROOT + '/examples/style/flower_of_life.jpg'
+     }
+ }
+
+ VGG_WEIGHT_URL = 'https://drive.google.com/uc?id=1UcSl-Zn3byEmn15NIPXMf9zaGCKc2gfx'
+ DECODER_WEIGHT_URL = 'https://drive.google.com/uc?id=18JpLtMOapA-vwBz-LRomyTl24A9GwhTF'
+
+ VGG_WEIGHT_FILENAME = ROOT + '/vgg.pth'
+ DECODER_WEIGHT_FILENAME = ROOT + '/decoder.pth'
+
+
+ @st.cache
+ def download_models():
+     with st.spinner(text="Downloading VGG weights..."):
+         gdown.download(VGG_WEIGHT_URL, output=VGG_WEIGHT_FILENAME)
+     with st.spinner(text="Downloading Decoder weights..."):
+         gdown.download(DECODER_WEIGHT_URL, output=DECODER_WEIGHT_FILENAME)
+
+
+ def image_getter(image_kind):
+
+     image = None
+
+     options = ['Use Example Image', 'Upload Image']
+
+     if Version(st.__version__) >= Version('1.4.0'):
+         options.append('Open Camera')
+
+     option = st.selectbox(
+         'Choose Image',
+         options, key=image_kind)
+
+     if option == 'Use Example Image':
+         image_key = st.selectbox(
+             'Choose from examples',
+             EXAMPLES[image_kind], key=image_kind)
+         image = EXAMPLES[image_kind][image_key]
+
+     elif option == 'Upload Image':
+         image = st.file_uploader("Upload an image", type=['png', 'jpg', 'PNG', 'JPG', 'JPEG'], key=image_kind)
+     elif option == 'Open Camera':
+         image = st.camera_input('', key=image_kind)
+
+     return image
+
+
+ if __name__ == '__main__':
+
+     st.set_page_config(layout="wide")
+     st.header('Adaptive Instance Normalization demo based on '
+               '[2022-AdaIN-pytorch](https://github.com/media-comp/2022-AdaIN-pytorch)')
+
+     download_models()
+     # col1, col2, col3, col4 = st.columns((2, 2, 1, 3))
+     col1, col2, col3 = st.columns((3, 4, 4))
+     with col1:
+         st.subheader('Content Image')
+         content = image_getter('content')
+         st.subheader('Style Image')
+         style = image_getter('style')
+     with col2:
+         img1 = content if content is not None else 'examples/img.png'
+         img2 = style if style is not None else 'examples/img.png'
+         if img1 is not None:
+             st.image(img1, width=None, caption='Content Image')
+         if img2 is not None:
+             st.image(img2, width=None, caption='Style Image')
+
+     with col3:
+         color_control = st.checkbox('Preserve content image color')
+         alpha = st.slider('Strength of style transfer', 0.0, 1.0, 1.0, 0.01)
+         process = st.button('Stylize')
+
+     if content is not None and style is not None and process:
+         print(content, style)
+         with col3:
+             with st.spinner('Processing...'):
+                 output_image = convert(content, style, VGG_WEIGHT_FILENAME, DECODER_WEIGHT_FILENAME, alpha, color_control)
+
+             st.image(output_image, width=None, caption='Stylized Image')
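
app.py contains no Hugging Face-specific code and downloads its own weights with gdown, so the same demo should also run locally with the standard Streamlit entry point, e.g. `streamlit run app.py` from the repository root.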
examples/content/brad_pitt.jpg ADDED
examples/img.png ADDED
examples/style/flower_of_life.jpg ADDED
examples/style/sketch.jpg ADDED
infer_func.py ADDED
@@ -0,0 +1,60 @@
+ import torch
+ import torchvision.transforms
+ from PIL import Image
+
+ from AdaIN import AdaINNet
+ from utils import adaptive_instance_normalization, transform, linear_histogram_matching
+
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+
+ def style_transfer(content_tensor, style_tensor, encoder, decoder, alpha=1.0):
+     """
+     Given a content image and a style image, generate feature maps with the encoder, apply
+     neural style transfer with adaptive instance normalization, and generate the output image
+     with the decoder.
+
+     Args:
+         content_tensor (torch.FloatTensor): Content image
+         style_tensor (torch.FloatTensor): Style image
+         encoder: Encoder (vgg19) network
+         decoder: Decoder network
+         alpha (float, default=1.0): Weight of the style image features
+
+     Return:
+         output_tensor (torch.FloatTensor): Style transfer output image
+     """
+
+     content_enc = encoder(content_tensor)
+     style_enc = encoder(style_tensor)
+
+     transfer_enc = adaptive_instance_normalization(content_enc, style_enc)
+
+     mix_enc = alpha * transfer_enc + (1 - alpha) * content_enc
+     return decoder(mix_enc)
+
+
+ def convert(content_path, style_path, vgg_weights_path, decoder_weights_path, alpha, color_control):
+
+     vgg = torch.load(vgg_weights_path)
+     model = AdaINNet(vgg).to(device)
+     model.decoder.load_state_dict(torch.load(decoder_weights_path))
+     model.eval()
+
+     # Prepare the image transform
+     t = transform(512)
+
+     # Load images
+     content_img = Image.open(content_path)
+     content_tensor = t(content_img).unsqueeze(0).to(device)
+     style_tensor = t(Image.open(style_path)).unsqueeze(0).to(device)
+
+     if color_control:
+         style_tensor = linear_histogram_matching(content_tensor, style_tensor)
+
+     with torch.no_grad():
+         out_tensor = style_transfer(content_tensor, style_tensor, model.encoder, model.decoder, alpha).cpu()
+
+     output_image = torchvision.transforms.ToPILImage()(out_tensor.squeeze(0))
+
+     return output_image
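
convert() is self-contained, so it can be exercised outside Streamlit. A minimal sketch, assuming the two weight files that app.py downloads are already sitting next to the script (the weight paths are assumptions):

from infer_func import convert

output = convert(
    'examples/content/brad_pitt.jpg',     # bundled example content image
    'examples/style/flower_of_life.jpg',  # bundled example style image
    'vgg.pth',                            # assumed local path to the VGG weights
    'decoder.pth',                        # assumed local path to the decoder weights
    alpha=1.0,
    color_control=False,
)
output.save('stylized.png')               # convert() returns a PIL image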
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ torch==1.10.1
+ torchvision==0.11.2
+ opencv-python==4.5.1.48
+ numpy==1.18.4
+ Pillow==8.4.0
+ tqdm==4.62.3
+ imageio==2.9.0
+ imageio-ffmpeg==0.4.6
+ matplotlib==3.3.2
+ gdown
+ packaging
utils.py ADDED
@@ -0,0 +1,144 @@
+ import os
+ from PIL import Image, ImageFile
+ import torch
+ from torch.utils.data import Dataset
+ import torchvision.transforms as transforms
+ import matplotlib.pyplot as plt
+ from pathlib import Path
+ from glob import glob
+
+ def adaptive_instance_normalization(x, y, eps=1e-5):
+     """
+     Adaptive Instance Normalization. Perform neural style transfer given content image x
+     and style image y.
+
+     Args:
+         x (torch.FloatTensor): Content image tensor
+         y (torch.FloatTensor): Style image tensor
+         eps (float, default=1e-5): Small value to avoid zero division
+
+     Return:
+         output (torch.FloatTensor): AdaIN style transferred output
+     """
+
+     mu_x = torch.mean(x, dim=[2, 3])
+     mu_y = torch.mean(y, dim=[2, 3])
+     mu_x = mu_x.unsqueeze(-1).unsqueeze(-1)
+     mu_y = mu_y.unsqueeze(-1).unsqueeze(-1)
+
+     sigma_x = torch.std(x, dim=[2, 3])
+     sigma_y = torch.std(y, dim=[2, 3])
+     sigma_x = sigma_x.unsqueeze(-1).unsqueeze(-1) + eps
+     sigma_y = sigma_y.unsqueeze(-1).unsqueeze(-1) + eps
+
+     return (x - mu_x) / sigma_x * sigma_y + mu_y
+
+ def transform(size):
+     """
+     Image preprocessing transformation. Resize the image and convert it to a tensor.
+
+     Args:
+         size (int): Resized image size
+
+     Return:
+         output (torchvision.transforms): Composition of torchvision.transforms steps
+     """
+
+     t = []
+     t.append(transforms.Resize(size))
+     t.append(transforms.ToTensor())
+     t = transforms.Compose(t)
+     return t
+
+ def grid_image(row, col, images, height=6, width=6, save_pth='grid.png'):
+     """
+     Generate and save an image that contains a row x col grid of images.
+
+     Args:
+         row (int): number of rows
+         col (int): number of columns
+         images (list of PIL image): list of images
+         height (int): height of each image (inches)
+         width (int): width of each image (inches)
+         save_pth (str): save file path
+     """
+
+     width = col * width
+     height = row * height
+     plt.figure(figsize=(width, height))
+     for i, image in enumerate(images):
+         plt.subplot(row, col, i+1)
+         plt.imshow(image)
+         plt.axis('off')
+     plt.subplots_adjust(wspace=0.01, hspace=0.01)
+     plt.savefig(save_pth)
+
+
+ def linear_histogram_matching(content_tensor, style_tensor):
+     """
+     Given content_tensor and style_tensor, transform the style_tensor histogram to that of content_tensor.
+
+     Args:
+         content_tensor (torch.FloatTensor): Content image
+         style_tensor (torch.FloatTensor): Style image
+
+     Return:
+         style_tensor (torch.FloatTensor): histogram matched style image
+     """
+     # for each image in the batch
+     for b in range(len(content_tensor)):
+         std_ct = []
+         std_st = []
+         mean_ct = []
+         mean_st = []
+         # for each channel
+         for c in range(len(content_tensor[b])):
+             # use the standard deviation (not the variance) so the channel statistics are matched linearly
+             std_ct.append(torch.std(content_tensor[b][c], unbiased=False))
+             mean_ct.append(torch.mean(content_tensor[b][c]))
+             std_st.append(torch.std(style_tensor[b][c], unbiased=False))
+             mean_st.append(torch.mean(style_tensor[b][c]))
+             style_tensor[b][c] = (style_tensor[b][c] - mean_st[c]) * std_ct[c] / std_st[c] + mean_ct[c]
+     return style_tensor
+
+
+ class TrainSet(Dataset):
+     """
+     Build the training dataset.
+     """
+     def __init__(self, content_dir, style_dir, crop_size=256):
+         super().__init__()
+
+         self.content_files = [Path(f) for f in glob(content_dir + '/*')]
+         self.style_files = [Path(f) for f in glob(style_dir + '/*')]
+
+         self.transform = transforms.Compose([
+             transforms.Resize(512, interpolation=transforms.InterpolationMode.BICUBIC),
+             transforms.RandomCrop(crop_size),
+             transforms.ToTensor(),
+             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+         ])
+
+         Image.MAX_IMAGE_PIXELS = None
+         ImageFile.LOAD_TRUNCATED_IMAGES = True
+
+     def __len__(self):
+         return min(len(self.style_files), len(self.content_files))
+
+     def __getitem__(self, index):
+         content_img = Image.open(self.content_files[index]).convert('RGB')
+         style_img = Image.open(self.style_files[index]).convert('RGB')
+
+         content_sample = self.transform(content_img)
+         style_sample = self.transform(style_img)
+
+         return content_sample, style_sample
+
+ class Range(object):
+     """
+     Helper class for restricting the range of an input argument.
+     """
+     def __init__(self, start, end):
+         self.start = start
+         self.end = end
+     def __eq__(self, other):
+         return self.start <= other <= self.end
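
adaptive_instance_normalization implements AdaIN(x, y) = σ(y) · (x − μ(x)) / σ(x) + μ(y) per channel, so the output should take on the style features' channel statistics. A quick sketch to confirm this; the tensors below are random placeholders:

import torch
from utils import adaptive_instance_normalization

x = torch.rand(2, 512, 32, 32)   # placeholder content features
y = torch.rand(2, 512, 32, 32)   # placeholder style features

out = adaptive_instance_normalization(x, y)

# Per-channel mean/std of the output should be close to those of y
print(torch.allclose(out.mean(dim=[2, 3]), y.mean(dim=[2, 3]), atol=1e-3))
print(torch.allclose(out.std(dim=[2, 3]), y.std(dim=[2, 3]), atol=1e-3))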