Chengkai Yang committed
Commit 7930ce0 · 0 Parent(s)
Files changed (9):
  1. AdaIN.py +54 -0
  2. Network.py +73 -0
  3. README.md +132 -0
  4. test.py +125 -0
  5. test_interpolate.py +126 -0
  6. test_style_transfer.py +58 -0
  7. test_video.py +107 -0
  8. train.py +100 -0
  9. utils.py +117 -0
AdaIN.py ADDED
@@ -0,0 +1,54 @@
import torch
import torch.nn as nn
from Network import vgg19, decoder
from utils import adaptive_instance_normalization


class AdaINNet(nn.Module):
    """
    AdaIN Style Transfer Network

    Args:
        vgg_weight: pretrained vgg19 weights
    """
    def __init__(self, vgg_weight):
        super().__init__()
        self.encoder = vgg19(vgg_weight)
        self.encoder = nn.Sequential(*list(self.encoder.children())[:22])  # drop layers after relu4_1
        for parameter in self.encoder.parameters():
            parameter.requires_grad = False

        self.decoder = decoder()

        self.mseloss = nn.MSELoss()

    def _style_loss(self, x, y):
        return self.mseloss(torch.mean(x, dim=[2, 3]), torch.mean(y, dim=[2, 3])) + \
            self.mseloss(torch.std(x, dim=[2, 3]), torch.std(y, dim=[2, 3]))

    def forward(self, content, style, alpha=1.0):
        content_enc = self.encoder(content)
        style_enc = self.encoder(style)
        transfer_enc = adaptive_instance_normalization(content_enc, style_enc)

        out = self.decoder(transfer_enc)

        # vgg19 layer relu1_1
        style_relu11 = self.encoder[:3](style)
        out_relu11 = self.encoder[:3](out)

        # vgg19 layer relu2_1
        style_relu21 = self.encoder[3:8](style_relu11)
        out_relu21 = self.encoder[3:8](out_relu11)

        # vgg19 layer relu3_1
        style_relu31 = self.encoder[8:13](style_relu21)
        out_relu31 = self.encoder[8:13](out_relu21)

        # vgg19 layer relu4_1
        out_enc = self.encoder[13:](out_relu31)

        content_loss = self.mseloss(out_enc, transfer_enc)
        style_loss = self._style_loss(out_relu11, style_relu11) + self._style_loss(out_relu21, style_relu21) + \
            self._style_loss(out_relu31, style_relu31) + self._style_loss(out_enc, style_enc)

        return content_loss, style_loss
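Note that `AdaINNet.forward` returns the two training losses rather than a stylized image; decoding happens separately at inference time (see the test scripts below). A minimal smoke test of the loss API — the dummy shapes and the local weight file are assumptions — might look like:

```python
import torch
from AdaIN import AdaINNet

# Assumes vgg_normalized.pth is present locally (see README for the download link).
model = AdaINNet(torch.load('vgg_normalized.pth'))

content = torch.rand(2, 3, 256, 256)  # dummy content batch
style = torch.rand(2, 3, 256, 256)    # dummy style batch
content_loss, style_loss = model(content, style)
print(content_loss.item(), style_loss.item())
```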
Network.py ADDED
@@ -0,0 +1,73 @@
import torch.nn as nn

vgg19_cfg = [3, 64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M", 512, 512, 512, 512, "M", 512, 512, 512, 512, "M"]
decoder_cfg = [512, 256, "U", 256, 256, 256, 128, "U", 128, 64, "U", 64, 3]

def vgg19(weights=None):
    """
    Build the vgg19 network. Load weights if given.

    Args:
        weights (dict): vgg19 pretrained weights

    Return:
        layers (nn.Sequential): vgg19 layers
    """

    modules = make_block(vgg19_cfg)
    modules = [nn.Conv2d(3, 3, kernel_size=1)] + list(modules.children())
    layers = nn.Sequential(*modules)

    if weights:
        layers.load_state_dict(weights)

    return layers


def decoder(weights=None):
    """
    Build the decoder network. Load weights if given.

    Args:
        weights (dict): decoder pretrained weights

    Return:
        layers (nn.Sequential): decoder layers
    """

    modules = make_block(decoder_cfg)
    layers = nn.Sequential(*list(modules.children())[:-1])  # no ReLU after the last layer

    if weights:
        layers.load_state_dict(weights)

    return layers


def make_block(config):
    """
    Helper function for building blocks of convolutional layers.

    Args:
        config (list): List of layer configs.
            "M" - max pooling layer.
            "U" - upsampling layer.
            i (int) - convolutional layer with i filters, followed by ReLU activation.
    Return:
        layers (nn.Sequential): block layers
    """
    layers = []
    in_channels = config[0]

    for c in config[1:]:
        if c == "M":
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2, padding=0))
        elif c == "U":
            layers.append(nn.Upsample(scale_factor=2, mode='nearest'))
        else:
            assert isinstance(c, int)
            layers.append(nn.Conv2d(in_channels, c, kernel_size=3, padding=1))
            layers.append(nn.ReLU(inplace=True))
            in_channels = c

    return nn.Sequential(*layers)
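Since the decoder's upsampling mirrors the encoder's pooling, features taken at relu4_1 decode back to the input resolution. A quick shape check (untrained weights; the input size is an arbitrary choice) illustrates this:

```python
import torch
from Network import vgg19, decoder

enc = torch.nn.Sequential(*list(vgg19().children())[:22])  # up to relu4_1, as in AdaIN.py
dec = decoder()

x = torch.rand(1, 3, 256, 256)
feat = enc(x)
print(feat.shape)       # torch.Size([1, 512, 32, 32]): three 2x poolings, 512 channels
print(dec(feat).shape)  # torch.Size([1, 3, 256, 256]): three 2x upsamplings restore the size
```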
README.md ADDED
@@ -0,0 +1,132 @@
2022-AdaIN-pytorch
============================
This is an unofficial PyTorch implementation of the paper "Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization" [arxiv](https://arxiv.org/abs/1703.06868). I referred to the [official implementation](https://github.com/xunhuang1995/AdaIN-style) in Torch. I used pretrained weights for vgg19 and the decoder from [naoto0804](https://github.com/naoto0804/pytorch-AdaIN).

Requirements
----------------------------
* Python 3.7
* PyTorch 1.10
* Pillow
* TorchVision
* Numpy
* tqdm


Usage
----------------------------

### Training

The encoder uses a pretrained vgg19 network. Download the [vgg19 weights](https://drive.google.com/file/d/1UcSl-Zn3byEmn15NIPXMf9zaGCKc2gfx/view?usp=sharing). The decoder is trained on the MSCOCO and WikiArt datasets.
Run the script train.py:
```
$ python train.py --content_dir $CONTENT_DIR --style_dir $STYLE_DIR --cuda

usage: train.py [-h] [--content_dir CONTENT_DIR] [--style_dir STYLE_DIR]
                [--epochs EPOCHS] [--batch_size BATCH_SIZE] [--resume RESUME] [--cuda]

optional arguments:
  -h, --help            show this help message and exit
  --content_dir CONTENT_DIR
                        content images folder path
  --style_dir STYLE_DIR
                        style images folder path
  --epochs EPOCHS       Number of epochs
  --batch_size BATCH_SIZE
                        Batch size
  --resume RESUME       Continue training from epoch
  --cuda                Use CUDA
```

### Test Image Style Transfer

Download the [decoder weights](https://drive.google.com/file/d/18JpLtMOapA-vwBz-LRomyTl24A9GwhTF/view?usp=sharing).

To test basic style transfer, run the script test.py:

```
$ python test.py --content_image $IMG --style_image $STYLE --decoder_weight $WEIGHT --cuda

usage: test.py [-h] [--content_image CONTENT_IMAGE] [--content_dir CONTENT_DIR]
               [--style_image STYLE_IMAGE] [--style_dir STYLE_DIR]
               [--decoder_weight DECODER_WEIGHT] [--alpha {Alpha Range}]
               [--cuda] [--grid_pth GRID_PTH]

optional arguments:
  -h, --help            show this help message and exit
  --content_image CONTENT_IMAGE
                        single content image file
  --content_dir CONTENT_DIR
                        content image directory; iterates over all images under this directory
  --style_image STYLE_IMAGE
                        single style image
  --style_dir STYLE_DIR
                        style image directory; iterates over all images under this directory
  --decoder_weight DECODER_WEIGHT
                        decoder weight file
  --alpha {Alpha Range}
                        Alpha [0.0, 1.0] controls the style transfer level
  --cuda                Use CUDA
  --grid_pth GRID_PTH
                        Specify a grid image path (default=None) to generate a grid image that contains all style transferred images
```

### Test Image Interpolation Style Transfer

To test style transfer interpolation, run the script test_interpolate.py:

```
$ python test_interpolate.py --content_image $IMG --style_image $STYLE --decoder_weight $WEIGHT --cuda

usage: test_interpolate.py [-h] [--content_image CONTENT_IMAGE] [--style_image STYLE_IMAGE]
                           [--decoder_weight DECODER_WEIGHT] [--alpha {Alpha Range}]
                           [--interpolation_weights INTERPOLATION_WEIGHTS]
                           [--cuda] [--grid_pth GRID_PTH]

optional arguments:
  -h, --help            show this help message and exit
  --content_image CONTENT_IMAGE
                        single content image file
  --style_image STYLE_IMAGE
                        multiple style image files, separated by commas
  --decoder_weight DECODER_WEIGHT
                        decoder weight file
  --alpha {Alpha Range}
                        Alpha [0.0, 1.0] controls the style transfer level
  --interpolation_weights INTERPOLATION_WEIGHTS
                        Interpolation weight of each style image, separated by commas. Performs interpolation style transfer once. Do not specify together with grid_pth.
  --cuda                Use CUDA
  --grid_pth GRID_PTH
                        Specify a grid image path (default=None) to perform interpolation style transfer multiple times with different built-in weights and generate a grid image that contains all style transferred images. Provide 4 style images. Do not specify together with interpolation_weights.
```

### Test Video Style Transfer

To test video style transfer, run the script test_video.py:

```
$ python test_video.py --content_video $VID --style_image $STYLE --decoder_weight $WEIGHT --cuda

usage: test_video.py [-h] [--content_video CONTENT_VIDEO] [--style_image STYLE_IMAGE]
                     [--decoder_weight DECODER_WEIGHT] [--alpha {Alpha Range}]
                     [--cuda]

optional arguments:
  -h, --help            show this help message and exit
  --content_video CONTENT_VIDEO
                        single content video file
  --style_image STYLE_IMAGE
                        single style image
  --decoder_weight DECODER_WEIGHT
                        decoder weight file
  --alpha {Alpha Range}
                        Alpha [0.0, 1.0] controls the style transfer level
  --cuda                Use CUDA
```


### References

- [1]: X. Huang and S. Belongie. "Arbitrary Style Transfer in Real-time with Adaptive Instance Normalization," in ICCV, 2017. [arxiv](https://arxiv.org/abs/1703.06868)
- [2]: [Original implementation in Torch](https://github.com/xunhuang1995/AdaIN-style)
- [3]: [Pretrained weights](https://github.com/naoto0804/pytorch-AdaIN)
test.py ADDED
@@ -0,0 +1,125 @@
import os
import argparse
import torch
import time
import numpy as np
from pathlib import Path
from AdaIN import AdaINNet
from PIL import Image
from torchvision.utils import save_image
from utils import adaptive_instance_normalization, grid_image, transform, Range
from glob import glob

parser = argparse.ArgumentParser()
parser.add_argument('--content_image', type=str, help='Content image file path')
parser.add_argument('--content_dir', type=str, help='Content image folder path')
parser.add_argument('--style_image', type=str, help='Style image file path')
parser.add_argument('--style_dir', type=str, help='Style image folder path')
parser.add_argument('--decoder_weight', type=str, required=True, help='Decoder weight file path')
parser.add_argument('--alpha', type=float, default=1.0, choices=[Range(0.0, 1.0)], help='Alpha [0.0, 1.0] controls style transfer level')
parser.add_argument('--cuda', action='store_true', help='Use CUDA')
parser.add_argument('--grid_pth', type=str, default=None, help='Specify a grid image path (default=None) to generate a grid image that contains all style transferred images')
args = parser.parse_args()
assert args.content_image or args.content_dir
assert args.style_image or args.style_dir
assert args.decoder_weight

device = torch.device('cuda' if args.cuda and torch.cuda.is_available() else 'cpu')


def style_transfer(content_tensor, style_tensor, encoder, decoder, alpha=1.0):
    """
    Given a content image and a style image, generate feature maps with the encoder,
    apply neural style transfer with adaptive instance normalization, and generate
    the output image with the decoder.

    Args:
        content_tensor (torch.FloatTensor): Content image
        style_tensor (torch.FloatTensor): Style image
        encoder: Encoder (vgg19) network
        decoder: Decoder network
        alpha (float, default=1.0): Weight of the style image features

    Return:
        output_tensor (torch.FloatTensor): Style transfer output image
    """

    content_enc = encoder(content_tensor)
    style_enc = encoder(style_tensor)

    transfer_enc = adaptive_instance_normalization(content_enc, style_enc)

    mix_enc = alpha * transfer_enc + (1 - alpha) * content_enc
    return decoder(mix_enc)


def main():
    # Read content images and style images
    if args.content_image:
        content_pths = [Path(args.content_image)]
    else:
        content_pths = [Path(f) for f in glob(args.content_dir + '/*')]

    if args.style_image:
        style_pths = [Path(args.style_image)]
    else:
        style_pths = [Path(f) for f in glob(args.style_dir + '/*')]

    out_dir = './results/'
    os.makedirs(out_dir, exist_ok=True)

    # Load AdaIN model
    vgg = torch.load('vgg_normalized.pth')
    model = AdaINNet(vgg).to(device)
    model.decoder.load_state_dict(torch.load(args.decoder_weight))
    model.eval()

    # Prepare image transform
    t = transform(512)

    # Prepare grid image; the top-left corner cell stays empty
    if args.grid_pth:
        imgs = [np.zeros((1, 1))]
        for style_pth in style_pths:
            imgs.append(Image.open(style_pth))

    # Timer
    times = []

    for content_pth in content_pths:
        content_img = Image.open(content_pth)
        content_tensor = t(content_img).unsqueeze(0).to(device)

        if args.grid_pth:
            imgs.append(content_img)

        for style_pth in style_pths:
            style_tensor = t(Image.open(style_pth)).unsqueeze(0).to(device)

            tic = time.perf_counter()  # Start time
            with torch.no_grad():
                out_tensor = style_transfer(content_tensor, style_tensor, model.encoder, model.decoder, args.alpha).cpu()
            toc = time.perf_counter()  # End time

            print("Content: " + content_pth.stem + ". Style: "
                + style_pth.stem + '. Style transfer time: %.4f seconds' % (toc - tic))
            times.append(toc - tic)

            out_pth = out_dir + content_pth.stem + '_style_' + style_pth.stem + '_alpha' + str(args.alpha) + content_pth.suffix
            save_image(out_tensor, out_pth)

            if args.grid_pth:
                imgs.append(Image.open(out_pth))

    avg = sum(times) / len(times)
    print("Average style transfer time: %.4f seconds" % avg)

    if args.grid_pth:
        print("Generating grid image")
        grid_image(len(content_pths) + 1, len(style_pths) + 1, imgs, save_pth=args.grid_pth)
        print("Finished")


if __name__ == '__main__':
    main()
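Because test.py parses command-line arguments at import time, it is awkward to call from other code; the same inference steps can be reproduced directly with the model and utils. A sketch follows — the image and weight file names are placeholders:

```python
import torch
from PIL import Image
from torchvision.utils import save_image
from AdaIN import AdaINNet
from utils import adaptive_instance_normalization, transform

model = AdaINNet(torch.load('vgg_normalized.pth'))
model.decoder.load_state_dict(torch.load('decoder.pth'))  # placeholder weight file
model.eval()

t = transform(512)
content = t(Image.open('content.jpg')).unsqueeze(0)  # placeholder paths
style = t(Image.open('style.jpg')).unsqueeze(0)

alpha = 0.8
with torch.no_grad():
    content_enc = model.encoder(content)
    style_enc = model.encoder(style)
    transfer_enc = adaptive_instance_normalization(content_enc, style_enc)
    out = model.decoder(alpha * transfer_enc + (1 - alpha) * content_enc)
save_image(out, 'out.jpg')
```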
test_interpolate.py ADDED
@@ -0,0 +1,126 @@
import os
import argparse
import torch
from pathlib import Path
from AdaIN import AdaINNet
from PIL import Image
from torchvision.utils import save_image
from utils import adaptive_instance_normalization, transform, Range, grid_image

parser = argparse.ArgumentParser()
parser.add_argument('--content_image', type=str, required=True, help='Content image file path')
parser.add_argument('--style_image', type=str, required=True, help='Multiple style image file paths, separated by commas')
parser.add_argument('--decoder_weight', type=str, required=True, help='Decoder weight file path')
parser.add_argument('--alpha', type=float, default=1.0, choices=[Range(0.0, 1.0)], help='Alpha [0.0, 1.0] controls style transfer level')
parser.add_argument('--interpolation_weights', type=str, help='Interpolation weight of each style image, separated by commas')
parser.add_argument('--cuda', action='store_true', help='Use CUDA')
parser.add_argument('--grid_pth', type=str, default=None, help='Specify a grid image path (default=None) to generate a grid image that contains all style transferred images. Grid mode requires 4 style images.')
args = parser.parse_args()
assert args.interpolation_weights or args.grid_pth

device = torch.device('cuda' if args.cuda and torch.cuda.is_available() else 'cpu')


def interpolate_style_transfer(content_tensor, style_tensor, encoder, decoder, alpha=1.0, interpolation_weights=None):
    """
    Given a content image and multiple style images, generate feature maps with the
    encoder, apply neural style transfer with adaptive instance normalization,
    interpolate the style features with the interpolation weights, and generate the
    output image with the decoder.

    Args:
        content_tensor (torch.FloatTensor): Content image
        style_tensor (torch.FloatTensor): Batch of style images
        encoder: Encoder (vgg19) network
        decoder: Decoder network
        alpha (float, default=1.0): Weight of the style image features
        interpolation_weights (list): Weight of each style image

    Return:
        output_tensor (torch.FloatTensor): Interpolation style transfer output image
    """

    content_enc = encoder(content_tensor)
    style_enc = encoder(style_tensor)

    transfer_enc = torch.zeros_like(content_enc).to(device)
    full_enc = adaptive_instance_normalization(content_enc, style_enc)
    for i, w in enumerate(interpolation_weights):
        transfer_enc += w * full_enc[i]

    mix_enc = alpha * transfer_enc + (1 - alpha) * content_enc
    return decoder(mix_enc)


def main():
    # Read content and style images
    content_pths = [Path(args.content_image)]
    style_pths = [Path(pth) for pth in args.style_image.split(',')]

    inter_weights = []
    # In grid mode, use 4 style images and a 5x5 grid of interpolation weights
    if args.grid_pth:
        assert len(style_pths) == 4
        inter_weights = [[min(4 - a, 4 - b) / 4, min(4 - a, b) / 4, min(a, 4 - b) / 4, min(a, b) / 4]
                         for a in range(5) for b in range(5)]
    # Otherwise, use the user-supplied interpolation weights, normalized to sum to 1
    else:
        inter_weight = [float(i) for i in args.interpolation_weights.split(',')]
        inter_weight = [i / sum(inter_weight) for i in inter_weight]
        inter_weights.append(inter_weight)

    out_dir = './results_interpolate/'
    os.makedirs(out_dir, exist_ok=True)

    # Load AdaIN model
    vgg = torch.load('vgg_normalized.pth')
    model = AdaINNet(vgg).to(device)
    model.decoder.load_state_dict(torch.load(args.decoder_weight))
    model.eval()

    # Prepare image transform
    t = transform(512)

    imgs = []

    for content_pth in content_pths:
        content_tensor = t(Image.open(content_pth)).unsqueeze(0).to(device)

        style_tensor = []
        for style_pth in style_pths:
            img = Image.open(style_pth)
            style_tensor.append(transform([512, 512])(img))  # Resize style images to the same size
        style_tensor = torch.stack(style_tensor, dim=0).to(device)

        for inter_weight in inter_weights:
            with torch.no_grad():
                out_tensor = interpolate_style_transfer(content_tensor, style_tensor, model.encoder, model.decoder, args.alpha, inter_weight).cpu()

            print("Content: " + content_pth.stem + ". Style: " + str([style_pth.stem for style_pth in style_pths]) + ". Interpolation weights: " + str(inter_weight))

            out_pth = out_dir + content_pth.stem + '_interpolate_' + str(inter_weight) + content_pth.suffix
            save_image(out_tensor, out_pth)

            if args.grid_pth:
                imgs.append(Image.open(out_pth))

    if args.grid_pth:
        print("Generating grid image")
        grid_image(5, 5, imgs, save_pth=args.grid_pth)
        print("Finished")


if __name__ == '__main__':
    main()
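The built-in 5x5 grid sweeps mixtures of the four styles so that each corner cell is a single pure style. Note that, unlike the user-supplied path above, these built-in weights are not normalized to sum to 1. A small check of the comprehension:

```python
inter_weights = [[min(4 - a, 4 - b) / 4, min(4 - a, b) / 4, min(a, 4 - b) / 4, min(a, b) / 4]
                 for a in range(5) for b in range(5)]

print(inter_weights[0])        # [1.0, 0.0, 0.0, 0.0] -> pure style 1 at the top-left corner
print(inter_weights[24])       # [0.0, 0.0, 0.0, 1.0] -> pure style 4 at the bottom-right corner
print(sum(inter_weights[12]))  # 2.0 -> the center cell's weights sum to 2, not 1
```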
test_style_transfer.py ADDED
@@ -0,0 +1,58 @@
import argparse
import os
import torch
import torchvision.transforms as transforms
from torchvision.utils import save_image
from PIL import Image
from AdaIN import AdaINNet
from utils import adaptive_instance_normalization

class AlphaRange(object):
    def __init__(self, start, end):
        self.start = start
        self.end = end
    def __eq__(self, other):
        return self.start <= other <= self.end
    def __str__(self):
        return 'Alpha Range'

parser = argparse.ArgumentParser()
parser.add_argument('--input_image', type=str, help='test image')
parser.add_argument('--style_image', type=str, help='style image')
parser.add_argument('--weight', type=str, help='decoder weight file')
parser.add_argument('--alpha', type=float, default=1.0, choices=[AlphaRange(0.0, 1.0)], help='Level of style transfer, value between 0 and 1')
parser.add_argument('--cuda', action='store_true', help='Use GPU')


if __name__ == '__main__':
    opt = parser.parse_args()
    input_image = Image.open(opt.input_image)
    style_image = Image.open(opt.style_image)
    output_format = os.path.splitext(opt.input_image)[1]
    out_dir = './results/'
    os.makedirs(out_dir, exist_ok=True)
    with torch.no_grad():
        vgg_model = torch.load('vgg_normalized.pth')

        net = AdaINNet(vgg_model)
        net.decoder.load_state_dict(torch.load(opt.weight))
        net.eval()

        input_image = transforms.Resize(512)(input_image)
        style_image = transforms.Resize(512)(style_image)

        input_tensor = transforms.ToTensor()(input_image).unsqueeze(0)
        style_tensor = transforms.ToTensor()(style_image).unsqueeze(0)

        if torch.cuda.is_available() and opt.cuda:
            net.cuda()
            input_tensor = input_tensor.cuda()
            style_tensor = style_tensor.cuda()

        # AdaINNet.forward returns losses, so run the transfer manually:
        # encode, mix features with AdaIN, then decode
        content_enc = net.encoder(input_tensor)
        style_enc = net.encoder(style_tensor)
        transfer_enc = adaptive_instance_normalization(content_enc, style_enc)
        out_tensor = net.decoder(opt.alpha * transfer_enc + (1 - opt.alpha) * content_enc)

        save_image(out_tensor, out_dir
            + os.path.splitext(os.path.basename(opt.input_image))[0]
            + '_style_' + os.path.splitext(os.path.basename(opt.style_image))[0]
            + output_format)
test_video.py ADDED
@@ -0,0 +1,107 @@
import os
import argparse
import torch
from pathlib import Path
from AdaIN import AdaINNet
from PIL import Image
from utils import transform, adaptive_instance_normalization, Range
import cv2
import imageio
import numpy as np
from tqdm import tqdm


parser = argparse.ArgumentParser()
parser.add_argument('--content_video', type=str, required=True, help='Content video file path')
parser.add_argument('--style_image', type=str, required=True, help='Style image file path')
parser.add_argument('--decoder_weight', type=str, required=True, help='Decoder weight file path')
parser.add_argument('--alpha', type=float, default=1.0, choices=[Range(0.0, 1.0)], help='Alpha [0.0, 1.0] controls style transfer level')
parser.add_argument('--cuda', action='store_true', help='Use CUDA')
args = parser.parse_args()

device = torch.device('cuda' if args.cuda and torch.cuda.is_available() else 'cpu')


def style_transfer(content_tensor, style_tensor, encoder, decoder, alpha=1.0):
    """
    Given a content image and a style image, generate feature maps with the encoder,
    apply neural style transfer with adaptive instance normalization, and generate
    the output image with the decoder.

    Args:
        content_tensor (torch.FloatTensor): Content image
        style_tensor (torch.FloatTensor): Style image
        encoder: Encoder (vgg19) network
        decoder: Decoder network
        alpha (float, default=1.0): Weight of the style image features

    Return:
        output_tensor (torch.FloatTensor): Style transfer output image
    """

    content_enc = encoder(content_tensor)
    style_enc = encoder(style_tensor)

    transfer_enc = adaptive_instance_normalization(content_enc, style_enc)

    mix_enc = alpha * transfer_enc + (1 - alpha) * content_enc
    return decoder(mix_enc)


def main():
    # Read video file
    content_video_pth = Path(args.content_video)
    content_video = cv2.VideoCapture(str(content_video_pth))
    style_image_pth = Path(args.style_image)
    style_image = Image.open(style_image_pth)

    fps = int(content_video.get(cv2.CAP_PROP_FPS))
    frame_count = int(content_video.get(cv2.CAP_PROP_FRAME_COUNT))
    video_height = int(content_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_width = int(content_video.get(cv2.CAP_PROP_FRAME_WIDTH))

    video_tqdm = tqdm(total=frame_count)

    out_dir = './results_video/'
    os.makedirs(out_dir, exist_ok=True)
    out_pth = Path(out_dir + content_video_pth.stem + '_style_'
        + style_image_pth.stem + content_video_pth.suffix)
    writer = imageio.get_writer(out_pth, mode='I', fps=fps)

    # Load AdaIN model
    vgg = torch.load('vgg_normalized.pth')
    model = AdaINNet(vgg).to(device)
    model.decoder.load_state_dict(torch.load(args.decoder_weight))
    model.eval()

    t = transform(512)

    style_tensor = t(style_image).unsqueeze(0).to(device)

    while content_video.isOpened():
        ret, content_image = content_video.read()
        if not ret:  # Failed to read a frame
            break

        # OpenCV reads frames as BGR; convert to RGB before building the tensor
        content_image = cv2.cvtColor(content_image, cv2.COLOR_BGR2RGB)
        content_tensor = t(Image.fromarray(content_image)).unsqueeze(0).to(device)

        with torch.no_grad():
            out_tensor = style_transfer(content_tensor, style_tensor, model.encoder,
                model.decoder, args.alpha).cpu().numpy()

        # Convert the output frame back to the original size and (0, 255) RGB range
        out_tensor = np.squeeze(out_tensor, axis=0)
        out_tensor = np.transpose(out_tensor, (1, 2, 0))
        out_tensor = cv2.normalize(src=out_tensor, dst=None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
        out_tensor = cv2.resize(out_tensor, (video_width, video_height), interpolation=cv2.INTER_CUBIC)

        writer.append_data(out_tensor)
        video_tqdm.update(1)

    content_video.release()
    writer.close()

    print("\nContent: " + content_video_pth.stem + ". Style: " + style_image_pth.stem + '\n')


if __name__ == '__main__':
    main()
train.py ADDED
@@ -0,0 +1,100 @@
import argparse
import os
import numpy as np
import torch
from torch.utils.data import DataLoader
from utils import TrainSet
from AdaIN import AdaINNet
from tqdm import tqdm

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--content_dir', type=str, required=True, help='content images folder path')
    parser.add_argument('--style_dir', type=str, required=True, help='style images folder path')
    parser.add_argument('--epochs', type=int, default=1, help='Number of epochs')
    parser.add_argument('--batch_size', type=int, default=8, help='Batch size')
    parser.add_argument('--resume', type=int, default=0, help='Continue training from epoch')
    parser.add_argument('--cuda', action='store_true', help='Use CUDA')
    args = parser.parse_args()

    device = torch.device('cuda' if args.cuda and torch.cuda.is_available() else 'cpu')

    check_point_dir = './check_point/'
    weights_dir = './weights/'
    os.makedirs(check_point_dir, exist_ok=True)
    os.makedirs(weights_dir, exist_ok=True)

    train_set = TrainSet(args.content_dir, args.style_dir)
    train_loader = DataLoader(dataset=train_set, batch_size=args.batch_size, shuffle=True)

    vgg_model = torch.load('vgg_normalized.pth')
    model = AdaINNet(vgg_model).to(device)

    decoder_optimizer = torch.optim.Adam(model.decoder.parameters(), lr=1e-6)
    total_loss, content_loss, style_loss = 0.0, 0.0, 0.0
    total_num = 0
    losses = []
    iteration = 0

    if args.resume > 0:
        states = torch.load(check_point_dir + 'epoch_' + str(args.resume) + '.pth')
        model.decoder.load_state_dict(states['decoder'])
        decoder_optimizer.load_state_dict(states['decoder_optimizer'])
        losses = states['losses']
        iteration = states['iteration']

    for epoch in range(args.resume + 1, args.epochs + 1):
        print("Begin epoch: %i/%i" % (epoch, args.epochs))
        train_tqdm = tqdm(train_loader)
        train_tqdm.set_description('Loss: %.4f, Content loss: %.4f, Style loss: %.4f' % (total_loss, content_loss, style_loss))
        losses.append((iteration, total_loss, content_loss, style_loss))
        total_loss, content_loss, style_loss = 0.0, 0.0, 0.0

        for content_batch, style_batch in train_tqdm:
            content_batch = content_batch.to(device)
            style_batch = style_batch.to(device)

            loss_content, loss_style = model(content_batch, style_batch)
            loss_scaled = loss_content + 10 * loss_style

            decoder_optimizer.zero_grad()
            loss_scaled.backward()
            decoder_optimizer.step()

            # Accumulate running losses, weighted by the batch size
            total_loss += loss_scaled.item() * style_batch.size(0)
            content_loss += loss_content.item() * style_batch.size(0)
            style_loss += loss_style.item() * style_batch.size(0)
            total_num += style_batch.size(0)

            if iteration % 100 == 0 and iteration > 0:
                total_loss /= total_num
                content_loss /= total_num
                style_loss /= total_num
                print('')
                train_tqdm.set_description('Loss: %.4f, Content loss: %.4f, Style loss: %.4f' % (total_loss, content_loss, style_loss))

                losses.append((iteration, total_loss, content_loss, style_loss))

                total_loss, content_loss, style_loss = 0.0, 0.0, 0.0
                total_num = 0

            # Average the remaining running losses at the epoch boundary
            if total_num > 0 and iteration % np.ceil(len(train_loader.dataset) / args.batch_size) == 0 and iteration > 0:
                total_loss /= total_num
                content_loss /= total_num
                style_loss /= total_num
                total_num = 0

            iteration += 1

        print('Finished epoch: %i/%i' % (epoch, args.epochs))

        states = {'decoder': model.decoder.state_dict(), 'decoder_optimizer': decoder_optimizer.state_dict(),
                  'losses': losses, 'iteration': iteration}
        torch.save(states, check_point_dir + 'epoch_%i.pth' % epoch)
        torch.save(model.decoder.state_dict(), weights_dir + 'decoder_epoch_%i.pth' % epoch)
        np.savetxt("losses", losses, fmt='%i,%.4f,%.4f,%.4f')

if __name__ == '__main__':
    main()
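Each checkpoint bundles everything --resume needs: the decoder weights, the optimizer state, the logged losses, and the iteration counter. A quick way to inspect one (the path assumes the default checkpoint directory and epoch 1):

```python
import torch

states = torch.load('./check_point/epoch_1.pth', map_location='cpu')
print(list(states.keys()))  # ['decoder', 'decoder_optimizer', 'losses', 'iteration']
```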
utils.py ADDED
@@ -0,0 +1,117 @@
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from PIL import Image, ImageFile
from pathlib import Path
from glob import glob

def adaptive_instance_normalization(x, y, eps=1e-5):
    """
    Adaptive Instance Normalization. Perform neural style transfer given a content
    image x and a style image y.

    Args:
        x (torch.FloatTensor): Content image tensor
        y (torch.FloatTensor): Style image tensor
        eps (float, default=1e-5): Small value to avoid zero division

    Return:
        output (torch.FloatTensor): AdaIN style transferred output
    """

    mu_x = torch.mean(x, dim=[2, 3])
    mu_y = torch.mean(y, dim=[2, 3])
    mu_x = mu_x.unsqueeze(-1).unsqueeze(-1)
    mu_y = mu_y.unsqueeze(-1).unsqueeze(-1)

    sigma_x = torch.std(x, dim=[2, 3])
    sigma_y = torch.std(y, dim=[2, 3])
    sigma_x = sigma_x.unsqueeze(-1).unsqueeze(-1) + eps
    sigma_y = sigma_y.unsqueeze(-1).unsqueeze(-1) + eps

    return (x - mu_x) / sigma_x * sigma_y + mu_y

def transform(size):
    """
    Image preprocessing transformation. Resize the image and convert it to a tensor.

    Args:
        size (int): Target image size

    Return:
        output (torchvision.transforms): Composition of torchvision.transforms steps
    """

    t = []
    t.append(transforms.Resize(size))
    t.append(transforms.ToTensor())
    t = transforms.Compose(t)
    return t

def grid_image(row, col, images, height=6, width=6, save_pth='grid.png'):
    """
    Generate and save an image that contains a row x col grid of images.

    Args:
        row (int): number of rows
        col (int): number of columns
        images (list of PIL images): list of images
        height (int): height of each image (inches)
        width (int): width of each image (inches)
        save_pth (str): save file path
    """

    width = col * width
    height = row * height
    plt.figure(figsize=(width, height))
    for i, image in enumerate(images):
        plt.subplot(row, col, i + 1)
        plt.imshow(image)
        plt.axis('off')
    plt.subplots_adjust(wspace=0.01, hspace=0.01)
    plt.savefig(save_pth)


class TrainSet(Dataset):
    """
    Training dataset of content/style image pairs.
    """
    def __init__(self, content_dir, style_dir, crop_size=256):
        super().__init__()

        self.content_files = [Path(f) for f in glob(content_dir + '/*')]
        self.style_files = [Path(f) for f in glob(style_dir + '/*')]

        self.transform = transforms.Compose([
            transforms.Resize(512, interpolation=transforms.InterpolationMode.BICUBIC),
            transforms.RandomCrop(crop_size),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])

        Image.MAX_IMAGE_PIXELS = None
        ImageFile.LOAD_TRUNCATED_IMAGES = True

    def __len__(self):
        return min(len(self.style_files), len(self.content_files))

    def __getitem__(self, index):
        content_img = Image.open(self.content_files[index]).convert('RGB')
        style_img = Image.open(self.style_files[index]).convert('RGB')

        content_sample = self.transform(content_img)
        style_sample = self.transform(style_img)

        return content_sample, style_sample

class Range(object):
    """
    Helper class for restricting the range of an input argument.
    """
    def __init__(self, start, end):
        self.start = start
        self.end = end
    def __eq__(self, other):
        return self.start <= other <= self.end
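AdaIN re-normalizes each content feature channel to carry the style's channel-wise statistics: AdaIN(x, y) = sigma(y) * (x - mu(x)) / sigma(x) + mu(y). A quick numerical check of this property on random features (the shapes are arbitrary stand-ins for relu4_1 activations):

```python
import torch
from utils import adaptive_instance_normalization

x = torch.rand(1, 512, 32, 32)  # stand-in content features
y = torch.rand(1, 512, 32, 32)  # stand-in style features
out = adaptive_instance_normalization(x, y)

# Per-channel statistics of the output should match the style's.
print(torch.allclose(out.mean(dim=[2, 3]), y.mean(dim=[2, 3]), atol=1e-3))  # True
print(torch.allclose(out.std(dim=[2, 3]), y.std(dim=[2, 3]), atol=1e-3))    # True
```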