TheNetherWatcher's picture
Upload folder using huggingface_hub
d0ffe9c verified
raw
history blame
5.63 kB
import glob
import logging
import os
import shutil
from pathlib import Path
import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from tqdm.rich import tqdm
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Links left by the original author (presumably the references the
# Laplacian pyramid blending code below was based on):
# https://github.com/jinwonkim93/laplacian-pyramid-blend
# https://blog.shikoan.com/pytorch-laplacian-pyramid/
class LaplacianPyramidBlender:
    """Blend two images under a soft mask using Laplacian pyramids in PyTorch.

    Calling an instance takes two ``(H, W, C)`` images on a 0-255 scale and an
    ``(H, W, C)`` mask, builds Laplacian pyramids of both images and a Gaussian
    pyramid of the mask, mixes the pyramids level by level and collapses the
    result back into a single ``uint8`` image.
    """

    # Torch device used for every tensor; (re)assigned on each ``__call__``.
    device = None

    def get_gaussian_kernel(self):
        """Return the 5x5 smoothing kernel as a ``(1, 1, 5, 5)`` float32 tensor.

        The kernel is the outer product of ``[1, 4, 6, 4, 1]`` with itself,
        divided by 256 so that its weights sum to one.
        """
        kernel = np.array([
            [1, 4, 6, 4, 1],
            [4, 16, 24, 16, 4],
            [6, 24, 36, 24, 6],
            [4, 16, 24, 16, 4],
            [1, 4, 6, 4, 1]], np.float32) / 256.0
        gaussian_k = torch.as_tensor(kernel.reshape(1, 1, 5, 5), device=self.device)
        return gaussian_k

    def _smooth(self, image, stride=1):
        """Blur every channel of ``image`` (N, C, H, W) with the 5x5 kernel.

        A grouped convolution applies the same kernel to each channel
        independently, so any channel count is supported. The kernel is cast
        to the image dtype to avoid dtype-mismatch errors.
        """
        channels = image.shape[1]
        weight = self.get_gaussian_kernel().to(dtype=image.dtype)
        weight = weight.repeat(channels, 1, 1, 1)
        return F.conv2d(image, weight, padding=2, stride=stride, groups=channels)

    def pyramid_down(self, image):
        """Blur ``image`` and keep every second pixel (size becomes ceil(H/2), ceil(W/2))."""
        with torch.no_grad():
            return self._smooth(image, stride=2)

    def pyramid_up(self, image, size=None):
        """Upsample ``image`` and blur it.

        Args:
            image: tensor of shape (N, C, H, W).
            size: target ``(H, W)``; when ``None`` the image is scaled by 2.

        The upsampling uses ``F.interpolate`` with its default mode.
        """
        with torch.no_grad():
            if size is None:
                upsample = F.interpolate(image, scale_factor=2)
            else:
                upsample = F.interpolate(image, size=size)
            return self._smooth(upsample)

    def gaussian_pyramid(self, original, n_pyramids):
        """Return ``[original, down(original), ...]`` with ``n_pyramids + 1`` entries."""
        pyramids = [original]
        current = original
        for _ in range(n_pyramids):
            current = self.pyramid_down(current)
            pyramids.append(current)
        return pyramids

    def laplacian_pyramid(self, original, n_pyramids):
        """Return the Laplacian pyramid of ``original``, finest level first.

        Each entry is a Gaussian level minus the upsampled next-coarser level;
        the last entry is the coarsest Gaussian level itself.
        """
        pyramids = self.gaussian_pyramid(original, n_pyramids)
        laplacian = [
            fine - self.pyramid_up(coarse, fine.shape[2:])
            for fine, coarse in zip(pyramids, pyramids[1:])
        ]
        laplacian.append(pyramids[-1])
        return laplacian

    def laplacian_pyramid_blending_with_mask(self, src, target, mask, num_levels=9):
        """Blend ``src`` into ``target`` under ``mask`` and return a (C, H, W) array.

        Args:
            src, target: float arrays of shape (C, H, W).
            mask: float array of shape (C, H, W) with values in [0, 1].
            num_levels: number of downsampling steps in each pyramid.

        Returns:
            A NumPy array of shape (C, H, W) holding the blended image
            (not clipped).
        """
        src_t = torch.as_tensor(np.expand_dims(src, axis=0), device=self.device)
        target_t = torch.as_tensor(np.expand_dims(target, axis=0), device=self.device)
        mask_t = torch.as_tensor(np.expand_dims(mask, axis=0), device=self.device)

        # Reverse every pyramid so iteration runs from coarsest to finest.
        lp_src = self.laplacian_pyramid(src_t, num_levels)[::-1]
        lp_target = self.laplacian_pyramid(target_t, num_levels)[::-1]
        gp_mask = self.gaussian_pyramid(mask_t, num_levels)[::-1]

        blended_levels = []
        for idx, (la, lb, level_mask) in enumerate(zip(lp_src, lp_target, gp_mask)):
            level = lb * (1.0 - level_mask)
            if idx <= 2:
                # The three coarsest levels always take the target's
                # coefficients, whatever the mask says.
                level += lb * level_mask
            else:
                level += la * level_mask
            blended_levels.append(level)

        # Collapse the pyramid: upsample the running result and add detail.
        merged = blended_levels.pop(0)
        for lap in blended_levels:
            merged = self.pyramid_up(merged, lap.shape[2:]) + lap
        result = merged.squeeze(dim=0).to('cpu').detach().numpy().copy()
        return result

    def __call__(self,
                 src_image: np.ndarray,
                 target_image: np.ndarray,
                 mask_image: np.ndarray,
                 device
                 ):
        """Blend ``src_image`` over ``target_image`` and return a uint8 image.

        Args:
            src_image: (H, W, C) array on a 0-255 scale.
            target_image: (H, W, C) array on a 0-255 scale.
            mask_image: (H, W, C) array; values are clipped to [0, 1].
            device: torch device (or device string) to run on.

        Returns:
            (H, W, C) ``uint8`` array.
        """
        self.device = device
        # Pyramid depth is derived from the image height.
        num_levels = int(np.log2(src_image.shape[0]))
        # Normalize everything to float32 in [0, 1], channels first. The mask
        # is cast as well so a float64 mask does not clash with the float32
        # kernel inside conv2d.
        mask_image = np.clip(mask_image, 0, 1).astype(np.float32).transpose([2, 0, 1])
        src_image = src_image.transpose([2, 0, 1]).astype(np.float32) / 255.0
        target_image = target_image.transpose([2, 0, 1]).astype(np.float32) / 255.0
        composite_image = self.laplacian_pyramid_blending_with_mask(
            src_image, target_image, mask_image, num_levels)
        # Round to nearest before casting: plain truncation turns float
        # round-off such as 99.99999 into 99 and darkens the result.
        composite_image = np.clip(np.rint(composite_image * 255), 0, 255).astype(np.uint8)
        composite_image = composite_image.transpose([1, 2, 0])
        return composite_image
def composite(bg_dir, fg_list, output_dir, masked_area_list, device="cuda"):
    """Composite foreground frames onto background PNGs with pyramid blending.

    Background files are the ``[0-9]*.png`` entries of ``bg_dir`` in sorted
    order; they are paired positionally with ``fg_list`` and
    ``masked_area_list``. Each result is written to ``output_dir`` under the
    background's file name.

    Args:
        bg_dir: directory holding the background PNG frames.
        fg_list: per-frame foreground arrays, or ``None`` to keep the
            background unchanged.
        output_dir: destination directory (``str`` or ``Path``).
        masked_area_list: per-frame masks, or ``None`` to keep the background
            unchanged. Each mask is tiled three times along axis 2 and divided
            by 255 after blurring, so it is presumably (H, W, 1) on a 0-255
            scale -- confirm against the caller.
        device: torch device passed to ``LaplacianPyramidBlender``.
    """
    # Accept plain strings as well as Path objects for the destination.
    output_dir = Path(output_dir)
    bg_list = sorted(glob.glob(os.path.join(bg_dir, "[0-9]*.png"), recursive=False))

    blender = LaplacianPyramidBlender()

    frames = zip(bg_list, fg_list, masked_area_list)
    for bg_path, fg_array, mask in tqdm(frames, total=len(bg_list), desc="compositing"):
        save_path = output_dir / Path(bg_path).name

        if fg_array is None:
            logger.info("composite fg_array is None -> skip")
            shutil.copy(bg_path, save_path)
            continue

        if mask is None:
            logger.info("mask is None -> skip")
            shutil.copy(bg_path, save_path)
            continue

        # Force three channels so RGBA / greyscale / palette PNGs line up with
        # the three-channel mask built below.
        with Image.open(bg_path) as bg_file:
            bg = np.asarray(bg_file.convert("RGB")).copy()
        h, w, _ = bg.shape

        mask = np.concatenate([mask, mask, mask], 2)
        fg = cv2.resize(fg_array, dsize=(w, h))
        mask = cv2.resize(mask, dsize=(w, h))

        # Soften the mask edge, then bring it to the 0-1 range.
        mask = mask.astype(np.float32)
        mask = cv2.GaussianBlur(mask, (15, 15), 0)
        mask = mask / 255

        # Alpha-mix first, then pyramid-blend the mix against the background.
        fg = fg * mask + bg * (1 - mask)
        img = blender(fg, bg, mask, device)

        Image.fromarray(img).save(save_path)
def simple_composite(bg_dir, fg_list, output_dir, masked_area_list, device="cuda"):
    """Composite foreground frames onto background PNGs with a plain alpha mix.

    Background files are the ``[0-9]*.png`` entries of ``bg_dir`` in sorted
    order; they are paired positionally with ``fg_list`` and
    ``masked_area_list``. Each result is written to ``output_dir`` under the
    background's file name.

    Args:
        bg_dir: directory holding the background PNG frames.
        fg_list: per-frame foreground arrays, or ``None`` to keep the
            background unchanged.
        output_dir: destination directory (``str`` or ``Path``).
        masked_area_list: per-frame masks, or ``None`` to keep the background
            unchanged. Each mask is tiled three times along axis 2 and divided
            by 255 after blurring, so it is presumably (H, W, 1) on a 0-255
            scale -- confirm against the caller.
        device: unused here; kept so the signature matches ``composite``.
    """
    # Accept plain strings as well as Path objects for the destination.
    output_dir = Path(output_dir)
    bg_list = sorted(glob.glob(os.path.join(bg_dir, "[0-9]*.png"), recursive=False))

    frames = zip(bg_list, fg_list, masked_area_list)
    for bg_path, fg_array, mask in tqdm(frames, total=len(bg_list), desc="compositing"):
        save_path = output_dir / Path(bg_path).name

        if fg_array is None:
            logger.info("composite fg_array is None -> skip")
            shutil.copy(bg_path, save_path)
            continue

        if mask is None:
            logger.info("mask is None -> skip")
            shutil.copy(bg_path, save_path)
            continue

        # Force three channels so RGBA / greyscale / palette PNGs line up with
        # the three-channel mask built below.
        with Image.open(bg_path) as bg_file:
            bg = np.asarray(bg_file.convert("RGB")).copy()
        h, w, _ = bg.shape

        mask = np.concatenate([mask, mask, mask], 2)
        fg = cv2.resize(fg_array, dsize=(w, h))
        mask = cv2.resize(mask, dsize=(w, h))

        # Soften the mask edge, then bring it to the 0-1 range.
        mask = mask.astype(np.float32)
        mask = cv2.GaussianBlur(mask, (15, 15), 0)
        mask = mask / 255

        img = fg * mask + bg * (1 - mask)
        img = img.clip(0, 255).astype(np.uint8)

        Image.fromarray(img).save(save_path)