Spaces:
Running
Running
import torch | |
import torchvision.transforms as transforms | |
import cv2 | |
import numpy as np | |
from .model import BiSeNet | |
# Region name -> integer class id. The ids are consecutive, starting at 0,
# in exactly the order the names are listed below (19 entries, ids 0..18).
mask_regions = {
    region_name: class_id
    for class_id, region_name in enumerate((
        "Background",
        "Skin",
        "L-Eyebrow",
        "R-Eyebrow",
        "L-Eye",
        "R-Eye",
        "Eye-G",
        "L-Ear",
        "R-Ear",
        "Ear-R",
        "Nose",
        "Mouth",
        "U-Lip",
        "L-Lip",
        "Neck",
        "Neck-L",
        "Cloth",
        "Hair",
        "Hat",
    ))
}

# Module-wide switch: overwritten by init_parser() and consulted by
# image_to_parsing() to decide whether the input tensor is moved to CUDA.
run_with_cuda = False
def init_parser(pth_path, use_cuda=False):
    """Build a 19-class BiSeNet, load its weights and put it in eval mode.

    Args:
        pth_path: Path of the checkpoint handed to ``torch.load``.
        use_cuda: When true the network is moved to CUDA before the weights
            are loaded; otherwise the checkpoint is mapped onto the CPU.

    Returns:
        The network with the loaded state dict, switched to ``eval()``.

    Side effects:
        Stores ``use_cuda`` in the module-level ``run_with_cuda`` flag, which
        ``image_to_parsing`` reads later.
    """
    global run_with_cuda
    run_with_cuda = use_cuda

    net = BiSeNet(n_classes=19)

    if use_cuda:
        net.cuda()
        state_dict = torch.load(pth_path)
    else:
        # Force every tensor in the checkpoint onto the CPU.
        state_dict = torch.load(pth_path, map_location=torch.device('cpu'))

    net.load_state_dict(state_dict)
    net.eval()
    return net
def image_to_parsing(img, net):
    """Run the parsing network on one image and return per-pixel class ids.

    Args:
        img: Image array accepted by ``cv2.resize`` with the channel axis
            last (presumably an OpenCV BGR image -- confirm against callers).
        net: Callable network; the first element of its output is used.

    Returns:
        A numpy array holding, for every pixel, the index of the highest
        scoring class (argmax over axis 0 after the batch axis is squeezed).
    """
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        # Per-channel mean / std used to normalise the tensor.
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    resized = cv2.resize(img, (512, 512))
    # Reverse the channel order, then copy so the transform receives a real
    # array rather than a reversed view.
    channel_flipped = resized[:, :, ::-1].copy()
    # Add the leading batch axis the network is called with.
    batch = preprocess(channel_flipped).unsqueeze(0)

    with torch.no_grad():
        if run_with_cuda:
            batch = batch.cuda()
        first_output = net(batch)[0]
        return first_output.squeeze(0).cpu().numpy().argmax(0)
def get_mask(parsing, classes):
    """Return a boolean mask marking where ``parsing`` equals any given class.

    Args:
        parsing: Array of class ids (any shape).
        classes: Iterable of class ids to select. May be empty, in which
            case the mask is all ``False`` instead of raising ``IndexError``.

    Returns:
        A boolean numpy array with the same shape as ``parsing``.
    """
    # list() lets tuples and other iterables through unchanged in meaning and
    # avoids numpy wrapping e.g. a set into a 0-d object array.
    return np.isin(parsing, list(classes))
def swap_regions(source, target, net, includes=[1,2,3,4,5,10,11,12,13], excludes=[7,8], blur_size=25): | |
parsing = image_to_parsing(source, net) | |
if len(includes) == 0: | |
return source, np.zeros_like(source) | |
include_mask = get_mask(parsing, includes) | |
include_mask = np.repeat(np.expand_dims(include_mask.astype('float32'), axis=2), 3, 2) | |
if len(excludes) > 0: | |
exclude_mask = get_mask(parsing, excludes) | |
exclude_mask = np.repeat(np.expand_dims(exclude_mask.astype('float32'), axis=2), 3, 2) | |
include_mask -= exclude_mask | |
mask = 1 - cv2.GaussianBlur(include_mask.clip(0,1), (0, 0), blur_size) | |
result = (1 - mask) * cv2.resize(source, (512, 512)) + mask * cv2.resize(target, (512, 512)) | |
result = cv2.resize(result.astype("float32"), (source.shape[1], source.shape[0])) | |
return result, mask.astype('float32') | |
def mask_regions_to_list(values):
    """Translate region names into their class ids.

    Names that are not keys of ``mask_regions`` are skipped silently; the
    order (and any duplicates) of the recognised names is preserved.

    Args:
        values: Iterable of region names.

    Returns:
        List of the class ids of the recognised names.
    """
    return [mask_regions[name] for name in values if name in mask_regions]