# Copyright (C) 2024-present Naver Corporation. All rights reserved.
# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
#
# --------------------------------------------------------
# utility functions for images (loading/converting...)
# --------------------------------------------------------
import os
import torch
import numpy as np
import PIL.Image
from PIL.ImageOps import exif_transpose
import torchvision.transforms as tvf
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
import cv2 # noqa

try:
    from pillow_heif import register_heif_opener  # noqa
    register_heif_opener()
    heif_support_enabled = True
except ImportError:
    heif_support_enabled = False

ImgNorm = tvf.Compose([tvf.ToTensor(), tvf.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])


def img_to_arr(img):
    """ If `img` is a path, load it with imread_cv2; otherwise return it unchanged. """
    if isinstance(img, str):
        img = imread_cv2(img)
    return img


def imread_cv2(path, options=cv2.IMREAD_COLOR):
    """ Open an image or a depthmap with opencv-python.
    """
    if path.endswith(('.exr', '.EXR')):
        options = cv2.IMREAD_ANYDEPTH
    img = cv2.imread(path, options)
    if img is None:
        raise IOError(f'Could not load image={path} with {options=}')
    if img.ndim == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img
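

# Usage sketch (illustrative only; the file names are placeholders, not files from this repo):
#   color = imread_cv2('frame_0000.png')   # H x W x 3 uint8 array in RGB order
#   depth = imread_cv2('frame_0000.exr')   # floating-point depthmap, loaded with cv2.IMREAD_ANYDEPTH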


def rgb(ftensor, true_shape=None):
    """ Convert a tensor/array (or a list of them) into a displayable float RGB image in [0, 1]. """
    if isinstance(ftensor, list):
        return [rgb(x, true_shape=true_shape) for x in ftensor]
    if isinstance(ftensor, torch.Tensor):
        ftensor = ftensor.detach().cpu().numpy()  # H,W,3
    if ftensor.ndim == 3 and ftensor.shape[0] == 3:
        ftensor = ftensor.transpose(1, 2, 0)
    elif ftensor.ndim == 4 and ftensor.shape[1] == 3:
        ftensor = ftensor.transpose(0, 2, 3, 1)
    if true_shape is not None:
        H, W = true_shape
        ftensor = ftensor[:H, :W]
    if ftensor.dtype == np.uint8:
        img = np.float32(ftensor) / 255
    else:
        # float inputs are assumed to be ImgNorm-normalized, i.e. in [-1, 1]
        img = (ftensor * 0.5) + 0.5
    return img.clip(min=0, max=1)
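

# Usage sketch (illustrative only): convert a batch produced by ImgNorm back into displayable images.
# `some_pil_images` is a hypothetical list of PIL.Image objects, not something defined in this file.
#   batch = torch.stack([ImgNorm(im) for im in some_pil_images])  # B x 3 x H x W, values in [-1, 1]
#   previews = rgb(batch)                                         # B x H x W x 3 float, values in [0, 1]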


def _resize_pil_image(img, long_edge_size):
    S = max(img.size)
    if S > long_edge_size:
        interp = PIL.Image.LANCZOS
    else:
        interp = PIL.Image.BICUBIC
    new_size = tuple(int(round(x * long_edge_size / S)) for x in img.size)
    return img.resize(new_size, interp)
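
# Worked example (illustrative only): a 640x480 image resized with long_edge_size=512 becomes
# 512x384; both sides are scaled by 512/640, and Lanczos is used since this is a downscale.
#   small = _resize_pil_image(PIL.Image.new('RGB', (640, 480)), 512)
#   assert small.size == (512, 384)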


def load_images(images, cog_seg_maps, size, square_ok=False, verbose=True):
    """ Open and convert all provided images (together with their segmentation maps)
    to the proper input format for DUSt3R.
    """
    # if isinstance(folder_or_list, str):
    #     if verbose:
    #         print(f'>> Loading images from {folder_or_list}')
    #     root, folder_content = folder_or_list, sorted(os.listdir(folder_or_list))
    # elif isinstance(folder_or_list, list):
    #     if verbose:
    #         print(f'>> Loading a list of {len(folder_or_list)} images')
    #     root, folder_content = '', folder_or_list
    # else:
    #     raise ValueError(f'bad {folder_or_list=} ({type(folder_or_list)})')

    # supported_images_extensions = ['.jpg', '.jpeg', '.png']
    # if heif_support_enabled:
    #     supported_images_extensions += ['.heic', '.heif']
    # supported_images_extensions = tuple(supported_images_extensions)
    pil_images = images.pil_images

    # Average the per-image mean color of every segmentation label over all images where it appears.
    mean_colors = {}
    mean_colors_cnt = {}
    for i, img in enumerate(pil_images):
        img_np = np.array(img)
        seg_map = cog_seg_maps[i]
        unique_labels = np.unique(seg_map)
        for label in unique_labels:
            if label == -1:  # -1 marks unlabeled pixels
                continue
            mask = (seg_map == label)
            mean_color = img_np[mask].mean(axis=0)
            if label in mean_colors:
                mean_colors[label] += mean_color
                mean_colors_cnt[label] += 1
            else:
                mean_colors[label] = mean_color
                mean_colors_cnt[label] = 1
    for key in mean_colors.keys():
        mean_colors[key] /= mean_colors_cnt[key]

    imgs = []
    for i, img in enumerate(pil_images):
        img_np = np.array(img)

        # Paint every labeled region with its global mean color, then blend with the
        # original image (95% smoothed, 5% original).
        smoothed_image = np.zeros_like(img_np)
        seg_map = cog_seg_maps[i]
        unique_labels = np.unique(seg_map)
        for label in unique_labels:
            mask = (seg_map == label)
            if label == -1:
                smoothed_image[mask] = img_np[mask]
                continue
            smoothed_image[mask] = mean_colors[label]
        smoothed_image = cv2.addWeighted(img_np, 0.05, smoothed_image, 0.95, 0)
        smoothed_image = PIL.Image.fromarray(smoothed_image)

        W1, H1 = img.size
        if size == 224:
            # resize short side to 224 (then crop)
            img = _resize_pil_image(img, round(size * max(W1 / H1, H1 / W1)))
            smoothed_image = _resize_pil_image(smoothed_image, round(size * max(W1 / H1, H1 / W1)))
        else:
            # resize long side to `size`
            img = _resize_pil_image(img, size)
            smoothed_image = _resize_pil_image(smoothed_image, size)

        # center crop: square for size 224, otherwise dimensions rounded down to multiples of 16
        W, H = img.size
        cx, cy = W // 2, H // 2
        if size == 224:
            half = min(cx, cy)
            img = img.crop((cx - half, cy - half, cx + half, cy + half))
            smoothed_image = smoothed_image.crop((cx - half, cy - half, cx + half, cy + half))
        else:
            halfw, halfh = ((2 * cx) // 16) * 8, ((2 * cy) // 16) * 8
            if not square_ok and W == H:
                halfh = 3 * halfw / 4
            img = img.crop((cx - halfw, cy - halfh, cx + halfw, cy + halfh))
            smoothed_image = smoothed_image.crop((cx - halfw, cy - halfh, cx + halfw, cy + halfh))

        # W2, H2 = img.size
        # if verbose:
        #     print(f' - adding image {i} with resolution {W1}x{H1} --> {W2}x{H2}')

        imgs.append(dict(img=ImgNorm(img)[None], ori_img=ImgNorm(img)[None],
                         smoothed_img=ImgNorm(smoothed_image)[None],
                         true_shape=np.int32([img.size[::-1]]),
                         idx=len(imgs), instance=str(len(imgs))))

    if verbose:
        print(f' (Found {len(imgs)} images)')

    return imgs
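

# Usage sketch (illustrative only, not part of the original file): `load_images` only requires that
# `images` exposes a `.pil_images` list, so a SimpleNamespace is used here as a stand-in; the
# segmentation maps are dummy all -1 (unlabeled) arrays, and the image paths are placeholders.
#   from types import SimpleNamespace
#   pil_imgs = [PIL.Image.open(p).convert('RGB') for p in ('a.jpg', 'b.jpg')]
#   seg_maps = [np.full(im.size[::-1], -1, dtype=np.int64) for im in pil_imgs]
#   views = load_images(SimpleNamespace(pil_images=pil_imgs), seg_maps, size=512)
#   # each element of `views` is a dict with keys: img, ori_img, smoothed_img, true_shape, idx, instance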