Spaces:
Running
on
Zero
Running
on
Zero
import os | |
os.environ['OPENCV_IO_ENABLE_OPENEXR'] = '1' | |
from typing import IO | |
import zipfile | |
import json | |
import io | |
from typing import * | |
from pathlib import Path | |
import re | |
import numpy as np | |
import cv2 | |
from .tools import timeit | |
LEGACY_SEGFORMER_CLASSES = [ | |
'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ', | |
'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth', | |
'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car', | |
'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug', | |
'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe', | |
'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column', | |
'signboard', 'chest of drawers', 'counter', 'sand', 'sink', | |
'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path', | |
'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door', | |
'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table', | |
'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove', | |
'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar', | |
'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower', | |
'chandelier', 'awning', 'streetlight', 'booth', 'television receiver', | |
'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister', | |
'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van', | |
'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything', | |
'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent', | |
'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank', | |
'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake', | |
'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce', | |
'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen', | |
'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass', | |
'clock', 'flag' | |
] | |
LEGACY_SEGFORMER_LABELS = {k: i for i, k in enumerate(LEGACY_SEGFORMER_CLASSES)} | |
def write_rgbd_zip( | |
file: Union[IO, os.PathLike], | |
image: Union[np.ndarray, bytes], | |
depth: Union[np.ndarray, bytes], mask: Union[np.ndarray, bytes], | |
segmentation_mask: Union[np.ndarray, bytes] = None, segmentation_labels: Union[Dict[str, int], bytes] = None, | |
intrinsics: np.ndarray = None, | |
normal: np.ndarray = None, normal_mask: np.ndarray = None, | |
meta: Union[Dict[str, Any], bytes] = None, | |
*, image_quality: int = 95, depth_type: Literal['linear', 'log', 'disparity'] = 'linear', depth_format: Literal['png', 'exr'] = 'png', depth_max_dynamic_range: float = 1e4, png_compression: int = 7 | |
): | |
""" | |
Write RGBD data as zip archive containing the image, depth, mask, segmentation_mask, and meta data. | |
In the zip file there will be: | |
- `meta.json`: The meta data as a JSON file. | |
- `image.jpg`: The RGB image as a JPEG file. | |
- `depth.png/exr`: The depth map as a PNG or EXR file, depending on the `depth_type`. | |
- `mask.png` (optional): The mask as a uint8 PNG file. | |
- `segmentation_mask.png` (optional): The segformer mask as a uint8/uint16 PNG file. | |
You can provided those data as np.ndarray or bytes. If you provide them as np.ndarray, they will be properly processed and encoded. | |
If you provide them as bytes, they will be written as is, assuming they are already encoded. | |
""" | |
if meta is None: | |
meta = {} | |
elif isinstance(meta, bytes): | |
meta = json.loads(meta.decode()) | |
if isinstance(image, bytes): | |
image_bytes = image | |
elif isinstance(image, np.ndarray): | |
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) | |
image_bytes = cv2.imencode('.jpg', image, [cv2.IMWRITE_JPEG_QUALITY, image_quality])[1].tobytes() | |
if isinstance(depth, bytes): | |
depth_bytes = depth | |
elif isinstance(depth, np.ndarray): | |
meta['depth_type'] = depth_type | |
if depth_type == 'linear': | |
if depth.dtype == np.float16: | |
depth_format = 'exr' | |
depth_bytes = cv2.imencode('.exr', depth.astype(np.float32), [cv2.IMWRITE_EXR_TYPE, cv2.IMWRITE_EXR_TYPE_HALF])[1].tobytes() | |
elif np.issubdtype(depth.dtype, np.floating): | |
depth_format = 'exr' | |
depth_bytes = cv2.imencode('.exr', depth.astype(np.float32), [cv2.IMWRITE_EXR_TYPE, cv2.IMWRITE_EXR_TYPE_FLOAT])[1].tobytes() | |
elif depth.dtype in [np.uint8, np.uint16]: | |
depth_format = 'png' | |
depth_bytes = cv2.imencode('.png', depth, [cv2.IMWRITE_PNG_COMPRESSION, png_compression])[1].tobytes() | |
elif depth_type == 'log': | |
depth_format = 'png' | |
depth = depth.astype(np.float32) | |
near = max(depth[mask].min(), 1e-3) | |
far = min(depth[mask].max(), near * depth_max_dynamic_range) | |
depth = ((np.log(depth.clip(near, far) / near) / np.log(far / near)).clip(0, 1) * 65535).astype(np.uint16) | |
depth_bytes = cv2.imencode('.png', depth, [cv2.IMWRITE_PNG_COMPRESSION, png_compression])[1].tobytes() | |
meta['depth_near'] = float(near) | |
meta['depth_far'] = float(far) | |
elif depth_type == 'disparity': | |
depth_format = 'png' | |
depth = depth.astype(np.float32) | |
depth = 1 / (depth + 1e-12) | |
depth = (depth / depth[mask].max()).clip(0, 1) | |
if np.unique(depth) < 200: | |
depth = (depth * 255).astype(np.uint8) | |
else: | |
depth = (depth * 65535).astype(np.uint16) | |
depth_bytes = cv2.imencode('.png', depth, [cv2.IMWRITE_PNG_COMPRESSION, png_compression])[1].tobytes() | |
if isinstance(mask, bytes): | |
mask_bytes = mask | |
elif isinstance(mask, np.ndarray): | |
mask_bytes = cv2.imencode('.png', mask.astype(np.uint8) * 255)[1].tobytes() | |
if segmentation_mask is not None: | |
if isinstance(segmentation_mask, bytes): | |
segmentation_mask_bytes = segmentation_mask | |
else: | |
segmentation_mask_bytes = cv2.imencode('.png', segmentation_mask)[1].tobytes() | |
assert segmentation_labels is not None, "You provided a segmentation mask, but not the corresponding labels." | |
if isinstance(segmentation_labels, bytes): | |
segmentation_labels = json.loads(segmentation_labels) | |
meta['segmentation_labels'] = segmentation_labels | |
if intrinsics is not None: | |
meta['intrinsics'] = intrinsics.tolist() | |
if normal is not None: | |
if isinstance(normal, bytes): | |
normal_bytes = normal | |
elif isinstance(normal, np.ndarray): | |
normal = ((normal * [0.5, -0.5, -0.5] + 0.5).clip(0, 1) * 65535).astype(np.uint16) | |
normal = cv2.cvtColor(normal, cv2.COLOR_RGB2BGR) | |
normal_bytes = cv2.imencode('.png', normal, [cv2.IMWRITE_PNG_COMPRESSION, png_compression])[1].tobytes() | |
if normal_mask is None: | |
normal_mask = np.ones(image.shape[:2], dtype=bool) | |
normal_mask_bytes = cv2.imencode('.png', normal_mask.astype(np.uint8) * 255)[1].tobytes() | |
meta_bytes = meta if isinstance(meta, bytes) else json.dumps(meta).encode() | |
with zipfile.ZipFile(file, 'w') as z: | |
z.writestr('meta.json', meta_bytes) | |
z.writestr('image.jpg', image_bytes) | |
z.writestr(f'depth.{depth_format}', depth_bytes) | |
z.writestr('mask.png', mask_bytes) | |
if segmentation_mask is not None: | |
z.writestr('segmentation_mask.png', segmentation_mask_bytes) | |
if normal is not None: | |
z.writestr('normal.png', normal_bytes) | |
z.writestr('normal_mask.png', normal_mask_bytes) | |
def read_rgbd_zip(file: Union[str, Path, IO], return_bytes: bool = False) -> Dict[str, Union[np.ndarray, Dict[str, Any], bytes]]: | |
""" | |
Read an RGBD zip file and return the image, depth, mask, segmentation_mask, intrinsics, and meta data. | |
### Parameters: | |
- `file: Union[str, Path, IO]` | |
The file path or file object to read from. | |
- `return_bytes: bool = False` | |
If True, return the image, depth, mask, and segmentation_mask as raw bytes. | |
### Returns: | |
- `Tuple[Dict[str, Union[np.ndarray, Dict[str, Any]]], Dict[str, bytes]]` | |
A dictionary containing: (If missing, the value will be None; if return_bytes is True, the value will be bytes) | |
- `image`: RGB numpy.ndarray of shape (H, W, 3). | |
- `depth`: float32 numpy.ndarray of shape (H, W). | |
- `mask`: bool numpy.ndarray of shape (H, W). | |
- `segformer_mask`: uint8 numpy.ndarray of shape (H, W). | |
- `intrinsics`: float32 numpy.ndarray of shape (3, 3). | |
- `meta`: Dict[str, Any]. | |
""" | |
# Load & extract archive | |
with zipfile.ZipFile(file, 'r') as z: | |
meta = z.read('meta.json') | |
if not return_bytes: | |
meta = json.loads(z.read('meta.json')) | |
image = z.read('image.jpg') | |
if not return_bytes: | |
image = cv2.imdecode(np.frombuffer(z.read('image.jpg'), np.uint8), cv2.IMREAD_COLOR) | |
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) | |
depth_name = next(s for s in z.namelist() if s.startswith('depth')) | |
depth = z.read(depth_name) | |
if not return_bytes: | |
depth = cv2.imdecode(np.frombuffer(z.read(depth_name), np.uint8), cv2.IMREAD_UNCHANGED) | |
if 'mask.png' in z.namelist(): | |
mask = z.read('mask.png') | |
if not return_bytes: | |
mask = cv2.imdecode(np.frombuffer(z.read('mask.png'), np.uint8), cv2.IMREAD_UNCHANGED) > 0 | |
else: | |
mask = None | |
if 'segformer_mask.png' in z.namelist(): | |
# NOTE: Legacy support for segformer_mask.png | |
segmentation_mask = z.read('segformer_mask.png') | |
segmentation_labels = None | |
if not return_bytes: | |
segmentation_mask = cv2.imdecode(np.frombuffer(segmentation_mask, np.uint8), cv2.IMREAD_UNCHANGED) | |
segmentation_labels = LEGACY_SEGFORMER_LABELS | |
elif 'segmentation_mask.png' in z.namelist(): | |
segmentation_mask = z.read('segmentation_mask.png') | |
segmentation_labels = None | |
if not return_bytes: | |
segmentation_mask = cv2.imdecode(np.frombuffer(segmentation_mask, np.uint8), cv2.IMREAD_UNCHANGED) | |
segmentation_labels = meta['segmentation_labels'] | |
else: | |
segmentation_mask = None | |
segmentation_labels = None | |
if 'normal.png' in z.namelist(): | |
normal = z.read('normal.png') | |
if not return_bytes: | |
normal = cv2.imdecode(np.frombuffer(z.read('normal.png'), np.uint8), cv2.IMREAD_UNCHANGED) | |
normal = cv2.cvtColor(normal, cv2.COLOR_BGR2RGB) | |
normal = (normal.astype(np.float32) / 65535 - 0.5) * [2.0, -2.0, -2.0] | |
normal = normal / np.linalg.norm(normal, axis=-1, keepdims=True) | |
if 'normal_mask.png' in z.namelist(): | |
normal_mask = z.read('normal_mask.png') | |
normal_mask = cv2.imdecode(np.frombuffer(normal_mask, np.uint8), cv2.IMREAD_UNCHANGED) > 0 | |
else: | |
normal_mask = np.ones(image.shape[:2], dtype=bool) | |
else: | |
normal, normal_mask = None, None | |
# recover linear depth | |
if not return_bytes: | |
if mask is None: | |
mask = np.ones(image.shape[:2], dtype=bool) | |
if meta['depth_type'] == 'linear': | |
depth = depth.astype(np.float32) | |
mask = mask & (depth > 0) | |
elif meta['depth_type'] == 'log': | |
near, far = meta['depth_near'], meta['depth_far'] | |
if depth.dtype == np.uint16: | |
depth = depth.astype(np.float32) / 65535 | |
elif depth.dtype == np.uint8: | |
depth = depth.astype(np.float32) / 255 | |
depth = near ** (1 - depth) * far ** depth | |
mask = mask & ~np.isnan(depth) | |
elif meta['depth_type'] == 'disparity': | |
mask = mask & (depth > 0) | |
if depth.dtype == np.uint16: | |
depth = depth.astype(np.float32) / 65535 | |
elif depth.dtype == np.uint8: | |
depth = depth.astype(np.float32) / 255 | |
depth = 1 / (depth + 1e-12) | |
# intrinsics | |
if not return_bytes and 'intrinsics' in meta: | |
intrinsics = np.array(meta['intrinsics'], dtype=np.float32) | |
else: | |
intrinsics = None | |
# depth unit | |
if not return_bytes and 'depth_unit' in meta: | |
depth_unit_str = meta['depth_unit'] | |
if r := re.match(r'([\d.]*)(\w*)', depth_unit_str): | |
digits, unit = r.groups() | |
depth_unit = float(digits or 1) * {'m': 1, 'cm': 0.01, 'mm': 0.001}[unit] | |
else: | |
depth_unit = None | |
else: | |
depth_unit = None | |
return_dict = { | |
'image': image, | |
'depth': depth, | |
'mask': mask, | |
'segmentation_mask': segmentation_mask, | |
'segmentation_labels': segmentation_labels, | |
'normal': normal, | |
'normal_mask': normal_mask, | |
'intrinsics': intrinsics, | |
'depth_unit': depth_unit, | |
'meta': meta, | |
} | |
return_dict = {k: v for k, v in return_dict.items() if v is not None} | |
return return_dict | |
def write_rgbxyz(file: Union[IO, Path], image: np.ndarray, points: np.ndarray, mask: np.ndarray = None, image_quality: int = 95): | |
if isinstance(image, bytes): | |
image_bytes = image | |
elif isinstance(image, np.ndarray): | |
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) | |
image_bytes = cv2.imencode('.jpg', image, [cv2.IMWRITE_JPEG_QUALITY, image_quality])[1].tobytes() | |
if isinstance(points, bytes): | |
points_bytes = points | |
elif isinstance(points, np.ndarray): | |
points_bytes = cv2.imencode('.exr', points.astype(np.float32), [cv2.IMWRITE_EXR_TYPE, cv2.IMWRITE_EXR_TYPE_FLOAT])[1].tobytes() | |
if mask is None: | |
mask = np.ones(image.shape[:2], dtype=bool) | |
if isinstance(mask, bytes): | |
mask_bytes = mask | |
elif isinstance(mask, np.ndarray): | |
mask_bytes = cv2.imencode('.png', mask.astype(np.uint8) * 255)[1].tobytes() | |
is_archive = hasattr(file, 'write') or Path(file).suffix == '.zip' | |
if is_archive: | |
with zipfile.ZipFile(file, 'w') as z: | |
z.writestr('image.jpg', image_bytes) | |
z.writestr('points.exr', points_bytes) | |
if mask is not None: | |
z.writestr('mask.png', mask_bytes) | |
else: | |
file = Path(file) | |
file.mkdir(parents=True, exist_ok=True) | |
with open(file / 'image.jpg', 'wb') as f: | |
f.write(image_bytes) | |
with open(file / 'points.exr', 'wb') as f: | |
f.write(points_bytes) | |
if mask is not None: | |
with open(file / 'mask.png', 'wb') as f: | |
f.write(mask_bytes) | |
def read_rgbxyz(file: Union[IO, str, Path]) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Dict[str, Any]]: | |
is_archive = hasattr(file, 'read') or Path(file).suffix == '.zip' | |
if is_archive: | |
with zipfile.ZipFile(file, 'r') as z: | |
image = cv2.imdecode(np.frombuffer(z.read('image.jpg'), np.uint8), cv2.IMREAD_COLOR) | |
points = cv2.imdecode(np.frombuffer(z.read('points.exr'), np.uint8), cv2.IMREAD_UNCHANGED) | |
if 'mask.png' in z.namelist(): | |
mask = cv2.imdecode(np.frombuffer(z.read('mask.png'), np.uint8), cv2.IMREAD_UNCHANGED) > 0 | |
else: | |
mask = np.ones(image.shape[:2], dtype=bool) | |
else: | |
file = Path(file) | |
file.mkdir(parents=True, exist_ok=True) | |
image = cv2.imread(str(file / 'image.jpg'), cv2.IMREAD_COLOR) | |
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) | |
points = cv2.imread(str(file / 'points.exr'), cv2.IMREAD_UNCHANGED) | |
if (file /'mask.png').exists(): | |
mask = cv2.imread(str(file / 'mask.png'), cv2.IMREAD_UNCHANGED) > 0 | |
else: | |
mask = np.ones(image.shape[:2], dtype=bool) | |
return image, points, mask | |