Spaces:
Running
on
Zero
Running
on
Zero
import os | |
import glob | |
import os.path as osp | |
import numpy as np | |
import torch | |
import torch.nn.functional as F | |
from torch.utils.data import Dataset | |
from loguru import logger | |
import random | |
from src.utils.dataset import read_pretrain_gray | |
class PretrainDataset(Dataset):
    """Visible / thermal (LWIR) image-pair dataset built from a KAIST-style tree.

    ``root_dir`` is walked recursively. Every folder that contains both a
    ``visible`` and an ``lwir`` sub-folder is treated as one sequence whose
    sorted ``*.jpg`` files are paired frame by frame. The first 90% of each
    sequence is used for ``mode='train'`` and the last 10% for ``mode='val'``.
    """

    # Fraction of every sequence assigned to each split: [start_ratio, end_ratio).
    _SPLIT_RATIOS = {'train': (0.0, 0.9), 'val': (0.9, 1.0)}

    def __init__(self,
                 root_dir,
                 mode='train',
                 img_resize=None,
                 df=None,
                 img_padding=False,
                 frame_gap=2,
                 **kwargs):
        """
        Manage image pairs of KAIST Multispectral Pedestrian Detection Benchmark Dataset.

        Args:
            root_dir (str): dataset root; searched recursively for folders that
                contain both `visible` and `lwir` sub-folders.
            mode (str): options are ['train', 'val'].
            img_resize (int): forwarded to `read_pretrain_gray`; the longer edge of
                resized images. Must not be None. 640 is recommended.
            df (int, optional): forwarded to `read_pretrain_gray`; image size division
                factor. NOTE: this will change the final image size after img_resize.
            img_padding (bool): forwarded to `read_pretrain_gray`; zero-pad the image
                to squared size. Must be truthy for both supported modes.
            frame_gap (int): step between consecutive sampled frame indices.
            **kwargs: only `coarse_scale` (float, default 0.125) is read; it is the
                scale factor used to down-sample the padding masks in `__getitem__`.

        Raises:
            NotImplementedError: if `mode` is neither 'train' nor 'val'.
            AssertionError: if `img_resize` is None or `img_padding` is falsy.
        """
        super().__init__()
        self.root_dir = root_dir
        self.mode = mode

        # specify which part of the data is used for training and validation
        if mode not in self._SPLIT_RATIOS:
            raise NotImplementedError()
        assert img_resize is not None and img_padding
        self.start_ratio, self.end_ratio = self._SPLIT_RATIOS[mode]

        # parameters for image resizing, padding
        self.img_resize = img_resize
        self.df = df
        self.img_padding = img_padding

        # for training XoFTR
        # `kwargs` is a dict, so it has to be queried with .get(); getattr() on it
        # would never find the key and would always fall back to the default.
        self.coarse_scale = kwargs.get('coarse_scale', 0.125)

        self.pair_paths = self.generate_kaist_pairs(root_dir, frame_gap=frame_gap, second_frame_range=0)

    def get_kaist_image_paths(self, root_dir):
        """Collect the sorted visible / LWIR image lists of every sequence folder.

        Args:
            root_dir (str): directory that is walked recursively.

        Returns:
            tuple: `(vis_img_paths, lwir_img_paths, img_num_per_folder)` where the
            first two are lists (one entry per sequence) of sorted `*.jpg` path
            lists and the third holds the number of images of each sequence.

        Raises:
            AssertionError: if a sequence has a different number of visible and
                LWIR images.
        """
        vis_img_paths = []
        lwir_img_paths = []
        img_num_per_folder = []

        # Recursively search for folders holding both "visible" and "lwir" sub-folders
        for folder, subfolders, _ in os.walk(root_dir):
            # Sorting in place makes os.walk descend in a deterministic order, so the
            # result does not depend on the file system's directory ordering.
            subfolders.sort()
            if "visible" not in subfolders or "lwir" not in subfolders:
                continue

            # Only '*.jpg' is matched; add more image file extensions as necessary
            vis_imgs_i = sorted(glob.glob(osp.join(folder, "visible", '*.jpg')))
            lwir_imgs_i = sorted(glob.glob(osp.join(folder, "lwir", '*.jpg')))

            # Frames are paired by position, so both modalities need the same count.
            assert len(vis_imgs_i) == len(lwir_imgs_i), f"Image numbers do not match in {folder}, {len(vis_imgs_i)} != {len(lwir_imgs_i)}"

            vis_img_paths.append(vis_imgs_i)
            lwir_img_paths.append(lwir_imgs_i)
            img_num_per_folder.append(len(vis_imgs_i))

        return vis_img_paths, lwir_img_paths, img_num_per_folder

    def generate_kaist_pairs(self, root_dir, frame_gap, second_frame_range):
        """ generate image pairs (Vis-TIR) from KAIST Pedestrian dataset
        Args:
            root_dir: root directory for the dataset
            frame_gap (int): the frame gap between consecutive images
            second_frame_range (int): the range for the second image, i.e. for the first
                index i the second index j is drawn from
                [i - second_frame_range, i + second_frame_range] (both inclusive) and
                clamped to the current split. 0 pairs every frame with itself.
        Returns:
            pair_paths (list): shuffled list of `[img_name0, img_name1]`, both relative
                to `root_dir`; which modality comes first is chosen at random per pair.
        """
        vis_img_paths, lwir_img_paths, img_num_per_folder = self.get_kaist_image_paths(root_dir)
        pair_paths = []
        for vis_imgs, lwir_imgs, num_img in zip(vis_img_paths, lwir_img_paths, img_num_per_folder):
            # frame indices of this split: [first, last)
            first = int(self.start_ratio * num_img)
            last = int(self.end_ratio * num_img)
            inds_vis = torch.arange(first, last, frame_gap, dtype=torch.long)

            if second_frame_range > 0:
                # torch.randint excludes `high`, hence the +1 for a symmetric window
                offsets = torch.randint(-second_frame_range, second_frame_range + 1, (inds_vis.shape[0],))
                # keep the shifted indices inside the split
                inds_lwir = (inds_vis + offsets).clamp(min=first, max=last - 1)
            else:
                inds_lwir = inds_vis

            for j, k in zip(inds_vis.tolist(), inds_lwir.tolist()):
                img_name0 = os.path.relpath(vis_imgs[j], root_dir)
                img_name1 = os.path.relpath(lwir_imgs[k], root_dir)
                # randomly decide which modality is stored as the first image
                if torch.rand(1) > 0.5:
                    img_name0, img_name1 = img_name1, img_name0
                pair_paths.append([img_name0, img_name1])

        random.shuffle(pair_paths)
        return pair_paths

    def __len__(self):
        """Return the number of image pairs."""
        return len(self.pair_paths)

    def __getitem__(self, idx):
        """Load the pair at `idx` and return it as a dict.

        Returns:
            dict: images, normalized images, scales, per-image mean / std,
            'dataset_name', 'pair_id' and 'pair_names'. 'mask0' / 'mask1' are added
            only when the loader returned masks and `self.coarse_scale` is truthy.
        """
        rel_name0, rel_name1 = self.pair_paths[idx][0], self.pair_paths[idx][1]
        img_name0 = osp.join(self.root_dir, rel_name0)
        img_name1 = osp.join(self.root_dir, rel_name1)

        # extra random swap of the pair order during training
        if self.mode == "train" and torch.rand(1) > 0.5:
            img_name0, img_name1 = img_name1, img_name0

        # read grayscale and normalized image, and mask. (1, h, w) and (h, w)
        # NOTE(review): the trailing None is the helper's fifth positional argument,
        # presumably an augmentation callable - confirm against read_pretrain_gray.
        image0, image0_norm, mask0, scale0, image0_mean, image0_std = read_pretrain_gray(
            img_name0, self.img_resize, self.df, self.img_padding, None)
        image1, image1_norm, mask1, scale1, image1_mean, image1_std = read_pretrain_gray(
            img_name1, self.img_resize, self.df, self.img_padding, None)

        data = {
            'image0': image0,  # (1, h, w)
            'image1': image1,
            'image0_norm': image0_norm,
            'image1_norm': image1_norm,
            'scale0': scale0,  # [scale_w, scale_h]
            'scale1': scale1,
            "image0_mean": image0_mean,
            "image0_std": image0_std,
            "image1_mean": image1_mean,
            "image1_std": image1_std,
            'dataset_name': 'PreTrain',
            'pair_id': idx,
            # names are reported in the stored order, i.e. without the train-time swap
            'pair_names': (rel_name0, rel_name1),
        }

        # for XoFTR training
        if mask0 is not None and self.coarse_scale:  # img_padding is True
            # down-sample both masks together to the coarse feature resolution
            [ts_mask_0, ts_mask_1] = F.interpolate(torch.stack([mask0, mask1], dim=0)[None].float(),
                                                   scale_factor=self.coarse_scale,
                                                   mode='nearest',
                                                   recompute_scale_factor=False)[0].bool()
            data.update({'mask0': ts_mask_0, 'mask1': ts_mask_1})

        return data