import configparser
import csv
import os
import os.path as osp

import numpy as np
import pycocotools.mask as rletools
import torch
from PIL import Image


class MOTObjDetect(torch.utils.data.Dataset):
    """Data class for the Multiple Object Tracking Dataset."""

    def __init__(self, root, transforms=None, vis_threshold=0.25,
                 split_seqs=None, frame_range_start=0.0, frame_range_end=1.0):
        self.root = root
        self.transforms = transforms
        self._vis_threshold = vis_threshold
        self._classes = ('background', 'pedestrian')
        self._img_paths = []
        self._split_seqs = split_seqs
        self.mots_gts = {}

        for f in sorted(os.listdir(root)):
            path = os.path.join(root, f)
            if not os.path.isdir(path):
                continue

            if split_seqs is not None and f not in split_seqs:
                continue

            config_file = os.path.join(path, 'seqinfo.ini')
            assert os.path.exists(config_file), \
                f'Path does not exist: {config_file}'

            config = configparser.ConfigParser()
            config.read(config_file)
            seq_len = int(config['Sequence']['seqLength'])
            im_ext = config['Sequence']['imExt']
            im_dir = config['Sequence']['imDir']

            img_dir = os.path.join(path, im_dir)

            # Optionally restrict the sequence to a sub-range of frames.
            start_frame = int(frame_range_start * seq_len)
            end_frame = int(frame_range_end * seq_len)

            for i in range(start_frame, end_frame):
                # Frame numbers in MOTChallenge are 1-based.
                img_path = os.path.join(img_dir, f"{i + 1:06d}{im_ext}")
                assert os.path.exists(img_path), \
                    f'Path does not exist: {img_path}'
                self._img_paths.append(img_path)

            if self.has_masks:
                gt_file = os.path.join(
                    os.path.dirname(img_dir), 'gt', 'gt.txt')
                self.mots_gts[gt_file] = load_mots_gt(gt_file)

    def __str__(self):
        if self._split_seqs is None:
            return self.root
        return f"{self.root}/{self._split_seqs}"

    @property
    def num_classes(self):
        return len(self._classes)

    def _get_annotation(self, idx):
        """Return the ground truth annotations for the image at index `idx`."""
        if 'test' in self.root:
            # Test sequences come without ground truth; return empty targets.
            num_objs = 0
            boxes = torch.zeros((num_objs, 4), dtype=torch.float32)

            return {'boxes': boxes,
                    'labels': torch.ones((num_objs,), dtype=torch.int64),
                    'image_id': torch.tensor([idx]),
                    'area': (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
                    'iscrowd': torch.zeros((num_objs,), dtype=torch.int64),
                    'visibilities': torch.zeros((num_objs,), dtype=torch.float32)}

        img_path = self._img_paths[idx]
        file_index = int(os.path.basename(img_path).split('.')[0])

        gt_file = os.path.join(os.path.dirname(
            os.path.dirname(img_path)), 'gt', 'gt.txt')
        assert os.path.exists(gt_file), \
            f'GT file does not exist: {gt_file}'

        bounding_boxes = []

        if self.has_masks:
            mask_objects_per_frame = self.mots_gts[gt_file][file_index]
            masks = []
            for mask_object in mask_objects_per_frame:
                # MOTS class ids: 1 = car, 2 = pedestrian, 10 = ignore region.
                if mask_object.class_id in [1, 10] or not rletools.area(mask_object.mask):
                    continue

                bbox = rletools.toBbox(mask_object.mask)
                x1, y1, w, h = [int(c) for c in bbox]

                bb = {'bb_left': x1,
                      'bb_top': y1,
                      'bb_width': w,
                      'bb_height': h,
                      'visibility': 1.0,
                      'track_id': mask_object.track_id}

                masks.append(rletools.decode(mask_object.mask))
                bounding_boxes.append(bb)
        else:
            with open(gt_file, "r") as inf:
                reader = csv.reader(inf, delimiter=',')
                for row in reader:
                    visibility = float(row[8])

                    # Only keep active (row[6]) pedestrian (row[7]) annotations
                    # of this frame that exceed the visibility threshold.
                    if (int(row[0]) == file_index
                            and int(row[6]) == 1
                            and int(row[7]) == 1
                            and visibility
                            and visibility >= self._vis_threshold):
                        bb = {'bb_left': int(row[2]),
                              'bb_top': int(row[3]),
                              'bb_width': int(row[4]),
                              'bb_height': int(row[5]),
                              'visibility': visibility,
                              'track_id': int(row[1])}

                        bounding_boxes.append(bb)

        num_objs = len(bounding_boxes)
        boxes = torch.zeros((num_objs, 4), dtype=torch.float32)
        visibilities = torch.zeros((num_objs,), dtype=torch.float32)
        track_ids = torch.zeros((num_objs,), dtype=torch.long)

        for i, bb in enumerate(bounding_boxes):
            # Convert (left, top, width, height) to (x1, y1, x2, y2).
            # Pixel indexes are assumed to already be 0-based.
            x1 = bb['bb_left']
            y1 = bb['bb_top']
            x2 = x1 + bb['bb_width']
            y2 = y1 + bb['bb_height']

            boxes[i, 0] = x1
            boxes[i, 1] = y1
            boxes[i, 2] = x2
            boxes[i, 3] = y2
            visibilities[i] = bb['visibility']
            track_ids[i] = bb['track_id']

        annos = {'boxes': boxes,
                 'labels': torch.ones((num_objs,), dtype=torch.int64),
                 'image_id': torch.tensor([idx]),
                 'area': (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
                 'iscrowd': torch.zeros((num_objs,), dtype=torch.int64),
                 'visibilities': visibilities,
                 'track_ids': track_ids}

        if self.has_masks:
            annos['masks'] = torch.from_numpy(np.stack(masks))

        return annos

    @property
    def has_masks(self):
        return '/MOTS20/' in self.root

    def __getitem__(self, idx):
        # Load image and its annotations.
        img_path = self._img_paths[idx]
        img = Image.open(img_path).convert("RGB")

        target = self._get_annotation(idx)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self._img_paths)

    def write_results_files(self, results, output_dir):
        """Write the detections in the format for MOT17Det submission.

        all_boxes[image] = N x 5 array of detections in (x1, y1, x2, y2, score)

        Each file contains these lines:
        <frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <conf>, <x>, <y>, <z>

        Files to submit:
        ./MOT17-01.txt
        ./MOT17-02.txt
        ./MOT17-03.txt
        ./MOT17-04.txt
        ./MOT17-05.txt
        ./MOT17-06.txt
        ./MOT17-07.txt
        ./MOT17-08.txt
        ./MOT17-09.txt
        ./MOT17-10.txt
        ./MOT17-11.txt
        ./MOT17-12.txt
        ./MOT17-13.txt
        ./MOT17-14.txt
        """
        files = {}
        # Default delimiter for box results; MOTS mask results use spaces.
        delimiter = ','
        for image_id, res in results.items():
            path = self._img_paths[image_id]
            img1, name = osp.split(path)
            # Get the frame number out of the file name.
            frame = int(name.split('.')[0])
            # img1 is smth like /train/MOT17-09-FRCNN/img1 or /train/MOT17-09/img1.
            tmp = osp.dirname(img1)
            # Get the folder name of the sequence and split it.
            tmp = osp.basename(tmp).split('-')
            # Build the output file name, e.g., MOT17-09.txt.
            out = tmp[0] + '-' + tmp[1] + '.txt'
            outfile = osp.join(output_dir, out)

            # Create an empty result list for files not seen before.
            if outfile not in files:
                files[outfile] = []

            if 'masks' in res:
                delimiter = ' '
                masks = res['masks'].squeeze(dim=1)

                index_map = torch.arange(masks.size(0))[:, None, None]
                index_map = index_map.expand_as(masks)

                masks = torch.logical_and(
                    # remove background
                    masks > 0.5,
                    # resolve overlaps by keeping the mask with the largest probability
                    index_map == masks.argmax(dim=0)
                )
                for res_i in range(len(masks)):
                    track_id = -1
                    if 'track_ids' in res:
                        track_id = res['track_ids'][res_i].item()
                    # pycocotools expects a Fortran-ordered uint8 array.
                    mask = np.asfortranarray(
                        masks[res_i].cpu().numpy().astype(np.uint8))

                    rle_mask = rletools.encode(mask)

                    files[outfile].append(
                        [frame,
                         track_id,
                         2,  # class pedestrian
                         mask.shape[0],  # image height
                         mask.shape[1],  # image width
                         rle_mask['counts'].decode(encoding='UTF-8')])
            else:
                delimiter = ','
                for res_i in range(len(res['boxes'])):
                    track_id = -1
                    if 'track_ids' in res:
                        track_id = res['track_ids'][res_i].item()

                    box = res['boxes'][res_i]
                    score = res['scores'][res_i]

                    x1 = box[0].item()
                    y1 = box[1].item()
                    x2 = box[2].item()
                    y2 = box[3].item()

                    out = [frame, track_id, x1, y1,
                           x2 - x1, y2 - y1,
                           score.item(), -1, -1, -1]

                    if 'keypoints' in res:
                        out.extend(res['keypoints'][res_i][:, :2].flatten().tolist())
                        out.extend(res['keypoints_scores'][res_i].flatten().tolist())

                    files[outfile].append(out)

        for k, v in files.items():
            with open(k, "w") as of:
                writer = csv.writer(of, delimiter=delimiter)
                for d in v:
                    writer.writerow(d)
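

# Illustrative usage sketch, not part of the original module: detection targets
# differ in size per image, so the default DataLoader collation cannot stack
# them. A common pattern is to keep batches as tuples of images and targets.
# The data root and sequence name below are hypothetical examples.
#
#   dataset = MOTObjDetect('data/MOT17/train', split_seqs=['MOT17-09-FRCNN'])
#   loader = torch.utils.data.DataLoader(
#       dataset, batch_size=2, shuffle=True,
#       collate_fn=collate_detection_batch)
def collate_detection_batch(batch):
    """Keep variable-length detection targets as tuples instead of stacking."""
    return tuple(zip(*batch))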


class SegmentedObject:
    """Helper class for segmentation objects."""

    def __init__(self, mask: dict, class_id: int, track_id: int,
                 full_bbox=None) -> None:
        self.mask = mask  # RLE dict with 'size' and 'counts' keys
        self.class_id = class_id
        self.track_id = track_id
        self.full_bbox = full_bbox
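

# Minimal sketch (hypothetical helper, mirroring how _get_annotation above
# consumes masks): converting the RLE mask stored on a SegmentedObject into a
# box, a pixel area, and a dense binary mask via pycocotools.
def mask_stats(obj: SegmentedObject):
    """Return the (x, y, w, h) box, pixel area and decoded mask of an object."""
    box = rletools.toBbox(obj.mask)          # numpy array [x, y, w, h]
    area = rletools.area(obj.mask)           # number of foreground pixels
    binary_mask = rletools.decode(obj.mask)  # H x W uint8 numpy array
    return box, area, binary_mask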


def load_mots_gt(path: str) -> dict:
    """Load MOTS ground truth from path."""
    objects_per_frame = {}
    # Check that no frame contains overlapping masks.
    combined_mask_per_frame = {}

    with open(path, "r") as gt_file:
        for line in gt_file:
            line = line.strip()
            fields = line.split(" ")

            frame = int(fields[0])
            if frame not in objects_per_frame:
                objects_per_frame[frame] = []

            class_id = int(fields[2])
            assert class_id in (1, 2, 10), \
                "Unknown object class " + fields[2]

            mask = {
                'size': [int(fields[3]), int(fields[4])],
                'counts': fields[5].encode(encoding='UTF-8')}
            if frame not in combined_mask_per_frame:
                combined_mask_per_frame[frame] = mask
            elif rletools.area(rletools.merge([
                    combined_mask_per_frame[frame], mask],
                    intersect=True)):
                assert False, \
                    "Objects with overlapping masks in frame " + fields[0]
            else:
                combined_mask_per_frame[frame] = rletools.merge(
                    [combined_mask_per_frame[frame], mask],
                    intersect=False)

            full_bbox = None
            if len(fields) == 10:
                full_bbox = [int(fields[6]), int(fields[7]),
                             int(fields[8]), int(fields[9])]

            objects_per_frame[frame].append(SegmentedObject(
                mask,
                class_id,
                int(fields[1]),
                full_bbox))

    return objects_per_frame
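

if __name__ == '__main__':
    # Usage sketch, assuming an extracted MOTS20 sequence; the path below is a
    # hypothetical example. Prints per-frame object counts as a quick sanity
    # check of the ground truth loader.
    gt = load_mots_gt('data/MOTS20/train/MOTS20-02/gt/gt.txt')
    for frame, objects in sorted(gt.items()):
        pedestrians = [o for o in objects if o.class_id == 2]
        print(f'frame {frame}: {len(objects)} objects, '
              f'{len(pedestrians)} pedestrians')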