# vim: expandtab:ts=4:sw=4
import os
import errno
import argparse
import numpy as np
import cv2
import tensorflow.compat.v1 as tf

#tf.compat.v1.disable_eager_execution()
# Let TensorFlow grow GPU memory on demand instead of claiming it all up front.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)


def _run_in_batches(f, data_dict, out, batch_size):
    """Run `f` over `data_dict` in chunks of `batch_size`, writing to `out`."""
    data_len = len(out)
    num_batches = int(data_len / batch_size)

    s, e = 0, 0
    for i in range(num_batches):
        s, e = i * batch_size, (i + 1) * batch_size
        batch_data_dict = {k: v[s:e] for k, v in data_dict.items()}
        out[s:e] = f(batch_data_dict)
    if e < len(out):
        # Handle the remainder that did not fill a complete batch.
        batch_data_dict = {k: v[e:] for k, v in data_dict.items()}
        out[e:] = f(batch_data_dict)


def extract_image_patch(image, bbox, patch_shape):
    """Extract image patch from bounding box.

    Parameters
    ----------
    image : ndarray
        The full image.
    bbox : array_like
        The bounding box in format (x, y, width, height).
    patch_shape : Optional[array_like]
        This parameter can be used to enforce a desired patch shape
        (height, width). First, the `bbox` is adapted to the aspect ratio of
        the patch shape, then it is clipped at the image boundaries. If None,
        the shape is computed from :arg:`bbox`.

    Returns
    -------
    ndarray | NoneType
        An image patch showing the :arg:`bbox`, optionally reshaped to
        :arg:`patch_shape`. Returns None if the bounding box is empty or
        fully outside of the image boundaries.

    """
    bbox = np.array(bbox)
    if patch_shape is not None:
        # correct aspect ratio to patch shape
        target_aspect = float(patch_shape[1]) / patch_shape[0]
        new_width = target_aspect * bbox[3]
        bbox[0] -= (new_width - bbox[2]) / 2
        bbox[2] = new_width

    # convert to top left, bottom right
    bbox[2:] += bbox[:2]
    bbox = bbox.astype(np.int32)

    # clip at image boundaries
    bbox[:2] = np.maximum(0, bbox[:2])
    bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:])
    if np.any(bbox[:2] >= bbox[2:]):
        return None
    sx, sy, ex, ey = bbox
    image = image[sy:ey, sx:ex]
    image = cv2.resize(image, tuple(patch_shape[::-1]))
    return image


class ImageEncoder(object):

    def __init__(self, checkpoint_filename, input_name="images",
                 output_name="features"):
        self.session = tf.Session()
        # Load the frozen graph and import it under the "net" name scope.
        with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(file_handle.read())
        tf.import_graph_def(graph_def, name="net")
        self.input_var = tf.get_default_graph().get_tensor_by_name(
            "%s:0" % input_name)
        self.output_var = tf.get_default_graph().get_tensor_by_name(
            "%s:0" % output_name)

        assert len(self.output_var.get_shape()) == 2
        assert len(self.input_var.get_shape()) == 4
        self.feature_dim = self.output_var.get_shape().as_list()[-1]
        self.image_shape = self.input_var.get_shape().as_list()[1:]

    def __call__(self, data_x, batch_size=32):
        out = np.zeros((len(data_x), self.feature_dim), np.float32)
        _run_in_batches(
            lambda x: self.session.run(self.output_var, feed_dict=x),
            {self.input_var: data_x}, out, batch_size)
        return out


def create_box_encoder(model_filename, input_name="images",
                       output_name="features", batch_size=32):
    image_encoder = ImageEncoder(model_filename, input_name, output_name)
    image_shape = image_encoder.image_shape

    def encoder(image, boxes):
        image_patches = []
        for box in boxes:
            patch = extract_image_patch(image, box, image_shape[:2])
            if patch is None:
                print("WARNING: Failed to extract image patch: %s." % str(box))
                patch = np.random.uniform(
                    0., 255., image_shape).astype(np.uint8)
            image_patches.append(patch)
        image_patches = np.asarray(image_patches)
        return image_encoder(image_patches, batch_size)

    return encoder


def generate_detections(encoder, mot_dir, output_dir, detection_dir=None):
    """Generate detections with features.

    Parameters
    ----------
    encoder : Callable[image, ndarray] -> ndarray
        The encoder function takes as input a BGR color image and a matrix of
        bounding boxes in format `(x, y, w, h)` and returns a matrix of
        corresponding feature vectors.
    mot_dir : str
        Path to the MOTChallenge directory (can be either train or test).
    output_dir : str
        Path to the output directory. Will be created if it does not exist.
    detection_dir : Optional[str]
        Path to custom detections. The directory structure should be the
        default MOTChallenge structure: `[sequence]/det/det.txt`. If None,
        uses the standard MOTChallenge detections.

    """
    if detection_dir is None:
        detection_dir = mot_dir
    try:
        os.makedirs(output_dir)
    except OSError as exception:
        if exception.errno == errno.EEXIST and os.path.isdir(output_dir):
            pass
        else:
            raise ValueError(
                "Failed to create output directory '%s'" % output_dir)

    for sequence in os.listdir(mot_dir):
        print("Processing %s" % sequence)
        sequence_dir = os.path.join(mot_dir, sequence)

        image_dir = os.path.join(sequence_dir, "img1")
        image_filenames = {
            int(os.path.splitext(f)[0]): os.path.join(image_dir, f)
            for f in os.listdir(image_dir)}

        detection_file = os.path.join(
            detection_dir, sequence, "det/det.txt")
        detections_in = np.loadtxt(detection_file, delimiter=',')
        detections_out = []

        frame_indices = detections_in[:, 0].astype(np.int32)
        min_frame_idx = frame_indices.min()
        max_frame_idx = frame_indices.max()
        for frame_idx in range(min_frame_idx, max_frame_idx + 1):
            print("Frame %05d/%05d" % (frame_idx, max_frame_idx))
            mask = frame_indices == frame_idx
            rows = detections_in[mask]

            if frame_idx not in image_filenames:
                print("WARNING could not find image for frame %d" % frame_idx)
                continue
            bgr_image = cv2.imread(
                image_filenames[frame_idx], cv2.IMREAD_COLOR)
            features = encoder(bgr_image, rows[:, 2:6].copy())
            detections_out += [np.r_[(row, feature)] for row, feature
                               in zip(rows, features)]

        output_filename = os.path.join(output_dir, "%s.npy" % sequence)
        np.save(
            output_filename, np.asarray(detections_out), allow_pickle=False)


def parse_args():
    """Parse command line arguments.
    """
    parser = argparse.ArgumentParser(description="Re-ID feature extractor")
    parser.add_argument(
        "--model",
        default="resources/networks/mars-small128.pb",
        help="Path to frozen inference graph protobuf.")
    parser.add_argument(
        "--mot_dir", help="Path to MOTChallenge directory (train or test)",
        required=True)
    parser.add_argument(
        "--detection_dir", help="Path to custom detections. Defaults to "
        "standard MOT detections. Directory structure should be the default "
        "MOTChallenge structure: [sequence]/det/det.txt", default=None)
    parser.add_argument(
        "--output_dir", help="Output directory. Will be created if it does not"
        " exist.", default="detections")
    return parser.parse_args()


def main():
    args = parse_args()
    encoder = create_box_encoder(args.model, batch_size=32)
    generate_detections(encoder, args.mot_dir, args.output_dir,
                        args.detection_dir)


if __name__ == "__main__":
    main()
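
# Example (programmatic use): a minimal sketch of calling the encoder from
# another script, kept as a comment so this module's behavior is unchanged.
# It assumes the module is importable as `generate_detections`, a frozen
# mars-small128.pb graph, and an OpenCV-readable frame; the paths, the example
# box, and the 128-D feature size are illustrative assumptions, not fixed here.
#
#     import cv2
#     import numpy as np
#     from generate_detections import create_box_encoder
#
#     encoder = create_box_encoder(
#         "resources/networks/mars-small128.pb", batch_size=32)
#     bgr_image = cv2.imread("MOT16-02/img1/000001.jpg", cv2.IMREAD_COLOR)
#     boxes = np.array([[100., 150., 60., 180.]])  # one (x, y, w, h) box
#     features = encoder(bgr_image, boxes)         # e.g. shape (1, 128)
#
# Each row that generate_detections() writes to `<sequence>.npy` is the
# original detection row from det.txt followed by its appearance feature
# vector, as built by `np.r_[(row, feature)]` above.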