"""Ensemble COCO-format detections from several models via mean-score NMS."""

import json
import os

import numpy as np
import pandas as pd
import torch
from pycocotools.coco import COCO
from torchvision.ops.boxes import box_convert, box_iou
from tqdm import tqdm


class NpEncoder(json.JSONEncoder):
    """Custom JSON encoder for NumPy data types.

    NumPy scalars and arrays are not serializable by the default JSON
    encoder; this encoder converts them into standard Python types.
    """

    def default(self, obj):
        """Convert NumPy objects to their native Python equivalents.

        Args:
            obj (any): The object to encode.

        Returns:
            any: The JSON-serializable representation of the object.

        Raises:
            TypeError: Raised by the base encoder for any other
                unsupported type.
        """
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


def _merge_intersecting_sets(groups):
    """Merge every collection of sets that (transitively) share an element.

    Algorithm by Niklas B., see
    https://github.com/rikpg/IntersectionMerge/blob/master/core.py

    Args:
        groups (iterable): Iterable of iterables of hashable items. Empty
            members are ignored.

    Returns:
        list[set]: Pairwise-disjoint sets covering all input items.
    """
    sets = [set(group) for group in groups if group]
    merged = True
    while merged:
        merged = False
        results = []
        while sets:
            common, rest = sets[0], sets[1:]
            sets = []
            for candidate in rest:
                if candidate.isdisjoint(common):
                    sets.append(candidate)
                else:
                    merged = True
                    common |= candidate
            results.append(common)
        sets = results
    return sets


class Ensembler:
    """Ensemble predictions from multiple object detection models.

    This class loads ground truth data and predictions from several models,
    merges overlapping detections of the same category (mean-score NMS), and
    saves the ensembled results in COCO instance format.
    """

    def __init__(
        self,
        output_dir,
        dataset_name,
        grplist,
        iou_thresh,
        coco_gt_path=None,
        coco_instances_results_fname=None,
    ):
        """Initialize the Ensembler and load all prediction files.

        Args:
            output_dir (str): The base directory where model outputs and
                ensembled results are stored.
            dataset_name (str): The name of the dataset being evaluated.
            grplist (list[str]): Subdirectory names under `output_dir`, each
                containing the prediction file from one model.
            iou_thresh (float): Two boxes are considered overlapping when
                their IoU is strictly greater than this threshold.
            coco_gt_path (str, optional): Full path to the ground truth COCO
                JSON file. If None, `<output_dir>/<dataset_name>_coco_format.json`
                is used. Defaults to None.
            coco_instances_results_fname (str, optional): Filename of the
                COCO prediction file inside each model's subdirectory. If
                None, "coco_instances_results.json" is used. Defaults to None.
        """
        self.output_dir = output_dir
        self.dataset_name = dataset_name
        self.grplist = grplist
        self.iou_thresh = iou_thresh
        self.n_detectors = len(grplist)

        if coco_gt_path is None:
            fname_gt = os.path.join(output_dir, dataset_name + "_coco_format.json")
        else:
            fname_gt = coco_gt_path

        if coco_instances_results_fname is None:
            fname_dt = "coco_instances_results.json"
        else:
            fname_dt = coco_instances_results_fname

        # Load the ground truth; it supplies the image list and categories.
        coco_gt = COCO(fname_gt)

        # Load each detector's predictions against the ground truth index.
        dtlist = []
        for grp in grplist:
            fname = os.path.join(output_dir, grp, fname_dt)
            dtlist.append(coco_gt.loadRes(fname))
            print(
                "Successfully loaded {} into memory. {} instance detected.\n".format(
                    fname, len(dtlist[-1].anns)
                )
            )

        self.coco_gt = coco_gt
        self.cats = [cat["id"] for cat in self.coco_gt.dataset["categories"]]
        self.dtlist = dtlist
        self.results = []

        print(
            "Working with {} models, {} categories, and {} images.".format(
                self.n_detectors, len(self.cats), len(self.coco_gt.imgs.keys())
            )
        )

    def mean_score_nms(self):
        """Merge overlapping boxes across detectors (mean-score NMS).

        For every image and every category, detections whose IoU exceeds
        `self.iou_thresh` are linked, and transitively linked detections are
        grouped into one object. Each group yields a single output detection:
        the member with the highest original score, whose score is replaced
        by the sum of the group's scores divided by
        `max(self.n_detectors, group size)`.

        Returns:
            Ensembler: The instance itself, with `self.results` populated
            with the ensembled predictions.
        """
        winning_list = []
        print(
            "Computing mean score non-max suppression ensembling for {} images.".format(
                len(self.coco_gt.imgs.keys())
            )
        )
        for img in tqdm(self.coco_gt.imgs.keys()):
            # One frame per detector, tagged with the detector index.
            frames = [
                pd.DataFrame(coco_dt.imgToAnns[img]).assign(det=det_idx)
                for det_idx, coco_dt in enumerate(self.dtlist)
            ]
            df = pd.concat(frames, ignore_index=True)
            if df.empty:
                continue

            for cat in self.cats:
                dfcat = df[df["category_id"] == cat]
                if dfcat.empty:
                    # No detections of this category in this image; an empty
                    # tensor cannot be converted by box_convert.
                    continue

                boxes = box_convert(
                    torch.tensor(dfcat["bbox"].tolist(), dtype=torch.float32),
                    in_fmt="xywh",
                    out_fmt="xyxy",
                )
                # Boolean matrix: row i marks the detections overlapping i.
                overlaps = (box_iou(boxes, boxes) > self.iou_thresh).numpy()

                # Groups are rebuilt for every category so that objects of
                # different categories are never mixed or counted twice.
                groups = {frozenset(dfcat.index[row]) for row in overlaps}

                for members in _merge_intersecting_sets(groups):
                    # Sorted for a deterministic tie-break between equal scores.
                    dfset = dfcat.loc[sorted(members)]
                    # The denominator allows for more detections than detectors.
                    mean_score = dfset["score"].sum() / max(
                        self.n_detectors, len(members)
                    )
                    winning_box = dfset.iloc[dfset["score"].argmax()].to_dict()
                    winning_box["score"] = mean_score
                    winning_list.append(winning_box)

        print("{} resulting instances from NMS".format(len(winning_list)))
        self.results = winning_list
        return self

    def save_coco_instances(self, fname="coco_instances_results.json"):
        """Save the ensembled prediction results to a JSON file.

        The file is written to `self.output_dir` as a JSON list of the
        detections in `self.results`. Nothing is written when there are no
        results.

        Args:
            fname (str, optional): The filename for the output JSON file.
                Defaults to "coco_instances_results.json".
        """
        if not self.results:
            print("No ensembled results to save; nothing written.")
            return
        with open(os.path.join(self.output_dir, fname), "w") as f:
            json.dump(self.results, f, cls=NpEncoder)


if __name__ == "__main__":
    # Example usage:
    # This assumes an 'output' directory containing 'dev_coco_format.json'
    # and subdirectories 'fold1', 'fold2', etc., each containing a
    # 'coco_instances_results.json' file.
    ens = Ensembler("output", "dev", ["fold1", "fold2", "fold3", "fold4", "fold5"], 0.2)
    ens.mean_score_nms()