Spaces:

henry000
/

YOLO

Running

henry000 committed on Aug 10, 2024

Commit

e0c8580

1 Parent(s): 10420ed

✨ [Add] multi-GPU support in validation mode

Files changed (2) hide show

yolo/tools/solver.py CHANGED Viewed

@@ -26,6 +26,7 @@ from yolo.utils.logging_utils import ProgressLogger, log_model_structure
 from yolo.utils.model_utils import (
     ExponentialMovingAverage,
     PostProccess,
     create_optimizer,
     create_scheduler,
     predicts_to_json,
@@ -146,7 +147,7 @@ class ModelTrainer:
             self.progress.finish_one_epoch(epoch_loss, epoch_idx=epoch_idx)
             mAPs = self.validator.solve(self.validation_dataloader, epoch_idx=epoch_idx)
-            if self.good_epoch(mAPs):
                 self.save_checkpoint(epoch_idx=epoch_idx)
             # TODO: save model if result are better than before
         self.progress.finish_train()
@@ -254,6 +255,7 @@ class ModelValidator:
         self.progress.visualize_image(images, targets, predicts, epoch_idx=epoch_idx)
         with open(self.json_path, "w") as f:
             json.dump(predict_json, f)
         if hasattr(self, "coco_gt"):
             self.progress.start_pycocotools()

 from yolo.utils.model_utils import (
     ExponentialMovingAverage,
     PostProccess,
+    collect_prediction,
     create_optimizer,
     create_scheduler,
     predicts_to_json,
             self.progress.finish_one_epoch(epoch_loss, epoch_idx=epoch_idx)
             mAPs = self.validator.solve(self.validation_dataloader, epoch_idx=epoch_idx)
+            if mAPs is not None and self.good_epoch(mAPs):
                 self.save_checkpoint(epoch_idx=epoch_idx)
             # TODO: save model if result are better than before
         self.progress.finish_train()
         self.progress.visualize_image(images, targets, predicts, epoch_idx=epoch_idx)
         with open(self.json_path, "w") as f:
+            predict_json = collect_prediction(predict_json, self.progress.local_rank)
             json.dump(predict_json, f)
         if hasattr(self, "coco_gt"):
             self.progress.start_pycocotools()

yolo/utils/model_utils.py CHANGED Viewed

@@ -130,7 +130,7 @@ class PostProccess:
         self.converter = converter
         self.nms = nms_cfg
-    def __call__(self, predict, rev_tensor: Optional[Tensor] = None):
         prediction = self.converter(predict["Main"])
         pred_class, _, pred_bbox = prediction[:3]
         pred_conf = prediction[3] if len(prediction) == 4 else None
@@ -140,6 +140,26 @@ class PostProccess:
         return pred_bbox
 def predicts_to_json(img_paths, predicts, rev_tensor):
     """
     TODO: function document

         self.converter = converter
         self.nms = nms_cfg
+    def __call__(self, predict, rev_tensor: Optional[Tensor] = None) -> List[Tensor]:
         prediction = self.converter(predict["Main"])
         pred_class, _, pred_bbox = prediction[:3]
         pred_conf = prediction[3] if len(prediction) == 4 else None
         return pred_bbox
+def collect_prediction(predict_json: List, local_rank: int) -> List:
+    """
+    Gathers the per-process prediction lists onto the process whose ``local_rank`` is 0.
+
+    If ``torch.distributed`` is not initialized, nothing is gathered and ``predict_json`` is returned as is.
+    If it is initialized, ``dist.gather_object`` is issued on both branches, so presumably every process of
+    the group is expected to call this function.
+
+    Args:
+        predict_json (List): The predictions generated by the current process. It has to be iterable, because
+            the gathered per-process results are flattened item by item.
+        local_rank (int): The rank of the current process. The process with rank 0 receives the gathered data.
+
+    Returns:
+        List: On rank 0 of an initialized process group, one flat list holding the items of every process's
+            ``predict_json``; in every other case, the unchanged ``predict_json`` of the current process.
+    """
+    # NOTE(review): the receiving branch is selected by `local_rank`, while `dst=0` is a rank within the
+    # process group. The two agree on a single node; confirm the behaviour for multi-node runs.
+    if dist.is_initialized() and local_rank == 0:
+        # One output slot per process in the group; gather_object fills them on the destination.
+        all_predictions = [None for _ in range(dist.get_world_size())]
+        dist.gather_object(predict_json, all_predictions, dst=0)
+        # Flatten the list of per-process lists into a single list of predictions.
+        predict_json = [item for sublist in all_predictions for item in sublist]
+    elif dist.is_initialized():
+        # Sending side: contribute the local predictions and pass no output list.
+        dist.gather_object(predict_json, None, dst=0)
+    return predict_json
 def predicts_to_json(img_paths, predicts, rev_tensor):
     """
     TODO: function document