|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import sys |
|
import torch |
|
import cv2 |
|
from PIL import Image |
|
from eval.grounded_sam.grounded_sam2_florence2_autolabel_pipeline import FlorenceSAM |
|
|
|
class ObjectDetector:
    """Crop object instances out of an image using the ``FlorenceSAM`` pipeline.

    Thin wrapper around ``FlorenceSAM.od_grounding_and_segmentation`` that
    either returns the raw result dictionary or a size-filtered list of
    instance crops converted to PIL images.
    """

    def __init__(self, device):
        """Create the underlying ``FlorenceSAM`` pipeline.

        Args:
            device: Device specifier accepted by ``torch.device`` (for example
                ``"cuda"``); it is also forwarded unchanged to ``FlorenceSAM``.
        """
        self.device = torch.device(device)
        self.detector = FlorenceSAM(device)

    def _detect(self, gen_image, label):
        """Run grounding + segmentation and return the instance result dict."""
        # The pipeline returns a pair; only its second element is used here.
        _, instance_result_dict = self.detector.od_grounding_and_segmentation(
            image=gen_image,
            text_input=label,
        )
        return instance_result_dict

    def get_instances(self, gen_image, label, min_size=64):
        """Return the detected instance crops that are large enough.

        A crop is dropped when its area is below ``min_size * min_size`` or
        when its shorter side is below ``min_size // 4``.

        Args:
            gen_image: Image handed to the detection pipeline.
            label: Text prompt describing the objects to look for.
            min_size: Reference side length, in pixels, for the size filter.

        Returns:
            A list of ``PIL.Image.Image`` crops, in the order the pipeline
            produced them.
        """
        instances = self._detect(gen_image, label)["instance_images"]

        # Loop-invariant thresholds.
        min_area = min_size * min_size
        min_side = min_size // 4

        filtered_instances = []
        for crop in instances:
            # Array shapes are ordered (rows, cols, ...), i.e. height first.
            height, width = crop.shape[:2]
            if height * width < min_area or min(height, width) < min_side:
                continue

            # NOTE(review): presumably the crops are BGR (OpenCV convention),
            # hence the channel swap before building the PIL image — confirm
            # against FlorenceSAM.
            rgb = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
            filtered_instances.append(Image.fromarray(rgb))

        return filtered_instances

    def get_multiple_instances(self, gen_image, label, min_size=64):
        """Return the pipeline's raw instance result dictionary.

        Args:
            gen_image: Image handed to the detection pipeline.
            label: Text prompt describing the objects to look for.
            min_size: Unused; no size filtering is applied here. Kept so the
                signature stays parallel to ``get_instances``.

        Returns:
            The unmodified result dictionary produced by the pipeline.
        """
        return self._detect(gen_image, label)
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: detect instances in a sample image and save up to
    # three square-padded crops under ``tmp/``.
    from src.train.data.data_utils import pad_to_square
    from eval.idip.dino import DINOScore

    detector = ObjectDetector("cuda")
    # NOTE(review): loaded but not used below; the score embedded in the
    # output file name is a constant placeholder.
    dino_model = DINOScore("cuda")

    # Close the source file as soon as the RGB copy has been made.
    with Image.open("assets/tests/20250320-151038.jpeg") as source_image:
        gen_image = source_image.convert("RGB")
    label = "two people"

    save_dir = "tmp"
    os.makedirs(save_dir, exist_ok=True)

    for i, img in enumerate([gen_image]):
        # Scale the size filter with the image width; keep at most three crops.
        found_ips = detector.get_instances(
            img, label, min_size=img.size[0] // 20
        )[:3]
        for j, ip in enumerate(found_ips):
            score = 1
            # Pad once, at save time (assumes pad_to_square leaves an
            # already-square image unchanged — confirm).
            pad_to_square(ip).save(f"{save_dir}/{label}_{i}_{j}_{score}.png")
|
|
|
|
|
|
|
|
|
|
|
|
|
|