"""Gradio demo: predict co-saliency maps for a group of four images with GCoNet."""
import os
import warnings
from glob import glob

import cv2
import numpy as np
from PIL import Image
import torch
from torchvision import transforms
import gradio as gr

from models.GCoNet import GCoNet


# Index 0 selects 'cpu'; change the index to 1 to run on 'cuda'.
device = ['cpu', 'cuda'][0]


class ImagePreprocessor():
    """Turn a PIL image into the normalized tensor fed to the network."""

    def __init__(self) -> None:
        self.transform_image = transforms.Compose([
            # Every image is resized to a fixed 256x256 so the group can be
            # stacked into a single batch.
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            # Per-channel mean / std (the values commonly used for
            # ImageNet-pretrained backbones).
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])

    def proc(self, image):
        """Apply the resize / to-tensor / normalize pipeline to ``image``."""
        return self.transform_image(image)


model = GCoNet(bb_pretrained=False).to(device)
# NOTE: despite its name, ``state_dict`` holds the *path* of the checkpoint.
state_dict = './ultimate_duts_cocoseg (The best one).pth'
if os.path.exists(state_dict):
    gconet_dict = torch.load(state_dict, map_location=device)
    model.load_state_dict(gconet_dict)
else:
    # Keep the demo running (best effort), but make it obvious that the
    # predictions come from randomly initialised weights.
    warnings.warn(
        'Checkpoint {!r} not found; the model runs with uninitialised weights.'.format(state_dict)
    )
# Inference only: always switch to evaluation mode.
model.eval()

# The transform pipeline is stateless, so build it once instead of per request.
image_preprocessor = ImagePreprocessor()


def pred_maps(image_1, image_2, image_3, image_4):
    """Predict a saliency map for each of the four input images.

    The four images are processed together as one batch by the model.

    Args:
        image_1, image_2, image_3, image_4: images as numpy arrays
            (as delivered by the ``gr.Image`` inputs).

    Returns:
        A list with one RGB ``np.ndarray`` per input: the original image and
        its predicted map (resized back to the input resolution) stacked
        side by side.

    Raises:
        ValueError: if any of the four image slots is empty (``None``).
    """
    images = [image_1, image_2, image_3, image_4]

    # An empty slot arrives as None; fail with a clear message instead of an
    # AttributeError on ``None.shape``.
    missing = [str(idx) for idx, image in enumerate(images, start=1) if image is None]
    if missing:
        raise ValueError(
            'All 4 image slots must be filled; missing image(s): ' + ', '.join(missing)
        )

    # Remember the original (height, width) so maps can be resized back.
    image_shapes = [image.shape[:2] for image in images]
    images = [Image.fromarray(image) for image in images]
    images_proc = torch.stack([image_preprocessor.proc(image) for image in images])

    with torch.no_grad():
        # The model returns a sequence of outputs; the last one is used here.
        scaled_preds_tensor = model(images_proc.to(device))[-1]

    image_preds = []
    for image, image_shape, pred_tensor in zip(images, image_shapes, scaled_preds_tensor):
        # ``.cpu()`` is a no-op for tensors that already live on the CPU.
        resized = torch.nn.functional.interpolate(
            pred_tensor.cpu().unsqueeze(0),
            size=image_shape,
            mode='bilinear',
            align_corners=True,
        )
        # Index batch and channel explicitly: a blanket ``squeeze()`` would
        # also drop a height or width of 1 and break the stacking below.
        pred = resized[0, 0].numpy()
        # NOTE(review): the uint8 cast presumes ``pred`` lies in [0, 1] -
        # confirm against the model's output activation.
        pred_rgb = cv2.cvtColor((pred * 255).astype(np.uint8), cv2.COLOR_GRAY2RGB)
        image_preds.append(np.hstack([np.array(image.convert('RGB')), pred_rgb]))
    return image_preds


# Number of image slots; must match the number of ``pred_maps`` parameters.
N = 4
# Example gallery is currently disabled:
# examples = [[_] for _ in glob('example_images/butterfly/*')][:N]
ipt = [gr.Image().style(width=600, height=150) for _ in range(N)]
opt = [gr.Image().style(width=600, height=150) for _ in range(N)]
demo = gr.Interface(
    fn=pred_maps,
    inputs=ipt,
    outputs=opt,
    # examples=examples,
    interpretation='default',
    title='Online demo for `GCoNet+: A Stronger Group Collaborative Co-Salient Object Detector (T-PAMI 2023)`',
    description='Upload pictures, most of which contain salient objects of the same class. Our demo will give you the binary maps of these co-salient objects :)\n**********Example images need to be dropped into each block, instead of click.**********'
)
demo.launch(debug=True)