Spaces: Running on L40S

update

- infer_api.py +18 -0
infer_api.py CHANGED

@@ -1,3 +1,4 @@
+import spaces
 from PIL import Image
 import glob
 
@@ -102,6 +103,7 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 VIEWS = ['front', 'front_right', 'right', 'back', 'left', 'front_left']
 
 
+@spaces.GPU
 def set_seed(seed):
     random.seed(seed)
     np.random.seed(seed)
@@ -165,6 +167,7 @@ def process_image(image, totensor, width, height):
     return totensor(image)
 
 
+@spaces.GPU
 @torch.no_grad()
 def inference(validation_pipeline, bkg_remover, input_image, vae, feature_extractor, image_encoder, unet, ref_unet, tokenizer,
               text_encoder, pretrained_model_path, generator, validation, val_width, val_height, unet_condition_type,
@@ -268,6 +271,7 @@ def save_image_numpy(ndarr):
     im = im.resize((1024, 1024), Image.LANCZOS)
     return im
 
+@spaces.GPU
 def run_multiview_infer(data, pipeline, cfg: TestConfig, num_levels=3):
     if cfg.seed is None:
         generator = None
@@ -333,6 +337,7 @@ def run_multiview_infer(data, pipeline, cfg: TestConfig, num_levels=3):
     return results
 
 
+@spaces.GPU
 def load_multiview_pipeline(cfg):
     pipeline = StableUnCLIPImg2ImgPipeline.from_pretrained(
         cfg.pretrained_path,
@@ -450,6 +455,7 @@ def calc_horizontal_offset2(target_mask, source_img):
     return best_offset_value
 
 
+@spaces.GPU
 def get_distract_mask(generator, color_0, color_1, normal_0=None, normal_1=None, thres=0.25, ratio=0.50, outside_thres=0.10, outside_ratio=0.20):
     distract_area = np.abs(color_0 - color_1).sum(axis=-1) > thres
     if normal_0 is not None and normal_1 is not None:
@@ -516,6 +522,7 @@ def get_distract_mask(generator, color_0, color_1, normal_0=None, normal_1=None,
 
 
 class InferRefineAPI:
+    @spaces.GPU
     def __init__(self, config):
         self.sam = sam_model_registry["vit_h"](checkpoint="./ckpt/sam_vit_h_4b8939.pth").cuda()
         self.generator = SamAutomaticMaskGenerator(
@@ -529,6 +536,7 @@ class InferRefineAPI:
         )
         self.outside_ratio = 0.20
 
+    @spaces.GPU
     def refine(self, meshes, imgs):
         fixed_v, fixed_f, fixed_t = None, None, None
         flow_vert, flow_vector = None, None
@@ -680,6 +688,7 @@ class InferRefineAPI:
 
 
 class InferSlrmAPI:
+    @spaces.GPU
     def __init__(self, config):
         self.config_path = config['config_path']
         self.config = OmegaConf.load(self.config_path)
@@ -694,6 +703,7 @@ class InferSlrmAPI:
         self.model.init_flexicubes_geometry(self.device, fovy=30.0, is_ortho=self.model.is_ortho)
         self.model = self.model.eval()
 
+    @spaces.GPU
     def gen(self, imgs):
         imgs = [ cv2.imread(img[0])[:, :, ::-1] for img in imgs ]
         imgs = np.stack(imgs, axis=0).astype(np.float32) / 255.0
@@ -701,6 +711,7 @@ class InferSlrmAPI:
         mesh_glb_fpaths = self.make3d(imgs)
         return mesh_glb_fpaths[1:4] + mesh_glb_fpaths[0:1]
 
+    @spaces.GPU
     def make3d(self, images):
         input_cameras = torch.tensor(np.load('slrm/cameras.npy')).to(device)
 
@@ -724,6 +735,7 @@ class InferSlrmAPI:
 
         return mesh_glb_fpaths
 
+    @spaces.GPU
     def make_mesh(self, mesh_fpath, planes, level=None):
         mesh_basename = os.path.basename(mesh_fpath).split('.')[0]
         mesh_dirname = os.path.dirname(mesh_fpath)
@@ -751,6 +763,7 @@ class InferSlrmAPI:
 
 
 class InferMultiviewAPI:
+    @spaces.GPU
     def __init__(self, config):
         parser = argparse.ArgumentParser()
         parser.add_argument("--seed", type=int, default=42)
@@ -784,6 +797,7 @@ class InferMultiviewAPI:
         return im
 
 
+    @spaces.GPU
     def gen(self, img, seed, num_levels):
         set_seed(seed)
         data = {}
@@ -801,6 +815,7 @@ class InferMultiviewAPI:
 
 
 class InferCanonicalAPI:
+    @spaces.GPU
     def __init__(self, config):
         self.config = config
         self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
@@ -810,6 +825,7 @@ class InferCanonicalAPI:
 
         self.setup(**self.loaded_config)
 
+    @spaces.GPU
     def setup(self,
               validation: Dict,
               pretrained_model_path: str,
@@ -858,6 +874,7 @@ class InferCanonicalAPI:
 
         self.bkg_remover = BkgRemover()
 
+    @spaces.GPU
     def canonicalize(self, image, seed):
         generator = torch.Generator(device=device).manual_seed(seed)
         return inference(
@@ -866,6 +883,7 @@
             use_noise=self.use_noise, noise_d=self.noise_d, crop=True, seed=seed, timestep=self.timestep
         )
 
+    @spaces.GPU
     def gen(self, img_input, seed=0):
         if np.array(img_input).shape[-1] == 4 and np.array(img_input)[..., 3].min() == 255:
             # convert to RGB