Huiwenshi committed
Commit 0694d37
1 Parent(s): d7aa774

Delete folder infer/.ipynb_checkpoints with huggingface_hub

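The folder removal itself can be reproduced with the huggingface_hub client; the commit message above matches the default message that HfApi.delete_folder generates. A minimal sketch, assuming a placeholder repo id and a token with write access:

    from huggingface_hub import HfApi

    api = HfApi(token="hf_...")  # token needs write access to the repository
    api.delete_folder(
        path_in_repo="infer/.ipynb_checkpoints",
        repo_id="your-org/your-model",  # placeholder, not the actual repo id
        repo_type="model",
    )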
infer/.ipynb_checkpoints/__init__-checkpoint.py DELETED
@@ -1,32 +0,0 @@
- # Open Source Model Licensed under the Apache License Version 2.0
- # and Other Licenses of the Third-Party Components therein:
- # The below Model in this distribution may have been modified by THL A29 Limited
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
- # The below software and/or models in this distribution may have been
- # modified by THL A29 Limited ("Tencent Modifications").
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
-
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
- # except for the third-party components listed below.
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
- # in the respective licenses of these third-party components.
- # Users must comply with all terms and conditions of original licenses of these third-party
- # components and must ensure that the usage of the third party components adheres to
- # all relevant laws and regulations.
-
- # For avoidance of doubts, Hunyuan 3D means the large language models and
- # their software and algorithms, including trained model weights, parameters (including
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
- # fine-tuning enabling code and other elements of the foregoing made publicly available
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-
- from .removebg import Removebg
- from .text_to_image import Text2Image
- from .image_to_views import Image2Views, save_gif
- from .views_to_mesh import Views2Mesh
- from .gif_render import GifRenderer
-
- from .utils import seed_everything, auto_amp_inference
- from .utils import get_parameter_number, set_parameter_grad_false
infer/.ipynb_checkpoints/gif_render-checkpoint.py DELETED
@@ -1,79 +0,0 @@
- # Open Source Model Licensed under the Apache License Version 2.0
- # and Other Licenses of the Third-Party Components therein:
- # The below Model in this distribution may have been modified by THL A29 Limited
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
- # The below software and/or models in this distribution may have been
- # modified by THL A29 Limited ("Tencent Modifications").
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
-
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
- # except for the third-party components listed below.
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
- # in the respective licenses of these third-party components.
- # Users must comply with all terms and conditions of original licenses of these third-party
- # components and must ensure that the usage of the third party components adheres to
- # all relevant laws and regulations.
-
- # For avoidance of doubts, Hunyuan 3D means the large language models and
- # their software and algorithms, including trained model weights, parameters (including
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
- # fine-tuning enabling code and other elements of the foregoing made publicly available
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-
- import os, sys
- sys.path.insert(0, f"{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}")
-
- from svrm.ldm.vis_util import render
- from infer.utils import seed_everything, timing_decorator
-
- class GifRenderer():
-     '''
-     render frame(s) of a mesh using pytorch3d
-     '''
-     def __init__(self, device="cuda:0"):
-         self.device = device
-
-     @timing_decorator("gif render")
-     def __call__(
-         self,
-         obj_filename,
-         elev=0,
-         azim=0,
-         resolution=512,
-         gif_dst_path='',
-         n_views=120,
-         fps=30,
-         rgb=True
-     ):
-         render(
-             obj_filename,
-             elev=elev,
-             azim=azim,
-             resolution=resolution,
-             gif_dst_path=gif_dst_path,
-             n_views=n_views,
-             fps=fps,
-             device=self.device,
-             rgb=rgb
-         )
-
- if __name__ == "__main__":
-     import argparse
-
-     def get_args():
-         parser = argparse.ArgumentParser()
-         parser.add_argument("--mesh_path", type=str, required=True)
-         parser.add_argument("--output_gif_path", type=str, required=True)
-         parser.add_argument("--device", default="cuda:0", type=str)
-         return parser.parse_args()
-
-     args = get_args()
-
-     gif_renderer = GifRenderer(device=args.device)
-
-     gif_renderer(
-         args.mesh_path,
-         gif_dst_path=args.output_gif_path
-     )
infer/.ipynb_checkpoints/image_to_views-checkpoint.py DELETED
@@ -1,126 +0,0 @@
- # Open Source Model Licensed under the Apache License Version 2.0
- # and Other Licenses of the Third-Party Components therein:
- # The below Model in this distribution may have been modified by THL A29 Limited
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
- # The below software and/or models in this distribution may have been
- # modified by THL A29 Limited ("Tencent Modifications").
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
-
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
- # except for the third-party components listed below.
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
- # in the respective licenses of these third-party components.
- # Users must comply with all terms and conditions of original licenses of these third-party
- # components and must ensure that the usage of the third party components adheres to
- # all relevant laws and regulations.
-
- # For avoidance of doubts, Hunyuan 3D means the large language models and
- # their software and algorithms, including trained model weights, parameters (including
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
- # fine-tuning enabling code and other elements of the foregoing made publicly available
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-
- import os, sys
- sys.path.insert(0, f"{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}")
-
- import time
- import torch
- import random
- import numpy as np
- from PIL import Image
- from einops import rearrange
- from PIL import Image, ImageSequence
-
- from infer.utils import seed_everything, timing_decorator, auto_amp_inference
- from infer.utils import get_parameter_number, set_parameter_grad_false, str_to_bool
- from mvd.hunyuan3d_mvd_std_pipeline import HunYuan3D_MVD_Std_Pipeline
- from mvd.hunyuan3d_mvd_lite_pipeline import Hunyuan3d_MVD_Lite_Pipeline
-
-
- def save_gif(pils, save_path, df=False):
-     # save a list of PIL.Image frames as an animated gif
-     spf = 4000 / len(pils)  # ms per frame, so the gif lasts about 4 seconds
-     os.makedirs(os.path.dirname(save_path), exist_ok=True)
-     pils[0].save(save_path, format="GIF", save_all=True, append_images=pils[1:], duration=spf, loop=0)
-     return save_path
-
-
- class Image2Views():
-     def __init__(self, device="cuda:0", use_lite=False, save_memory=False):
-         self.device = device
-         if use_lite:
-             self.pipe = Hunyuan3d_MVD_Lite_Pipeline.from_pretrained(
-                 "./weights/mvd_lite",
-                 torch_dtype=torch.float16,
-                 use_safetensors=True,
-             )
-         else:
-             self.pipe = HunYuan3D_MVD_Std_Pipeline.from_pretrained(
-                 "./weights/mvd_std",
-                 torch_dtype=torch.float16,
-                 use_safetensors=True,
-             )
-         self.pipe = self.pipe.to(device)
-         self.order = [0, 1, 2, 3, 4, 5] if use_lite else [0, 2, 4, 5, 3, 1]
-         self.save_memory = save_memory
-         set_parameter_grad_false(self.pipe.unet)
-         print('image2views unet model', get_parameter_number(self.pipe.unet))
-
-     @torch.no_grad()
-     @timing_decorator("image to views")
-     @auto_amp_inference
-     def __call__(self, *args, **kwargs):
-         if self.save_memory:
-             self.pipe = self.pipe.to(self.device)
-             torch.cuda.empty_cache()
-             res = self.call(*args, **kwargs)
-             self.pipe = self.pipe.to("cpu")
-         else:
-             res = self.call(*args, **kwargs)
-         torch.cuda.empty_cache()
-         return res
-
-     def call(self, pil_img, seed=0, steps=50, guidance_scale=2.0):
-         seed_everything(seed)
-         generator = torch.Generator(device=self.device).manual_seed(seed)
-         res_img = self.pipe(pil_img,
-                             num_inference_steps=steps,
-                             guidance_scale=guidance_scale,
-                             generator=generator).images
-         show_image = rearrange(np.asarray(res_img[0], dtype=np.uint8), '(n h) (m w) c -> (n m) h w c', n=3, m=2)
-         pils = [res_img[1]] + [Image.fromarray(show_image[idx]) for idx in self.order]
-         torch.cuda.empty_cache()
-         return res_img, pils
-
-
- if __name__ == "__main__":
-     import argparse
-
-     def get_args():
-         parser = argparse.ArgumentParser()
-         parser.add_argument("--rgba_path", type=str, required=True)
-         parser.add_argument("--output_views_path", type=str, required=True)
-         parser.add_argument("--output_cond_path", type=str, required=True)
-         parser.add_argument("--seed", default=0, type=int)
-         parser.add_argument("--steps", default=50, type=int)
-         parser.add_argument("--device", default="cuda:0", type=str)
-         parser.add_argument("--use_lite", default='false', type=str)
-         return parser.parse_args()
-
-     args = get_args()
-
-     args.use_lite = str_to_bool(args.use_lite)
-
-     rgba_pil = Image.open(args.rgba_path)
-
-     assert rgba_pil.mode == "RGBA", "rgba_pil must be RGBA mode"
-
-     model = Image2Views(device=args.device, use_lite=args.use_lite)
-
-     (views_pil, cond), _ = model(rgba_pil, seed=args.seed, steps=args.steps)
-
-     views_pil.save(args.output_views_path)
-     cond.save(args.output_cond_path)
-
infer/.ipynb_checkpoints/removebg-checkpoint.py DELETED
@@ -1,101 +0,0 @@
- import os, sys
- sys.path.insert(0, f"{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}")
-
- import numpy as np
- from PIL import Image
- from rembg import remove, new_session
- from infer.utils import timing_decorator
-
- class Removebg():
-     def __init__(self, name="u2net"):
-         self.session = new_session(name)
-
-     @timing_decorator("remove background")
-     def __call__(self, rgb_maybe, force=True):
-         '''
-         args:
-             rgb_maybe: PIL.Image, in RGB or RGBA mode
-             force: bool, if the input is RGBA, convert to RGB and then remove the background
-         return:
-             rgba_img: PIL.Image, in RGBA mode
-         '''
-         if rgb_maybe.mode == "RGBA":
-             if force:
-                 rgb_maybe = rgb_maybe.convert("RGB")
-                 rgba_img = remove(rgb_maybe, session=self.session)
-             else:
-                 rgba_img = rgb_maybe
-         else:
-             rgba_img = remove(rgb_maybe, session=self.session)
-
-         rgba_img = white_out_background(rgba_img)
-
-         rgba_img = preprocess(rgba_img)
-
-         return rgba_img
-
-
- def white_out_background(pil_img):
-     data = pil_img.getdata()
-     new_data = []
-     for r, g, b, a in data:
-         if a < 16:  # background
-             new_data.append((255, 255, 255, 0))  # fully transparent white
-         else:
-             is_white = (r > 235) and (g > 235) and (b > 235)
-             new_r = 235 if is_white else r
-             new_g = 235 if is_white else g
-             new_b = 235 if is_white else b
-             new_data.append((new_r, new_g, new_b, a))
-     pil_img.putdata(new_data)
-     return pil_img
-
- def preprocess(rgba_img, size=(512, 512), ratio=1.15):
-     image = np.asarray(rgba_img)
-     rgb, alpha = image[:, :, :3] / 255., image[:, :, 3:] / 255.
-
-     # crop to the bounding box of the foreground
-     coords = np.nonzero(alpha > 0.1)
-     x_min, x_max = coords[0].min(), coords[0].max()
-     y_min, y_max = coords[1].min(), coords[1].max()
-     rgb = (rgb[x_min:x_max, y_min:y_max, :] * 255).astype("uint8")
-     alpha = (alpha[x_min:x_max, y_min:y_max, 0] * 255).astype("uint8")
-
-     # pad to a square canvas with a margin around the object
-     h, w = rgb.shape[:2]
-     resize_side = int(max(h, w) * ratio)
-     pad_h, pad_w = resize_side - h, resize_side - w
-     start_h, start_w = pad_h // 2, pad_w // 2
-     new_rgb = np.ones((resize_side, resize_side, 3), dtype=np.uint8) * 255
-     new_alpha = np.zeros((resize_side, resize_side), dtype=np.uint8)
-     new_rgb[start_h:start_h + h, start_w:start_w + w] = rgb
-     new_alpha[start_h:start_h + h, start_w:start_w + w] = alpha
-     rgba_array = np.concatenate((new_rgb, new_alpha[:, :, None]), axis=-1)
-
-     rgba_image = Image.fromarray(rgba_array, 'RGBA')
-     rgba_image = rgba_image.resize(size)
-     return rgba_image
-
-
- if __name__ == "__main__":
-
-     import argparse
-
-     def get_args():
-         parser = argparse.ArgumentParser()
-         parser.add_argument("--rgb_path", type=str, required=True)
-         parser.add_argument("--output_rgba_path", type=str, required=True)
-         parser.add_argument("--force", default=False, action="store_true")
-         return parser.parse_args()
-
-     args = get_args()
-
-     rgb_maybe = Image.open(args.rgb_path)
-
-     model = Removebg()
-
-     rgba_pil = model(rgb_maybe, args.force)
-
-     rgba_pil.save(args.output_rgba_path)
-
-
infer/.ipynb_checkpoints/text_to_image-checkpoint.py DELETED
@@ -1,105 +0,0 @@
- # Open Source Model Licensed under the Apache License Version 2.0
- # and Other Licenses of the Third-Party Components therein:
- # The below Model in this distribution may have been modified by THL A29 Limited
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
- # The below software and/or models in this distribution may have been
- # modified by THL A29 Limited ("Tencent Modifications").
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
-
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
- # except for the third-party components listed below.
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
- # in the respective licenses of these third-party components.
- # Users must comply with all terms and conditions of original licenses of these third-party
- # components and must ensure that the usage of the third party components adheres to
- # all relevant laws and regulations.
-
- # For avoidance of doubts, Hunyuan 3D means the large language models and
- # their software and algorithms, including trained model weights, parameters (including
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
- # fine-tuning enabling code and other elements of the foregoing made publicly available
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
- import os, sys
- sys.path.insert(0, f"{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}")
-
- import torch
- from diffusers import HunyuanDiTPipeline, AutoPipelineForText2Image
-
- from infer.utils import seed_everything, timing_decorator, auto_amp_inference
- from infer.utils import get_parameter_number, set_parameter_grad_false
-
-
- class Text2Image():
-     def __init__(self, pretrain="weights/hunyuanDiT", device="cuda:0", save_memory=None):
-         '''
-         save_memory: set when GPU memory is low; keeps the pipeline on CPU
-         and moves it to the GPU only while generating
-         '''
-         self.save_memory = save_memory
-         self.device = device
-         self.pipe = AutoPipelineForText2Image.from_pretrained(
-             pretrain,
-             torch_dtype=torch.float16,
-             enable_pag=True,
-             pag_applied_layers=["blocks.(16|17|18|19)"]
-         )
-         set_parameter_grad_false(self.pipe.transformer)
-         print('text2image transformer model', get_parameter_number(self.pipe.transformer))
-         if not save_memory:
-             self.pipe = self.pipe.to(device)
-         # Chinese negative prompt listing common failure modes (text, close-up, cropping,
-         # out of frame, low quality, JPEG artifacts, bad anatomy, extra/missing limbs,
-         # fused or extra fingers, long neck, etc.)
-         self.neg_txt = "文本,特写,裁剪,出框,最差质量,低质量,JPEG伪影,PGLY,重复,病态,残缺,多余的手指,变异的手," \
-                        "画得不好的手,画得不好的脸,变异,畸形,模糊,脱水,糟糕的解剖学,糟糕的比例,多余的肢体,克隆的脸," \
-                        "毁容,恶心的比例,畸形的肢体,缺失的手臂,缺失的腿,额外的手臂,额外的腿,融合的手指,手指太多,长脖子"
-
-     @torch.no_grad()
-     @timing_decorator('text to image')
-     @auto_amp_inference
-     def __call__(self, *args, **kwargs):
-         if self.save_memory:
-             self.pipe = self.pipe.to(self.device)
-             torch.cuda.empty_cache()
-             res = self.call(*args, **kwargs)
-             self.pipe = self.pipe.to("cpu")
-         else:
-             res = self.call(*args, **kwargs)
-         torch.cuda.empty_cache()
-         return res
-
-     def call(self, prompt, seed=0, steps=25):
-         '''
-         args:
-             prompt: str
-             seed: int
-             steps: int
-         return:
-             rgb: PIL.Image
-         '''
-         print("prompt is:", prompt)
-         # append "white background, 3D style, best quality" (in Chinese) to the prompt
-         prompt = prompt + ",白色背景,3D风格,最佳质量"
-         seed_everything(seed)
-         generator = torch.Generator(device=self.device)
-         if seed is not None: generator = generator.manual_seed(int(seed))
-         rgb = self.pipe(prompt=prompt, negative_prompt=self.neg_txt, num_inference_steps=steps,
-                         pag_scale=1.3, width=1024, height=1024, generator=generator, return_dict=False)[0][0]
-         torch.cuda.empty_cache()
-         return rgb
-
- if __name__ == "__main__":
-     import argparse
-
-     def get_args():
-         parser = argparse.ArgumentParser()
-         parser.add_argument("--text2image_path", default="weights/hunyuanDiT", type=str)
-         parser.add_argument("--text_prompt", default="", type=str)
-         parser.add_argument("--output_img_path", default="./outputs/test/img.jpg", type=str)
-         parser.add_argument("--device", default="cuda:0", type=str)
-         parser.add_argument("--seed", default=0, type=int)
-         parser.add_argument("--steps", default=25, type=int)
-         return parser.parse_args()
-     args = get_args()
-
-     text2image_model = Text2Image(pretrain=args.text2image_path, device=args.device)
-     rgb_img = text2image_model(args.text_prompt, seed=args.seed, steps=args.steps)
-     rgb_img.save(args.output_img_path)
-
infer/.ipynb_checkpoints/utils-checkpoint.py DELETED
@@ -1,87 +0,0 @@
- # Open Source Model Licensed under the Apache License Version 2.0
- # and Other Licenses of the Third-Party Components therein:
- # The below Model in this distribution may have been modified by THL A29 Limited
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
- # The below software and/or models in this distribution may have been
- # modified by THL A29 Limited ("Tencent Modifications").
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
-
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
- # except for the third-party components listed below.
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
- # in the respective licenses of these third-party components.
- # Users must comply with all terms and conditions of original licenses of these third-party
- # components and must ensure that the usage of the third party components adheres to
- # all relevant laws and regulations.
-
- # For avoidance of doubts, Hunyuan 3D means the large language models and
- # their software and algorithms, including trained model weights, parameters (including
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
- # fine-tuning enabling code and other elements of the foregoing made publicly available
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-
- import os
- import time
- import random
- import numpy as np
- import torch
- from torch.cuda.amp import autocast, GradScaler
- from functools import wraps
-
- def seed_everything(seed):
-     '''
-     seed everything
-     '''
-     random.seed(seed)
-     np.random.seed(seed)
-     torch.manual_seed(seed)
-     os.environ["PL_GLOBAL_SEED"] = str(seed)
-
- def timing_decorator(category: str):
-     '''
-     decorator that records and prints the run time of the wrapped function
-     '''
-     def decorator(func):
-         func.call_count = 0
-         @wraps(func)
-         def wrapper(*args, **kwargs):
-             start_time = time.time()
-             result = func(*args, **kwargs)
-             end_time = time.time()
-             elapsed_time = end_time - start_time
-             func.call_count += 1
-             print(f"[HunYuan3D]-[{category}], cost time: {elapsed_time:.4f}s")  # huiwen
-             return result
-         return wrapper
-     return decorator
-
- def auto_amp_inference(func):
-     '''
-     run the wrapped function under torch.cuda.amp.autocast()
-     '''
-     @wraps(func)
-     def wrapper(*args, **kwargs):
-         with autocast():
-             output = func(*args, **kwargs)
-         return output
-     return wrapper
-
- def get_parameter_number(model):
-     total_num = sum(p.numel() for p in model.parameters())
-     trainable_num = sum(p.numel() for p in model.parameters() if p.requires_grad)
-     return {'Total': total_num, 'Trainable': trainable_num}
-
- def set_parameter_grad_false(model):
-     for p in model.parameters():
-         p.requires_grad = False
-
- def str_to_bool(s):
-     if s.lower() in ['true', 't', 'yes', 'y', '1']:
-         return True
-     elif s.lower() in ['false', 'f', 'no', 'n', '0']:
-         return False
-     else:
-         raise ValueError("bool arg must be one of ['true', 't', 'yes', 'y', '1', 'false', 'f', 'no', 'n', '0']")
infer/.ipynb_checkpoints/views_to_mesh-checkpoint.py DELETED
@@ -1,154 +0,0 @@
- # Open Source Model Licensed under the Apache License Version 2.0
- # and Other Licenses of the Third-Party Components therein:
- # The below Model in this distribution may have been modified by THL A29 Limited
- # ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited.
-
- # Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
- # The below software and/or models in this distribution may have been
- # modified by THL A29 Limited ("Tencent Modifications").
- # All Tencent Modifications are Copyright (C) THL A29 Limited.
-
- # Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
- # except for the third-party components listed below.
- # Hunyuan 3D does not impose any additional limitations beyond what is outlined
- # in the respective licenses of these third-party components.
- # Users must comply with all terms and conditions of original licenses of these third-party
- # components and must ensure that the usage of the third party components adheres to
- # all relevant laws and regulations.
-
- # For avoidance of doubts, Hunyuan 3D means the large language models and
- # their software and algorithms, including trained model weights, parameters (including
- # optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
- # fine-tuning enabling code and other elements of the foregoing made publicly available
- # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
-
- import os, sys
- sys.path.insert(0, f"{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}")
-
- import time
- import torch
- import random
- import numpy as np
- from PIL import Image
- from einops import rearrange
- from PIL import Image, ImageSequence
-
- from infer.utils import seed_everything, timing_decorator, auto_amp_inference
- from infer.utils import get_parameter_number, set_parameter_grad_false, str_to_bool
- from svrm.predictor import MV23DPredictor
-
-
- class Views2Mesh():
-     def __init__(self, mv23d_cfg_path, mv23d_ckt_path,
-                  device="cuda:0", use_lite=False, save_memory=False):
-         '''
-         mv23d_cfg_path: path to the config yaml file
-         mv23d_ckt_path: path to the checkpoint
-         use_lite: use the lite version of the model
-         save_memory: offload the model to CPU between calls
-         '''
-         self.mv23d_predictor = MV23DPredictor(mv23d_ckt_path, mv23d_cfg_path, device=device)
-         self.mv23d_predictor.model.eval()
-         self.order = [0, 1, 2, 3, 4, 5] if use_lite else [0, 2, 4, 5, 3, 1]
-         self.device = device
-         self.save_memory = save_memory
-         set_parameter_grad_false(self.mv23d_predictor.model)
-         print('view2mesh model', get_parameter_number(self.mv23d_predictor.model))
-
-     @torch.no_grad()
-     @timing_decorator("views to mesh")
-     @auto_amp_inference
-     def __call__(self, *args, **kwargs):
-         if self.save_memory:
-             self.mv23d_predictor.model = self.mv23d_predictor.model.to(self.device)
-             torch.cuda.empty_cache()
-             res = self.call(*args, **kwargs)
-             self.mv23d_predictor.model = self.mv23d_predictor.model.to("cpu")
-         else:
-             res = self.call(*args, **kwargs)
-         torch.cuda.empty_cache()
-         return res
-
-     def call(
-         self,
-         views_pil=None,
-         cond_pil=None,
-         gif_pil=None,
-         seed=0,
-         target_face_count=10000,
-         do_texture_mapping=True,
-         save_folder='./outputs/test'
-     ):
-         '''
-         set views_pil and cond_pil together, or set gif_pil alone
-         seed: int
-         target_face_count: int
-         save_folder: path to save the mesh files
-         '''
-         save_dir = save_folder
-         os.makedirs(save_dir, exist_ok=True)
-
-         if views_pil is not None and cond_pil is not None:
-             show_image = rearrange(np.asarray(views_pil, dtype=np.uint8),
-                                    '(n h) (m w) c -> (n m) h w c', n=3, m=2)
-             views = [Image.fromarray(show_image[idx]) for idx in self.order]
-             image_list = [cond_pil] + views
-             image_list = [img.convert('RGB') for img in image_list]
-         elif gif_pil is not None:
-             image_list = [img.convert('RGB') for img in ImageSequence.Iterator(gif_pil)]
-
-         image_input = image_list[0]
-         image_list = image_list[1:] + image_list[:1]
-
-         seed_everything(seed)
-         self.mv23d_predictor.predict(
-             image_list,
-             save_dir=save_dir,
-             image_input=image_input,
-             target_face_count=target_face_count,
-             do_texture_mapping=do_texture_mapping
-         )
-         torch.cuda.empty_cache()
-         return save_dir
-
-
- if __name__ == "__main__":
-
-     import argparse
-
-     def get_args():
-         parser = argparse.ArgumentParser()
-         parser.add_argument("--views_path", type=str, required=True)
-         parser.add_argument("--cond_path", type=str, required=True)
-         parser.add_argument("--save_folder", default="./outputs/test/", type=str)
-         parser.add_argument("--mv23d_cfg_path", default="./svrm/configs/svrm.yaml", type=str)
-         parser.add_argument("--mv23d_ckt_path", default="weights/svrm/svrm.safetensors", type=str)
-         parser.add_argument("--max_faces_num", default=90000, type=int,
-                             help="max number of faces; 90000 suggested for quality, 10000 for speed")
-         parser.add_argument("--device", default="cuda:0", type=str)
-         parser.add_argument("--use_lite", default='false', type=str)
-         parser.add_argument("--do_texture_mapping", default='false', type=str)
-
-         return parser.parse_args()
-
-     args = get_args()
-     args.use_lite = str_to_bool(args.use_lite)
-     args.do_texture_mapping = str_to_bool(args.do_texture_mapping)
-
-     views = Image.open(args.views_path)
-     cond = Image.open(args.cond_path)
-
-     views_to_mesh_model = Views2Mesh(
-         args.mv23d_cfg_path,
-         args.mv23d_ckt_path,
-         device=args.device,
-         use_lite=args.use_lite
-     )
-
-     views_to_mesh_model(
-         views, cond,
-         seed=0,
-         target_face_count=args.max_faces_num,
-         save_folder=args.save_folder,
-         do_texture_mapping=args.do_texture_mapping
-     )
-