File size: 7,873 Bytes
904ef7d 6f790c0 f6e1b58 904ef7d 6f790c0 904ef7d f6e1b58 904ef7d 6f790c0 f6e1b58 904ef7d f6e1b58 6f790c0 904ef7d f6e1b58 904ef7d 6f790c0 904ef7d 9e50a29 904ef7d 6f790c0 904ef7d 6f790c0 904ef7d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import torch
import argparse
from nerf.provider import NeRFDataset
from nerf.utils import *
from optimizer import Shampoo
from nerf.gui import NeRFGUI
# torch.autograd.set_detect_anomaly(True)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--text', default=None, help="text prompt")
parser.add_argument('-O', action='store_true', help="equals --fp16 --cuda_ray --dir_text")
parser.add_argument('-O2', action='store_true', help="equals --fp16 --dir_text")
parser.add_argument('--test', action='store_true', help="test mode")
parser.add_argument('--workspace', type=str, default='workspace')
parser.add_argument('--guidance', type=str, default='stable-diffusion', help='choose from [stable-diffusion, clip]')
parser.add_argument('--seed', type=int, default=0)
### training options
parser.add_argument('--iters', type=int, default=15000, help="training iters")
parser.add_argument('--lr', type=float, default=1e-3, help="initial learning rate")
parser.add_argument('--ckpt', type=str, default='latest')
parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
parser.add_argument('--max_steps', type=int, default=1024, help="max num steps sampled per ray (only valid when using --cuda_ray)")
parser.add_argument('--num_steps', type=int, default=256, help="num steps sampled per ray (only valid when not using --cuda_ray)")
parser.add_argument('--upsample_steps', type=int, default=0, help="num steps up-sampled per ray (only valid when not using --cuda_ray)")
parser.add_argument('--update_extra_interval', type=int, default=16, help="iter interval to update extra status (only valid when using --cuda_ray)")
parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when not using --cuda_ray)")
parser.add_argument('--albedo_iters', type=int, default=15000, help="training iters that only use albedo shading")
# model options
parser.add_argument('--bg_radius', type=float, default=1.4, help="if positive, use a background model at sphere(bg_radius)")
parser.add_argument('--density_thresh', type=float, default=10, help="threshold for density grid to be occupied")
# network backbone
parser.add_argument('--fp16', action='store_true', help="use amp mixed precision training")
parser.add_argument('--backbone', type=str, default='grid', help="nerf backbone, choose from [grid, tcnn, vanilla]")
# rendering resolution in training
parser.add_argument('--w', type=int, default=128, help="render width for NeRF in training")
parser.add_argument('--h', type=int, default=128, help="render height for NeRF in training")
### dataset options
parser.add_argument('--bound', type=float, default=1, help="assume the scene is bounded in box(-bound, bound)")
parser.add_argument('--dt_gamma', type=float, default=0, help="dt_gamma (>=0) for adaptive ray marching. set to 0 to disable, >0 to accelerate rendering (but usually with worse quality)")
parser.add_argument('--min_near', type=float, default=0.1, help="minimum near distance for camera")
parser.add_argument('--radius_range', type=float, nargs='*', default=[1.0, 1.5], help="training camera radius range")
parser.add_argument('--fovy_range', type=float, nargs='*', default=[40, 70], help="training camera fovy range")
parser.add_argument('--dir_text', action='store_true', help="direction-encode the text prompt, by appending front/side/back/overhead view")
parser.add_argument('--angle_overhead', type=float, default=30, help="[0, angle_overhead] is the overhead region")
parser.add_argument('--angle_front', type=float, default=30, help="[0, angle_front] is the front region, [180, 180+angle_front] the back region, otherwise the side region.")
parser.add_argument('--lambda_entropy', type=float, default=1e-4, help="loss scale for alpha entropy")
parser.add_argument('--lambda_orient', type=float, default=1e-2, help="loss scale for orientation")
### GUI options
parser.add_argument('--gui', action='store_true', help="start a GUI")
parser.add_argument('--W', type=int, default=800, help="GUI width")
parser.add_argument('--H', type=int, default=800, help="GUI height")
parser.add_argument('--radius', type=float, default=3, help="default GUI camera radius from center")
parser.add_argument('--fovy', type=float, default=60, help="default GUI camera fovy")
parser.add_argument('--light_theta', type=float, default=60, help="default GUI light direction in [0, 180], corresponding to elevation [90, -90]")
parser.add_argument('--light_phi', type=float, default=0, help="default GUI light direction in [0, 360), azimuth")
parser.add_argument('--max_spp', type=int, default=1, help="GUI rendering max sample per pixel")
opt = parser.parse_args()
if opt.O:
opt.fp16 = True
opt.cuda_ray = True
opt.dir_text = True
elif opt.O2:
opt.fp16 = True
opt.dir_text = True
if opt.backbone == 'vanilla':
from nerf.network import NeRFNetwork
elif opt.backbone == 'tcnn':
from nerf.network_tcnn import NeRFNetwork
elif opt.backbone == 'grid':
from nerf.network_grid import NeRFNetwork
else:
raise NotImplementedError(f'--backbone {opt.backbone} is not implemented!')
print(opt)
seed_everything(opt.seed)
model = NeRFNetwork(opt)
print(model)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if opt.test:
guidance = None # no need to load guidance model at test
trainer = Trainer('ngp', opt, model, guidance, device=device, workspace=opt.workspace, fp16=opt.fp16, use_checkpoint=opt.ckpt)
if opt.gui:
gui = NeRFGUI(opt, trainer)
gui.render()
else:
test_loader = NeRFDataset(opt, device=device, type='test', H=opt.H, W=opt.W, size=100).dataloader()
trainer.test(test_loader)
# trainer.save_mesh(resolution=256)
else:
if opt.guidance == 'stable-diffusion':
from nerf.sd import StableDiffusion
guidance = StableDiffusion(device)
elif opt.guidance == 'clip':
from nerf.clip import CLIP
guidance = CLIP(device)
else:
raise NotImplementedError(f'--guidance {opt.guidance} is not implemented.')
optimizer = lambda model: torch.optim.Adam(model.get_params(opt.lr), betas=(0.9, 0.99), eps=1e-15)
# optimizer = lambda model: Shampoo(model.get_params(opt.lr))
train_loader = NeRFDataset(opt, device=device, type='train', H=opt.h, W=opt.w, size=100).dataloader()
# decay to 0.01 * init_lr at last iter step
scheduler = lambda optimizer: optim.lr_scheduler.LambdaLR(optimizer, lambda iter: 0.01 ** min(iter / opt.iters, 1))
trainer = Trainer('ngp', opt, model, guidance, device=device, workspace=opt.workspace, optimizer=optimizer, ema_decay=0.95, fp16=opt.fp16, lr_scheduler=scheduler, use_checkpoint=opt.ckpt, eval_interval=1)
if opt.gui:
trainer.train_loader = train_loader # attach dataloader to trainer
gui = NeRFGUI(opt, trainer)
gui.render()
else:
valid_loader = NeRFDataset(opt, device=device, type='val', H=opt.H, W=opt.W, size=5).dataloader()
max_epoch = np.ceil(opt.iters / len(train_loader)).astype(np.int32)
trainer.train(train_loader, valid_loader, max_epoch)
# also test
test_loader = NeRFDataset(opt, device=device, type='test', H=opt.H, W=opt.W, size=100).dataloader()
trainer.test(test_loader)
trainer.save_mesh(resolution=256) |