gwang-kim's picture
u
f12ab4c
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
import numpy as np
import os
import sys
import pathlib
import util
import tensorflow as tf
sys.path.insert(0, os.path.join(sys.path[0], '../..')) # for nvdiffrast
import nvdiffrast.tensorflow as dr
#----------------------------------------------------------------------------
# Cube shape fitter.
#----------------------------------------------------------------------------
def _render(pos_clip, pos_idx, vtx_attr, attr_idx, res):
    """Build the rasterize -> interpolate -> antialias graph for one square image.

    `vtx_attr` is indexed with a leading `tf.newaxis` before interpolation, the
    same way the clip-space positions passed in already carry a leading axis.
    """
    rast_out, _ = dr.rasterize(pos_clip, pos_idx, resolution=[res, res], output_db=False)
    attr_img, _ = dr.interpolate(vtx_attr[tf.newaxis, ...], rast_out, attr_idx)
    return dr.antialias(attr_img, rast_out, pos_clip, pos_idx)


def fit_cube(max_iter=5000,
             resolution=4,
             discontinuous=False,
             repeats=1,
             log_interval=10,
             display_interval=None,
             display_res=512,
             out_dir='.',
             log_fn=None,
             imgsave_interval=None,
             imgsave_fn=None):
    """Fit vertex positions and colors of a cube mesh to rendered reference images.

    A reference mesh is loaded from `cube_c.npz` / `cube_d.npz` in the `data`
    directory next to this file's parent directory.  A randomly initialized copy
    of its vertex positions and colors is then optimized with Adam so that its
    rendering under random transforms matches the reference rendering (mean
    squared image difference).

    Args:
        max_iter: Each repeat runs iterations `0 .. max_iter` inclusive.
        resolution: Side length of the square images used for the loss.
        discontinuous: Load `cube_d.npz` when true, `cube_c.npz` otherwise.
        repeats: Number of optimization runs; every run re-randomizes the
            optimized variables at iteration 0.
        log_interval: Print (and optionally write) the mean geometric error
            every this many iterations.  Falsy disables logging.
        display_interval: Show a comparison image every this many iterations.
            Falsy disables display.
        display_res: Side length of the square display renderings.
        out_dir: Directory for the log file and saved images; created if
            missing.  A falsy value means the current directory.
        log_fn: Log file name inside `out_dir`, or None for no log file.
        imgsave_interval: Save a comparison image every this many iterations.
            Falsy disables saving.
        imgsave_fn: %-format pattern (receives the iteration number) for saved
            image file names inside `out_dir`.
    """
    # The original guarded makedirs with `if out_dir:` but then concatenated
    # `out_dir + '/'` unconditionally, which breaks for None and points at the
    # filesystem root for ''.  Resolve the directory once and use it everywhere.
    base_dir = out_dir if out_dir else '.'
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    fn = 'cube_%s.npz' % ('d' if discontinuous else 'c')
    with np.load(f'{datadir}/{fn}') as f:
        pos_idx, vtxp, col_idx, vtxc = f.values()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], vtxp.shape[0]))

    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Setup TF graph for reference.
    # Positions get a constant 1 appended as fourth coordinate before the 4x4 transform.
    vtxw = np.concatenate([vtxp, np.ones([vtxp.shape[0], 1])], axis=1).astype(np.float32)
    pos_clip = tf.matmul(vtxw, mtx_in, transpose_b=True)[tf.newaxis, ...]
    color = _render(pos_clip, pos_idx, vtxc, col_idx, resolution)

    # Optimized variables.
    vtxc_opt = tf.get_variable('vtxc', initializer=tf.zeros_initializer(), shape=vtxc.shape)
    vtxp_opt = tf.get_variable('vtxp', initializer=tf.zeros_initializer(), shape=vtxp.shape)

    # Optimization variable setters for initialization.
    vtxc_opt_in = tf.placeholder(tf.float32, vtxc.shape)
    vtxp_opt_in = tf.placeholder(tf.float32, vtxp.shape)
    opt_set = tf.group(tf.assign(vtxc_opt, vtxc_opt_in), tf.assign(vtxp_opt, vtxp_opt_in))

    # Setup TF graph for the mesh being optimized.
    vtxw_opt = tf.concat([vtxp_opt, tf.ones([vtxp.shape[0], 1], tf.float32)], axis=1)
    pos_clip_opt = tf.matmul(vtxw_opt, mtx_in, transpose_b=True)[tf.newaxis, ...]
    color_opt = _render(pos_clip_opt, pos_idx, vtxc_opt, col_idx, resolution)

    # Image-space loss and optimizer.
    loss = tf.reduce_mean((color_opt - color)**2)
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.999).minimize(loss, var_list=[vtxp_opt, vtxc_opt])

    # Setup TF graph for display (same meshes, rendered at display_res).
    color_disp = _render(pos_clip_opt, pos_idx, vtxc_opt, col_idx, display_res)
    color_disp_ref = _render(pos_clip, pos_idx, vtxc, col_idx, display_res)

    # Geometric error calculation: per-vertex Euclidean norm of (|coordinate| - 0.5),
    # averaged over all vertices.
    geom_loss = tf.reduce_mean(tf.reduce_sum((tf.abs(vtxp_opt) - .5)**2, axis=1)**0.5)

    # Open log file.
    log_file = open(os.path.join(base_dir, log_fn), 'wt') if log_fn else None

    # try/finally guarantees the log is flushed and closed even if a TF run or
    # an image save raises part-way through the optimization.
    try:
        # Repeats.
        for rep in range(repeats):

            # Optimize.
            ang = 0.0
            gl_avg = []
            util.init_uninitialized_vars()
            for it in range(max_iter + 1):
                # Initialize optimization: reference positions plus uniform noise
                # in [-0.5, 0.5), and uniform random colors in [0, 1).
                if it == 0:
                    vtxp_init = np.random.uniform(-0.5, 0.5, size=vtxp.shape) + vtxp
                    vtxc_init = np.random.uniform(0.0, 1.0, size=vtxc.shape)
                    util.run(opt_set, {vtxc_opt_in: vtxc_init.astype(np.float32),
                                       vtxp_opt_in: vtxp_init.astype(np.float32)})

                # Learning rate ramp: exponential decay from 1e-2, floored at 1e-4.
                lr = 1e-2 * max(0.01, 10**(-it*0.0005))

                # Random rotation/translation matrix for optimization.
                r_rot = util.random_rotation_translation(0.25)

                # Smooth rotation for display.
                a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

                # Modelview and modelview + projection matrices.
                proj = util.projection(x=0.4)
                r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
                r_mvp = np.matmul(proj, r_mv).astype(np.float32)
                a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
                a_mvp = np.matmul(proj, a_mv).astype(np.float32)

                # Run training and measure geometric error.
                gl_val, _ = util.run([geom_loss, train_op], {mtx_in: r_mvp, lr_in: lr})
                gl_avg.append(gl_val)

                # Print/save log: report the mean error since the previous report.
                if log_interval and (it % log_interval == 0):
                    gl_val, gl_avg = np.mean(np.asarray(gl_avg)), []
                    s = ("rep=%d," % rep) if repeats > 1 else ""
                    s += "iter=%d,err=%f" % (it, gl_val)
                    print(s)
                    if log_file:
                        log_file.write(s + "\n")

                # Show/save image.
                display_image = display_interval and (it % display_interval == 0)
                save_image = imgsave_interval and (it % imgsave_interval == 0)

                if display_image or save_image:
                    ang = ang + 0.1
                    img_o = util.run(color_opt, {mtx_in: r_mvp})[0]
                    img_b = util.run(color, {mtx_in: r_mvp})[0]
                    img_d = util.run(color_disp, {mtx_in: a_mvp})[0]
                    img_r = util.run(color_disp_ref, {mtx_in: a_mvp})[0]

                    # Upscale the low-resolution loss images by pixel repetition
                    # so they can sit next to the display-resolution renderings.
                    scl = display_res // img_o.shape[0]
                    img_b = np.repeat(np.repeat(img_b, scl, axis=0), scl, axis=1)
                    img_o = np.repeat(np.repeat(img_o, scl, axis=0), scl, axis=1)
                    result_image = np.concatenate([img_o, img_b, img_d, img_r], axis=1)

                    if display_image:
                        util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
                    if save_image:
                        util.save_image(os.path.join(base_dir, imgsave_fn % it), result_image)
    finally:
        # All repeats done (or aborted).
        if log_file:
            log_file.close()
#----------------------------------------------------------------------------
# Main function.
#----------------------------------------------------------------------------
def main():
    """Command-line driver: parse arguments, initialize TensorFlow, run `fit_cube`.

    Recognized arguments (from `sys.argv[1:]`):
        -v              show the comparison image every 100 iterations
        -discontinuous  fit the 'd' cube variant instead of the 'c' one
        <decimal>       image resolution used for the loss (must be > 0)

    Any other argument, or a missing/zero resolution, prints the usage line and
    exits.  Results go to `out/cube_<c|d>_<resolution>`.
    """
    display_interval = 0
    discontinuous = False
    resolution = 0

    def usage():
        print("Usage: python cube.py [-v] [-discontinuous] resolution")
        # sys.exit() rather than the bare exit(): the latter is injected by the
        # `site` module for interactive use and is not guaranteed to exist.
        sys.exit()

    for a in sys.argv[1:]:
        if a == '-v':
            display_interval = 100
        elif a == '-discontinuous':
            discontinuous = True
        elif a.isdecimal():
            resolution = int(a)
        else:
            usage()

    if resolution <= 0:
        usage()

    # Initialize TensorFlow.
    util.init_tf()

    # Run.
    out_dir = 'out/cube_%s_%d' % (('d' if discontinuous else 'c'), resolution)
    fit_cube(max_iter=5000,
             resolution=resolution,
             discontinuous=discontinuous,
             log_interval=10,
             display_interval=display_interval,
             out_dir=out_dir,
             log_fn='log.txt',
             imgsave_interval=1000,
             imgsave_fn='img_%06d.png')

    # Done.
    print("Done.")
#----------------------------------------------------------------------------
# Script entry point: run the command-line driver only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
#----------------------------------------------------------------------------