Alesteba committed on
Commit 6591782 · 1 Parent(s): 15a1066

Create app.py

Files changed (1)
  1. app.py +274 -0
app.py ADDED
@@ -0,0 +1,274 @@
import streamlit as st
import tensorflow as tf
import numpy as np

# Setting random seed to obtain reproducible results.
tf.random.set_seed(42)

# Initialize global variables.
AUTO = tf.data.AUTOTUNE
BATCH_SIZE = 1
NUM_SAMPLES = 32
POS_ENCODE_DIMS = 16
EPOCHS = 20
H = 50
W = 29
focal = 138.88

def encode_position(x):
    """Encodes the position into its corresponding Fourier feature.
    Args:
        x: The input coordinate.
    Returns:
        Fourier features tensors of the position.
    """
    positions = [x]
    for i in range(POS_ENCODE_DIMS):
        for fn in [tf.sin, tf.cos]:
            positions.append(fn(2.0 ** i * x))
    return tf.concat(positions, axis=-1)
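For reference, with POS_ENCODE_DIMS = 16 each input dimension is expanded into 1 + 2 * 16 = 33 features (the raw value plus a sine and a cosine at 16 frequencies), so a 3-D point becomes a 99-dimensional vector. A quick sanity check, assuming the constants defined above:

    x = tf.zeros((5, 3))                # five 3-D query points
    print(encode_position(x).shape)     # (5, 99) = 3 * (1 + 2 * POS_ENCODE_DIMS)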

def get_rays(height, width, focal, pose):
    """Computes origin point and direction vector of rays.
    Args:
        height: Height of the image.
        width: Width of the image.
        focal: The focal length between the images and the camera.
        pose: The pose matrix of the camera.
    Returns:
        Tuple of origin point and direction vector for rays.
    """
    # Build a meshgrid for the rays.
    i, j = tf.meshgrid(
        tf.range(width, dtype=tf.float32),
        tf.range(height, dtype=tf.float32),
        indexing="xy",
    )

    # Normalize the x axis coordinates.
    transformed_i = (i - width * 0.5) / focal

    # Normalize the y axis coordinates.
    transformed_j = (j - height * 0.5) / focal

    # Create the direction unit vectors.
    directions = tf.stack([transformed_i, -transformed_j, -tf.ones_like(i)], axis=-1)

    # Get the camera matrix.
    camera_matrix = pose[:3, :3]
    height_width_focal = pose[:3, -1]

    # Get origins and directions for the rays.
    transformed_dirs = directions[..., None, :]
    camera_dirs = transformed_dirs * camera_matrix
    ray_directions = tf.reduce_sum(camera_dirs, axis=-1)
    ray_origins = tf.broadcast_to(height_width_focal, tf.shape(ray_directions))

    # Return the origins and directions.
    return (ray_origins, ray_directions)

def render_flat_rays(ray_origins, ray_directions, near, far, num_samples, rand=False):
    """Renders the rays and flattens them.
    Args:
        ray_origins: The origin points for rays.
        ray_directions: The direction unit vectors for the rays.
        near: The near bound of the volumetric scene.
        far: The far bound of the volumetric scene.
        num_samples: Number of sample points in a ray.
        rand: Choice for randomising the sampling strategy.
    Returns:
        Tuple of flattened rays and sample points on each ray.
    """
    # Compute 3D query points.
    # Equation: r(t) = o + td -> Building the "t" here.
    t_vals = tf.linspace(near, far, num_samples)
    if rand:
        # Inject uniform noise into the sample space to make the sampling
        # continuous.
        shape = list(ray_origins.shape[:-1]) + [num_samples]
        noise = tf.random.uniform(shape=shape) * (far - near) / num_samples
        t_vals = t_vals + noise

    # Equation: r(t) = o + td -> Building the "r" here.
    rays = ray_origins[..., None, :] + (
        ray_directions[..., None, :] * t_vals[..., None]
    )
    rays_flat = tf.reshape(rays, [-1, 3])
    rays_flat = encode_position(rays_flat)
    return (rays_flat, t_vals)
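A minimal sketch of how get_rays and render_flat_rays chain together and the tensor shapes they produce, using an identity matrix as a placeholder camera pose (not a pose from the app itself):

    ray_origins, ray_directions = get_rays(H, W, focal, tf.eye(4))   # each of shape (H, W, 3)
    rays_flat, t_vals = render_flat_rays(
        ray_origins, ray_directions, near=2.0, far=6.0, num_samples=NUM_SAMPLES
    )
    print(rays_flat.shape)   # (H * W * NUM_SAMPLES, 99) encoded 3D query points
    print(t_vals.shape)      # (NUM_SAMPLES,) because rand defaults to False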

def map_fn(pose):
    """Maps an individual pose to flattened rays and sample points.
    Args:
        pose: The pose matrix of the camera.
    Returns:
        Tuple of flattened rays and sample points corresponding to the
        camera pose.
    """
    (ray_origins, ray_directions) = get_rays(height=H, width=W, focal=focal, pose=pose)
    (rays_flat, t_vals) = render_flat_rays(
        ray_origins=ray_origins,
        ray_directions=ray_directions,
        near=2.0,
        far=6.0,
        num_samples=NUM_SAMPLES,
        rand=True,
    )
    return (rays_flat, t_vals)

def render_rgb_depth(model, rays_flat, t_vals, rand=True, train=True):
    """Generates the RGB image and depth map from the model prediction.
    Args:
        model: The MLP model that is trained to predict the rgb and
            volume density of the volumetric scene.
        rays_flat: The flattened rays that serve as the input to
            the NeRF model.
        t_vals: The sample points for the rays.
        rand: Choice to randomise the sampling strategy.
        train: Whether the model is in the training or testing phase.
    Returns:
        Tuple of rgb image and depth map.
    """
    # Get the predictions from the nerf model and reshape them.
    if train:
        predictions = model(rays_flat)
    else:
        predictions = model.predict(rays_flat)
    predictions = tf.reshape(predictions, shape=(BATCH_SIZE, H, W, NUM_SAMPLES, 4))

    # Slice the predictions into rgb and sigma.
    rgb = tf.sigmoid(predictions[..., :-1])
    sigma_a = tf.nn.relu(predictions[..., -1])

    # Get the distance of adjacent intervals.
    delta = t_vals[..., 1:] - t_vals[..., :-1]
    # delta shape = (num_samples)
    if rand:
        delta = tf.concat(
            [delta, tf.broadcast_to([1e10], shape=(BATCH_SIZE, H, W, 1))], axis=-1
        )
        alpha = 1.0 - tf.exp(-sigma_a * delta)
    else:
        delta = tf.concat(
            [delta, tf.broadcast_to([1e10], shape=(BATCH_SIZE, 1))], axis=-1
        )
        alpha = 1.0 - tf.exp(-sigma_a * delta[:, None, None, :])

    # Get transmittance.
    exp_term = 1.0 - alpha
    epsilon = 1e-10
    transmittance = tf.math.cumprod(exp_term + epsilon, axis=-1, exclusive=True)
    weights = alpha * transmittance
    rgb = tf.reduce_sum(weights[..., None] * rgb, axis=-2)

    if rand:
        depth_map = tf.reduce_sum(weights * t_vals, axis=-1)
    else:
        depth_map = tf.reduce_sum(weights * t_vals[:, None, None], axis=-1)
    return (rgb, depth_map)
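The weighting above is the standard discrete volume-rendering approximation used by NeRF: for each sample i along a ray, alpha_i = 1 - exp(-sigma_i * delta_i); the transmittance T_i is the cumulative product of (1 - alpha_j) over the preceding samples (the exclusive cumprod above, with a small epsilon for numerical stability); and the pixel colour is the sum over samples of T_i * alpha_i * rgb_i. The depth map is the same weighted sum taken over the sample distances t_i instead of the colours.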

def get_translation_t(t):
    """Get the translation matrix for movement in t."""
    matrix = [
        [1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, t],
        [0, 0, 0, 1],
    ]
    return tf.convert_to_tensor(matrix, dtype=tf.float32)


def get_rotation_phi(phi):
    """Get the rotation matrix for movement in phi."""
    matrix = [
        [1, 0, 0, 0],
        [0, tf.cos(phi), -tf.sin(phi), 0],
        [0, tf.sin(phi), tf.cos(phi), 0],
        [0, 0, 0, 1],
    ]
    return tf.convert_to_tensor(matrix, dtype=tf.float32)


def get_rotation_theta(theta):
    """Get the rotation matrix for movement in theta."""
    matrix = [
        [tf.cos(theta), 0, -tf.sin(theta), 0],
        [0, 1, 0, 0],
        [tf.sin(theta), 0, tf.cos(theta), 0],
        [0, 0, 0, 1],
    ]
    return tf.convert_to_tensor(matrix, dtype=tf.float32)


def pose_spherical(theta, phi, t):
    """
    Get the camera to world matrix for the corresponding theta, phi
    and t.
    """
    c2w = get_translation_t(t)
    c2w = get_rotation_phi(phi / 180.0 * np.pi) @ c2w
    c2w = get_rotation_theta(theta / 180.0 * np.pi) @ c2w
    c2w = np.array([[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) @ c2w
    return c2w
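As a quick check of the convention (not part of the app itself): pose_spherical expects theta and phi in degrees and a radius t, and returns a 4 x 4 camera-to-world matrix.

    c2w = pose_spherical(theta=90.0, phi=-30.0, t=4.0)
    print(c2w.shape)    # (4, 4)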

def show_rendered_image(r, theta, phi):
    """Renders the color image and depth map for a camera placed at (r, theta, phi)."""
    # Get the camera to world matrix.
    c2w = pose_spherical(theta, phi, r)

    ray_oris, ray_dirs = get_rays(H, W, focal, c2w)
    rays_flat, t_vals = render_flat_rays(
        ray_oris, ray_dirs, near=2.0, far=6.0, num_samples=NUM_SAMPLES, rand=False
    )

    rgb, depth = render_rgb_depth(
        nerf_loaded, rays_flat[None, ...], t_vals[None, ...], rand=False, train=False
    )
    return (rgb[0], depth[0])

# The Streamlit page content starts here.
st.title('NeRF: 3D volumetric rendering with NeRF')
st.markdown("Authors: [Aritra Roy Gosthipathy](https://twitter.com/ariG23498) and [Ritwik Raha](https://twitter.com/ritwik_raha)")
st.markdown("## Description")
st.markdown("[NeRF](https://arxiv.org/abs/2003.08934) proposes an ingenious way to synthesize novel views of a scene by modelling the volumetric scene function through a neural network.")
st.markdown("## Interactive Demo")

# Download the model snapshot from the Hugging Face Hub.
from huggingface_hub import snapshot_download
snapshot_download(repo_id="Alesteba/your-model-name", local_dir="./nerf")

# Load the pre-trained model.
nerf_loaded = tf.keras.models.load_model("nerf", compile=False)

# Set the values of r, theta and phi.
r = 4.0
theta = st.slider("Enter a value for Θ:", min_value=0.0, max_value=360.0)
phi = -30.0
color, depth = show_rendered_image(r, theta, phi)

col1, col2 = st.columns(2)

with col1:
    color = tf.keras.utils.array_to_img(color)
    st.image(color, caption="Color Image", clamp=True, width=300)

with col2:
    depth = tf.keras.utils.array_to_img(depth[..., None])
    st.image(depth, caption="Depth Map", clamp=True, width=300)

st.markdown("## Tutorials")
st.markdown("- [Keras](https://keras.io/examples/vision/nerf/)")
st.markdown("- [PyImageSearch NeRF 1](https://www.pyimagesearch.com/2021/11/10/computer-graphics-and-deep-learning-with-nerf-using-tensorflow-and-keras-part-1/)")
st.markdown("- [PyImageSearch NeRF 2](https://www.pyimagesearch.com/2021/11/17/computer-graphics-and-deep-learning-with-nerf-using-tensorflow-and-keras-part-2/)")
st.markdown("- [PyImageSearch NeRF 3](https://www.pyimagesearch.com/2021/11/24/computer-graphics-and-deep-learning-with-nerf-using-tensorflow-and-keras-part-3/)")

st.markdown("## Credits")
st.markdown("- [PyImageSearch](https://www.pyimagesearch.com/)")
st.markdown("- [JarvisLabs.ai GPU credits](https://jarvislabs.ai/)")