Alesteba committed on
Commit 6591782 · 1 Parent(s): 15a1066

Create app.py

Files changed (1)
  1. app.py +274 -0
app.py ADDED
@@ -0,0 +1,274 @@
import streamlit as st
import tensorflow as tf
import numpy as np

# Setting random seed to obtain reproducible results.
tf.random.set_seed(42)

# Initialize global variables.
AUTO = tf.data.AUTOTUNE
BATCH_SIZE = 1
NUM_SAMPLES = 32
POS_ENCODE_DIMS = 16
EPOCHS = 20
H = 50
W = 29
focal = 138.88

def encode_position(x):
    """Encodes the position into its corresponding Fourier feature.
    Args:
        x: The input coordinate.
    Returns:
        Fourier features tensors of the position.
    """
    positions = [x]
    for i in range(POS_ENCODE_DIMS):
        for fn in [tf.sin, tf.cos]:
            positions.append(fn(2.0 ** i * x))
    return tf.concat(positions, axis=-1)
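For reference, with POS_ENCODE_DIMS = 16 each input dimension is expanded into 1 + 2 * 16 = 33 features (the raw value plus a sine and a cosine at 16 frequencies), so a 3-D point becomes a 99-dimensional vector. A quick sanity check, assuming the constants defined above:

    x = tf.zeros((5, 3))                # five 3-D query points
    print(encode_position(x).shape)     # (5, 99) = 3 * (1 + 2 * POS_ENCODE_DIMS)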

def get_rays(height, width, focal, pose):
    """Computes origin point and direction vector of rays.
    Args:
        height: Height of the image.
        width: Width of the image.
        focal: The focal length between the images and the camera.
        pose: The pose matrix of the camera.
    Returns:
        Tuple of origin point and direction vector for rays.
    """
    # Build a meshgrid for the rays.
    i, j = tf.meshgrid(
        tf.range(width, dtype=tf.float32),
        tf.range(height, dtype=tf.float32),
        indexing="xy",
    )

    # Normalize the x axis coordinates.
    transformed_i = (i - width * 0.5) / focal

    # Normalize the y axis coordinates.
    transformed_j = (j - height * 0.5) / focal

    # Create the direction unit vectors.
    directions = tf.stack([transformed_i, -transformed_j, -tf.ones_like(i)], axis=-1)

    # Get the camera matrix.
    camera_matrix = pose[:3, :3]
    height_width_focal = pose[:3, -1]

    # Get origins and directions for the rays.
    transformed_dirs = directions[..., None, :]
    camera_dirs = transformed_dirs * camera_matrix
    ray_directions = tf.reduce_sum(camera_dirs, axis=-1)
    ray_origins = tf.broadcast_to(height_width_focal, tf.shape(ray_directions))

    # Return the origins and directions.
    return (ray_origins, ray_directions)

def render_flat_rays(ray_origins, ray_directions, near, far, num_samples, rand=False):
    """Renders the rays and flattens them.
    Args:
        ray_origins: The origin points for rays.
        ray_directions: The direction unit vectors for the rays.
        near: The near bound of the volumetric scene.
        far: The far bound of the volumetric scene.
        num_samples: Number of sample points in a ray.
        rand: Choice for randomising the sampling strategy.
    Returns:
        Tuple of flattened rays and sample points on each ray.
    """
    # Compute 3D query points.
    # Equation: r(t) = o + td -> Building the "t" here.
    t_vals = tf.linspace(near, far, num_samples)
    if rand:
        # Inject uniform noise into the sample space to make the sampling
        # continuous.
        shape = list(ray_origins.shape[:-1]) + [num_samples]
        noise = tf.random.uniform(shape=shape) * (far - near) / num_samples
        t_vals = t_vals + noise

    # Equation: r(t) = o + td -> Building the "r" here.
    rays = ray_origins[..., None, :] + (
        ray_directions[..., None, :] * t_vals[..., None]
    )
    rays_flat = tf.reshape(rays, [-1, 3])
    rays_flat = encode_position(rays_flat)
    return (rays_flat, t_vals)
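A minimal sketch of how get_rays and render_flat_rays chain together and the tensor shapes they produce, using an identity matrix as a placeholder camera pose (not a pose from the app itself):

    ray_origins, ray_directions = get_rays(H, W, focal, tf.eye(4))   # each of shape (H, W, 3)
    rays_flat, t_vals = render_flat_rays(
        ray_origins, ray_directions, near=2.0, far=6.0, num_samples=NUM_SAMPLES
    )
    print(rays_flat.shape)   # (H * W * NUM_SAMPLES, 99) encoded 3D query points
    print(t_vals.shape)      # (NUM_SAMPLES,) because rand defaults to False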

def map_fn(pose):
    """Maps an individual pose to flattened rays and sample points.
    Args:
        pose: The pose matrix of the camera.
    Returns:
        Tuple of flattened rays and sample points corresponding to the
        camera pose.
    """
    (ray_origins, ray_directions) = get_rays(height=H, width=W, focal=focal, pose=pose)
    (rays_flat, t_vals) = render_flat_rays(
        ray_origins=ray_origins,
        ray_directions=ray_directions,
        near=2.0,
        far=6.0,
        num_samples=NUM_SAMPLES,
        rand=True,
    )
    return (rays_flat, t_vals)

def render_rgb_depth(model, rays_flat, t_vals, rand=True, train=True):
    """Generates the RGB image and depth map from the model prediction.
    Args:
        model: The MLP model that is trained to predict the rgb and
            volume density of the volumetric scene.
        rays_flat: The flattened rays that serve as the input to
            the NeRF model.
        t_vals: The sample points for the rays.
        rand: Choice to randomise the sampling strategy.
        train: Whether the model is in the training or testing phase.
    Returns:
        Tuple of rgb image and depth map.
    """
    # Get the predictions from the nerf model and reshape them.
    if train:
        predictions = model(rays_flat)
    else:
        predictions = model.predict(rays_flat)
    predictions = tf.reshape(predictions, shape=(BATCH_SIZE, H, W, NUM_SAMPLES, 4))

    # Slice the predictions into rgb and sigma.
    rgb = tf.sigmoid(predictions[..., :-1])
    sigma_a = tf.nn.relu(predictions[..., -1])

    # Get the distance of adjacent intervals.
    delta = t_vals[..., 1:] - t_vals[..., :-1]
    # delta shape = (num_samples)
    if rand:
        delta = tf.concat(
            [delta, tf.broadcast_to([1e10], shape=(BATCH_SIZE, H, W, 1))], axis=-1
        )
        alpha = 1.0 - tf.exp(-sigma_a * delta)
    else:
        delta = tf.concat(
            [delta, tf.broadcast_to([1e10], shape=(BATCH_SIZE, 1))], axis=-1
        )
        alpha = 1.0 - tf.exp(-sigma_a * delta[:, None, None, :])

    # Get transmittance.
    exp_term = 1.0 - alpha
    epsilon = 1e-10
    transmittance = tf.math.cumprod(exp_term + epsilon, axis=-1, exclusive=True)
    weights = alpha * transmittance
    rgb = tf.reduce_sum(weights[..., None] * rgb, axis=-2)

    if rand:
        depth_map = tf.reduce_sum(weights * t_vals, axis=-1)
    else:
        depth_map = tf.reduce_sum(weights * t_vals[:, None, None], axis=-1)
    return (rgb, depth_map)
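The weighting above is the standard discrete volume-rendering approximation used by NeRF: for each sample i along a ray, alpha_i = 1 - exp(-sigma_i * delta_i); the transmittance T_i is the cumulative product of (1 - alpha_j) over the preceding samples (the exclusive cumprod above, with a small epsilon for numerical stability); and the pixel colour is the sum over samples of T_i * alpha_i * rgb_i. The depth map is the same weighted sum taken over the sample distances t_i instead of the colours.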

def get_translation_t(t):
    """Get the translation matrix for movement in t."""
    matrix = [
        [1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, t],
        [0, 0, 0, 1],
    ]
    return tf.convert_to_tensor(matrix, dtype=tf.float32)


def get_rotation_phi(phi):
    """Get the rotation matrix for movement in phi."""
    matrix = [
        [1, 0, 0, 0],
        [0, tf.cos(phi), -tf.sin(phi), 0],
        [0, tf.sin(phi), tf.cos(phi), 0],
        [0, 0, 0, 1],
    ]
    return tf.convert_to_tensor(matrix, dtype=tf.float32)


def get_rotation_theta(theta):
    """Get the rotation matrix for movement in theta."""
    matrix = [
        [tf.cos(theta), 0, -tf.sin(theta), 0],
        [0, 1, 0, 0],
        [tf.sin(theta), 0, tf.cos(theta), 0],
        [0, 0, 0, 1],
    ]
    return tf.convert_to_tensor(matrix, dtype=tf.float32)


def pose_spherical(theta, phi, t):
    """
    Get the camera to world matrix for the corresponding theta, phi
    and t.
    """
    c2w = get_translation_t(t)
    c2w = get_rotation_phi(phi / 180.0 * np.pi) @ c2w
    c2w = get_rotation_theta(theta / 180.0 * np.pi) @ c2w
    c2w = np.array([[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) @ c2w
    return c2w
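As a quick check of the convention (not part of the app itself): pose_spherical expects theta and phi in degrees and a radius t, and returns a 4 x 4 camera-to-world matrix.

    c2w = pose_spherical(theta=90.0, phi=-30.0, t=4.0)
    print(c2w.shape)    # (4, 4)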

def show_rendered_image(r, theta, phi):
    """Renders the color image and depth map for a camera placed at (r, theta, phi)."""
    # Get the camera to world matrix.
    c2w = pose_spherical(theta, phi, r)

    ray_oris, ray_dirs = get_rays(H, W, focal, c2w)
    rays_flat, t_vals = render_flat_rays(
        ray_oris, ray_dirs, near=2.0, far=6.0, num_samples=NUM_SAMPLES, rand=False
    )

    rgb, depth = render_rgb_depth(
        nerf_loaded, rays_flat[None, ...], t_vals[None, ...], rand=False, train=False
    )
    return (rgb[0], depth[0])

# The Streamlit page content starts here.
st.title('NeRF: 3D volumetric rendering with NeRF')
st.markdown("Authors: [Aritra Roy Gosthipathy](https://twitter.com/ariG23498) and [Ritwik Raha](https://twitter.com/ritwik_raha)")
st.markdown("## Description")
st.markdown("[NeRF](https://arxiv.org/abs/2003.08934) proposes an ingenious way to synthesize novel views of a scene by modelling the volumetric scene function through a neural network.")
st.markdown("## Interactive Demo")

# Download the model snapshot from the Hugging Face Hub.
from huggingface_hub import snapshot_download
snapshot_download(repo_id="Alesteba/your-model-name", local_dir="./nerf")

# Load the pre-trained model.
nerf_loaded = tf.keras.models.load_model("nerf", compile=False)

# Set the values of r, theta and phi.
r = 4.0
theta = st.slider("Enter a value for Θ:", min_value=0.0, max_value=360.0)
phi = -30.0
color, depth = show_rendered_image(r, theta, phi)

col1, col2 = st.columns(2)

with col1:
    color = tf.keras.utils.array_to_img(color)
    st.image(color, caption="Color Image", clamp=True, width=300)

with col2:
    depth = tf.keras.utils.array_to_img(depth[..., None])
    st.image(depth, caption="Depth Map", clamp=True, width=300)

st.markdown("## Tutorials")
st.markdown("- [Keras](https://keras.io/examples/vision/nerf/)")
st.markdown("- [PyImageSearch NeRF 1](https://www.pyimagesearch.com/2021/11/10/computer-graphics-and-deep-learning-with-nerf-using-tensorflow-and-keras-part-1/)")
st.markdown("- [PyImageSearch NeRF 2](https://www.pyimagesearch.com/2021/11/17/computer-graphics-and-deep-learning-with-nerf-using-tensorflow-and-keras-part-2/)")
st.markdown("- [PyImageSearch NeRF 3](https://www.pyimagesearch.com/2021/11/24/computer-graphics-and-deep-learning-with-nerf-using-tensorflow-and-keras-part-3/)")

st.markdown("## Credits")
st.markdown("- [PyImageSearch](https://www.pyimagesearch.com/)")
st.markdown("- [JarvisLabs.ai GPU credits](https://jarvislabs.ai/)")