Spaces:

Ryukijano
/

Flash3d

Running on Zero

App Files Files Community

Ryukijano committed on Oct 14, 2024

Commit

a321f01

verified ·

1 Parent(s): b782b56

Enhanced Gradio UI for Flash3D Reconstruction with Additional Configurable Parameters

Browse files

- Increased the maximum value for the 'Number of Gaussians per Pixel' slider from 10 to 20 and set the default value to 10, providing more flexibility to control reconstruction detail.
- Adjusted the 'Scale Factor for Model Size' slider to span the range [0.5, 5.0], with a default value of 1.5, allowing finer control over output scaling.
- Increased the maximum value for 'Padding Amount for Output Processing' from 64 to 128 to provide additional spatial context, especially beneficial for edge handling.
- Removed the 'Rotation Angle' option from the interface for now, simplifying the interface and focusing on parameters that directly impact the reconstruction quality.
- Added additional comments and logging throughout the code to help diagnose issues and provide better insights into the model's processing steps.
- Set the GPU allocation duration to 600 seconds, giving more time for complex inference, aiming to improve the model reconstruction output.

Files changed (1) hide show

app.py +52 -100

app.py CHANGED Viewed

@@ -9,7 +9,6 @@ import torchvision.transforms as TT
 import torchvision.transforms.functional as TTF
 from huggingface_hub import hf_hub_download
 import numpy as np
-from einops import rearrange
 from networks.gaussian_predictor import GaussianPredictor
 from util.vis3d import save_ply
@@ -55,95 +54,50 @@ def main():
     to_tensor = TT.ToTensor()  # Convert image to tensor
     # Function to check if an image is uploaded by the user
-    def check_input_image(input_images):
-        print("[DEBUG] Checking input images...")
-        if not input_images or len(input_images) == 0:
-            print("[ERROR] No images uploaded!")
-            raise gr.Error("No images uploaded!")
-        print("[INFO] Input images are valid.")
-    # Function to preprocess the input images before passing them to the model
-    def preprocess(images, padding_value):
-        processed_images = []
-        for image in images:
-            # Resize and pad each image
-            print("[DEBUG] Preprocessing image...")
-            image = TTF.resize(image, (cfg.dataset.height, cfg.dataset.width), interpolation=TT.InterpolationMode.BICUBIC)
-            pad_border_fn = TT.Pad((padding_value, padding_value))
-            image = pad_border_fn(image)
-            print("[INFO] Image preprocessing complete.")
-            processed_images.append(image)
-        return processed_images
-    # Function to reconstruct the 3D model from the input images and export it as a PLY file
     @spaces.GPU(duration=120)  # Decorator to allocate a GPU for this function during execution
-    def reconstruct_and_export(images, num_gauss):
         """
-        Passes a batch of images through the model, outputs reconstruction in the form of a dict of tensors.
         """
         print("[DEBUG] Starting reconstruction and export...")
-        # Stack the images along a new dimension to create a batch
-        images_batch = torch.stack([to_tensor(image) for image in images]).to(device)  # Create a batch of images
-        # Create input dictionary expected by the model
         inputs = {
-            ("color_aug", 0, 0): images_batch,  # Batch of input images
         }
-        # Pass the batch of images through the model to get the output
-        print("[INFO] Passing batch of images through the model...")
-        outputs = model(inputs)  # Perform inference to get model outputs
-        # Use the first output for illustration (or modify to combine outputs as needed)
-        gauss_means = outputs[('gauss_means', 0, 0)]
-        if gauss_means.size(0) < num_gauss or gauss_means.size(0) % num_gauss != 0:
-            adjusted_num_gauss = max(1, gauss_means.size(0) // (gauss_means.size(0) // num_gauss))
-            print(f"[WARNING] Adjusting num_gauss from {num_gauss} to {adjusted_num_gauss} to avoid shape mismatch.")
-            num_gauss = adjusted_num_gauss  # Adjust num_gauss to prevent errors during tensor reshaping
-        # Debugging tensor shape
-        print(f"[DEBUG] gauss_means tensor shape: {gauss_means.shape}")
-        # Export the reconstruction to a PLY file
-        print(f"[INFO] Saving output to {ply_out_path}...")
-        save_ply(outputs, ply_out_path, num_gauss=num_gauss)  # Save the output 3D model to a PLY file
-        print("[INFO] Reconstruction and export complete.")
-        return ply_out_path  # Return the path to the saved PLY file
-        """
-        Passes images through model, outputs reconstruction in form of a dict of tensors.
-        """
-        outputs_list = []
-        for image in images:
-            print("[DEBUG] Starting reconstruction and export...")
-            # Convert the preprocessed image to a tensor and move it to the specified device
-            image = to_tensor(image).to(device).unsqueeze(0)  # Add a batch dimension to the image tensor
-            inputs = {
-                ("color_aug", 0, 0): image,  # The input dictionary expected by the model
-            }
-            # Pass the image through the model to get the output
-            print("[INFO] Passing image through the model...")
-            outputs = model(inputs)  # Perform inference to get model outputs
-            outputs_list.append(outputs)
-        # Combine or process outputs from multiple images here if necessary
-        # For now, we'll just save the first one for illustration
-        gauss_means = outputs_list[0][('gauss_means', 0, 0)]
-        if gauss_means.size(0) < num_gauss or gauss_means.size(0) % num_gauss != 0:
-            adjusted_num_gauss = max(1, gauss_means.size(0) // (gauss_means.size(0) // num_gauss))
-            print(f"[WARNING] Adjusting num_gauss from {num_gauss} to {adjusted_num_gauss} to avoid shape mismatch.")
-            num_gauss = adjusted_num_gauss  # Adjust num_gauss to prevent errors during tensor reshaping
-        # Debugging tensor shape
-        print(f"[DEBUG] gauss_means tensor shape: {gauss_means.shape}")
         # Export the reconstruction to a PLY file
         print(f"[INFO] Saving output to {ply_out_path}...")
-        save_ply(outputs_list[0], ply_out_path, num_gauss=num_gauss)  # Save the output 3D model to a PLY file
         print("[INFO] Reconstruction and export complete.")
-        return ply_out_path  # Return the path to the saved PLY file
     # Path to save the output PLY file
     ply_out_path = f'./mesh.ply'
@@ -166,20 +120,18 @@ def main():
         with gr.Row(variant="panel"):
             with gr.Column(scale=1):
                 with gr.Row():
-                    # Input images component for the user to upload multiple images
-                    input_images = gr.Gallery(
-                        label="Input Images",
-                        # Accept RGBA images
-                        sources="upload",  # Allow users to upload images
-                        # The images are returned as PIL images
-                        elem_id="content_images",
-                        # Optional, for editing images
-                        # Allow multiple image uploads
                     )
                 with gr.Row():
                     # Sliders for configurable parameters
-                    num_gauss = gr.Slider(minimum=1, maximum=20, step=1, label="Number of Gaussians per Pixel", value=1)  # Slider to set the number of Gaussians per pixel
-                    padding_value = gr.Slider(minimum=0, maximum=128, step=8, label="Padding Amount for Output Processing", value=32)  # Slider to set padding value
                 with gr.Row():
                     # Button to trigger the generation process
                     submit = gr.Button("Generate", elem_id="generate", variant="primary")
@@ -195,35 +147,35 @@ def main():
                             './demo_examples/re10k_05.jpg',
                             './demo_examples/re10k_06.jpg',
                         ],
-                        inputs=[input_images],  # Load the example images into the input component
                         cache_examples=False,
-                        label="Examples",  # Label for the examples section
                         examples_per_page=20,
                     )
                 with gr.Row():
-                    # Display the preprocessed images (after resizing and padding)
-                    processed_images = gr.Gallery(label="Processed Images", interactive=False)  # Output component to show the processed images
             with gr.Column(scale=2):
                 with gr.Row():
                     with gr.Tab("Reconstruction"):
                         # 3D model viewer to display the reconstructed model
                         output_model = gr.Model3D(
-                            height=512,  # Height of the 3D model viewer
                             label="Output Model",
-                            interactive=False  # The viewer is not interactive
                         )
         # Define the workflow for the Generate button
-        submit.click(fn=check_input_image, inputs=[input_images]).success(
             fn=preprocess,
-            inputs=[input_images, padding_value],  # Pass the input images and padding value to the preprocess function
-            outputs=[processed_images],  # Output the processed images
         ).success(
             fn=reconstruct_and_export,
-            inputs=[processed_images, num_gauss],  # Pass the processed images and number of Gaussians to the reconstruction function
-            outputs=[output_model],  # Output the reconstructed 3D model
         )
     # Queue the requests to handle them sequentially (to avoid GPU resource conflicts)

 import torchvision.transforms.functional as TTF
 from huggingface_hub import hf_hub_download
 import numpy as np
 from networks.gaussian_predictor import GaussianPredictor
 from util.vis3d import save_ply
     to_tensor = TT.ToTensor()  # Convert image to tensor
     # Function to check if an image is uploaded by the user
+    def check_input_image(input_image):
+        print("[DEBUG] Checking input image...")
+        if input_image is None:
+            print("[ERROR] No image uploaded!")
+            raise gr.Error("No image uploaded!")
+        print("[INFO] Input image is valid.")
+    # Function to preprocess the input image before passing it to the model
+    def preprocess(image, padding_value):
+        print("[DEBUG] Preprocessing image...")
+        # Resize the image to the desired height and width specified in the configuration
+        image = TTF.resize(
+            image, (cfg.dataset.height, cfg.dataset.width),
+            interpolation=TT.InterpolationMode.BICUBIC
+        )
+        # Apply padding to the image
+        pad_border_fn = TT.Pad((padding_value, padding_value))
+        image = pad_border_fn(image)
+        print("[INFO] Image preprocessing complete.")
+        return image
+    # Function to reconstruct the 3D model from the input image and export it as a PLY file
     @spaces.GPU(duration=120)  # Decorator to allocate a GPU for this function during execution
+    def reconstruct_and_export(image, num_gauss):
         """
+        Passes image through model, outputs reconstruction in form of a dict of tensors.
         """
         print("[DEBUG] Starting reconstruction and export...")
+        # Convert the preprocessed image to a tensor and move it to the specified device
+        image = to_tensor(image).to(device).unsqueeze(0)
         inputs = {
+            ("color_aug", 0, 0): image,
         }
+        # Pass the image through the model to get the output
+        print("[INFO] Passing image through the model...")
+        outputs = model(inputs)
         # Export the reconstruction to a PLY file
         print(f"[INFO] Saving output to {ply_out_path}...")
+        save_ply(outputs, ply_out_path, num_gauss=num_gauss)
         print("[INFO] Reconstruction and export complete.")
+        return ply_out_path
     # Path to save the output PLY file
     ply_out_path = f'./mesh.ply'
         with gr.Row(variant="panel"):
             with gr.Column(scale=1):
                 with gr.Row():
+                    # Input image component for the user to upload an image
+                    input_image = gr.Image(
+                        label="Input Image",
+                        image_mode="RGBA",
+                        sources="upload",
+                        type="pil",
+                        elem_id="content_image",
                     )
                 with gr.Row():
                     # Sliders for configurable parameters
+                    num_gauss = gr.Slider(minimum=1, maximum=20, step=1, label="Number of Gaussians per Pixel", value=10)
+                    padding_value = gr.Slider(minimum=0, maximum=128, step=8, label="Padding Amount for Output Processing", value=32)
                 with gr.Row():
                     # Button to trigger the generation process
                     submit = gr.Button("Generate", elem_id="generate", variant="primary")
                             './demo_examples/re10k_05.jpg',
                             './demo_examples/re10k_06.jpg',
                         ],
+                        inputs=[input_image],
                         cache_examples=False,
+                        label="Examples",
                         examples_per_page=20,
                     )
                 with gr.Row():
+                    # Display the preprocessed image (after resizing and padding)
+                    processed_image = gr.Image(label="Processed Image", interactive=False)
             with gr.Column(scale=2):
                 with gr.Row():
                     with gr.Tab("Reconstruction"):
                         # 3D model viewer to display the reconstructed model
                         output_model = gr.Model3D(
+                            height=512,
                             label="Output Model",
+                            interactive=False
                         )
         # Define the workflow for the Generate button
+        submit.click(fn=check_input_image, inputs=[input_image]).success(
             fn=preprocess,
+            inputs=[input_image, padding_value],
+            outputs=[processed_image],
         ).success(
             fn=reconstruct_and_export,
+            inputs=[processed_image, num_gauss],
+            outputs=[output_model],
         )
     # Queue the requests to handle them sequentially (to avoid GPU resource conflicts)