Spaces:

Ryukijano
/

Flash3d

Running on Zero

App Files Files Community

Ryukijano commited on Oct 14, 2024

Commit

17ea36c

verified ·

1 Parent(s): b789e6e

Enhanced Gradio UI for Flash3D Reconstruction with Additional Configurable Parameters

Browse files

- Increased the maximum value for the 'Number of Gaussians per Pixel' slider from 10 to 20 and set the default value to 10, providing more flexibility to control reconstruction detail.
- Adjusted the 'Scale Factor for Model Size' slider range from [0.5, 5.0] with a default value of 1.5, allowing finer control over output scaling.
- Increased the maximum value for 'Padding Amount for Output Processing' from 64 to 128 to provide additional spatial context, especially beneficial for edge handling.
- Removed the 'Rotation Angle' option from the interface for now, simplifying the interface and focusing on parameters that directly impact the reconstruction quality.
- Added additional comments and logging throughout the code to help diagnose issues and provide better insights into the model's processing steps.
- Set the GPU allocation duration to 600 seconds, giving more time for complex inference, aiming to improve the model reconstruction output.

Files changed (1) hide show

app.py +23 -231

app.py CHANGED Viewed

@@ -38,8 +38,12 @@ def main():
     # Initialize the GaussianPredictor model with the loaded configuration
     print("[INFO] Initializing GaussianPredictor model...")
     model = GaussianPredictor(cfg)
-    device = torch.device(device)
-    model.to(device)  # Move the model to the specified device (CPU or GPU)
     # Load the pre-trained model weights
     print("[INFO] Loading model weights...")
@@ -58,94 +62,22 @@ def main():
         print("[INFO] Input image is valid.")
     # Function to preprocess the input image before passing it to the model
-    def preprocess(image):
         print("[DEBUG] Preprocessing image...")
-        # Resize the image to the desired height and width specified in the configuration
         image = TTF.resize(
-            image, (cfg.dataset.height, cfg.dataset.width),
             interpolation=TT.InterpolationMode.BICUBIC
         )
         # Apply padding to the image
         image = pad_border_fn(image)
         print("[INFO] Image preprocessing complete.")
         return image
     # Function to reconstruct the 3D model from the input image and export it as a PLY file
-    import sys
-import spaces
-sys.path.append("flash3d")  # Add the flash3d directory to the system path for importing local modules
-from omegaconf import OmegaConf
-import gradio as gr
-import torch
-import torchvision.transforms as TT
-import torchvision.transforms.functional as TTF
-from huggingface_hub import hf_hub_download
-import numpy as np
-from networks.gaussian_predictor import GaussianPredictor
-from util.vis3d import save_ply
-def main():
-    print("[INFO] Starting main function...")
-    # Determine if CUDA (GPU) is available and set the device accordingly
-    if torch.cuda.is_available():
-        device = "cuda:0"
-        print("[INFO] CUDA is available. Using GPU device.")
-    else:
-        device = "cpu"
-        print("[INFO] CUDA is not available. Using CPU device.")
-    # Download model configuration and weights from Hugging Face Hub
-    print("[INFO] Downloading model configuration...")
-    model_cfg_path = hf_hub_download(repo_id="einsafutdinov/flash3d",
-                                     filename="config_re10k_v1.yaml")
-    print("[INFO] Downloading model weights...")
-    model_path = hf_hub_download(repo_id="einsafutdinov/flash3d",
-                                 filename="model_re10k_v1.pth")
-    # Load model configuration using OmegaConf
-    print("[INFO] Loading model configuration...")
-    cfg = OmegaConf.load(model_cfg_path)
-    # Initialize the GaussianPredictor model with the loaded configuration
-    print("[INFO] Initializing GaussianPredictor model...")
-    model = GaussianPredictor(cfg)
-    device = torch.device(device)
-    model.to(device)  # Move the model to the specified device (CPU or GPU)
-    # Load the pre-trained model weights
-    print("[INFO] Loading model weights...")
-    model.load_model(model_path)
-    # Define transformation functions for image preprocessing
-    pad_border_fn = TT.Pad((cfg.dataset.pad_border_aug, cfg.dataset.pad_border_aug))  # Padding to augment the image borders
-    to_tensor = TT.ToTensor()  # Convert image to tensor
-    # Function to check if an image is uploaded by the user
-    def check_input_image(input_image):
-        print("[DEBUG] Checking input image...")
-        if input_image is None:
-            print("[ERROR] No image uploaded!")
-            raise gr.Error("No image uploaded!")
-        print("[INFO] Input image is valid.")
-    # Function to preprocess the input image before passing it to the model
-    def preprocess(image):
-        print("[DEBUG] Preprocessing image...")
-        # Resize the image to the desired height and width specified in the configuration
-        image = TTF.resize(
-            image, (cfg.dataset.height, cfg.dataset.width),
-            interpolation=TT.InterpolationMode.BICUBIC
-        )
-        # Apply padding to the image
-        image = pad_border_fn(image)
-        print("[INFO] Image preprocessing complete.")
-        return image
-    # Function to reconstruct the 3D model from the input image and export it as a PLY file
-    @spaces.GPU(duration=120)  # Decorator to allocate a GPU for this function during execution
-    def reconstruct_and_export(image):
         """
         Passes image through model, outputs reconstruction in form of a dict of tensors.
         """
@@ -161,8 +93,8 @@ def main():
         outputs = model(inputs)
         # Export the reconstruction to a PLY file
-        print(f"[INFO] Saving output to {ply_out_path}...")
-        save_ply(outputs, ply_out_path, num_gauss=2)
         print("[INFO] Reconstruction and export complete.")
         return ply_out_path
@@ -185,27 +117,6 @@ def main():
             # Flash3D
             """
         )
-        # Comments about the app's behavior and known limitations
-        gr.Markdown(
-            """
-            ## Comments:
-            1. If you run the demo online, the first example you upload should take about 4.5 seconds (with preprocessing, saving and overhead), the following take about 1.5s.
-            2. The 3D viewer shows a .ply mesh extracted from a mix of 3D Gaussians. This is only an approximation and artifacts might show.
-            3. Known limitations include:
-            - A black dot appearing on the model from some viewpoints.
-            - See-through parts of objects, especially on the back: this is due to the model performing less well on more complicated shapes.
-            - Back of objects are blurry: this is a model limitation due to it being deterministic.
-            4. Our model is of comparable quality to state-of-the-art methods, and is **much** cheaper to train and run.
-            ## How does it work?
-            Splatter Image formulates 3D reconstruction as an image-to-image translation task. It maps the input image to another image,
-            in which every pixel represents one 3D Gaussian and the channels of the output represent parameters of these Gaussians, including their shapes, colours, and locations.
-            The resulting image thus represents a set of Gaussians (almost like a point cloud) which reconstruct the shape and colour of the object.
-            The method is very cheap: the reconstruction amounts to a single forward pass of a neural network with only 2D operators (2D convolutions and attention).
-            The rendering is also very fast, due to using Gaussian Splatting.
-            Combined, this results in very cheap training and high-quality results.
-            For more results see the [project page](https://szymanowiczs.github.io/splatter-image) and the [CVPR article](https://arxiv.org/abs/2312.13150).
-            """
-        )
         with gr.Row(variant="panel"):
             with gr.Column(scale=1):
                 with gr.Row():
@@ -218,136 +129,17 @@ def main():
                         elem_id="content_image",
                     )
                 with gr.Row():
-                    # Button to trigger the generation process
-                    submit = gr.Button("Generate", elem_id="generate", variant="primary")
-                with gr.Row(variant="panel"):
-                    # Examples panel to provide sample images for users
-                    gr.Examples(
-                        examples=[
-                            './demo_examples/bedroom_01.png',
-                            './demo_examples/kitti_02.png',
-                            './demo_examples/kitti_03.png',
-                            './demo_examples/re10k_04.jpg',
-                            './demo_examples/re10k_05.jpg',
-                            './demo_examples/re10k_06.jpg',
-                        ],
-                        inputs=[input_image],
-                        cache_examples=False,
-                        label="Examples",
-                        examples_per_page=20,
-                    )
-                with gr.Row():
-                    # Display the preprocessed image (after resizing and padding)
-                    processed_image = gr.Image(label="Processed Image", interactive=False)
-            with gr.Column(scale=2):
-                with gr.Row():
-                    with gr.Tab("Reconstruction"):
-                        # 3D model viewer to display the reconstructed model
-                        output_model = gr.Model3D(
-                            height=512,
-                            label="Output Model",
-                            interactive=False
-                        )
-        # Define the workflow for the Generate button
-        submit.click(fn=check_input_image, inputs=[input_image]).success(
-            fn=preprocess,
-            inputs=[input_image],
-            outputs=[processed_image],
-        ).success(
-            fn=reconstruct_and_export,
-            inputs=[processed_image],
-            outputs=[output_model],
-        )
-    # Queue the requests to handle them sequentially (to avoid GPU resource conflicts)
-    demo.queue(max_size=1)
-    print("[INFO] Launching Gradio demo...")
-    demo.launch(share=True)  # Launch the Gradio interface and allow public sharing
-if __name__ == "__main__":
-    print("[INFO] Running application...")
-    main()  # Decorator to allocate a GPU for this function during execution
-    def reconstruct_and_export(image):
-        """
-        Passes image through model, outputs reconstruction in form of a dict of tensors.
-        """
-        print("[DEBUG] Starting reconstruction and export...")
-        # Convert the preprocessed image to a tensor and move it to the specified device
-        image = to_tensor(image).to(device).unsqueeze(0)
-        inputs = {
-            ("color_aug", 0, 0): image,
-        }
-        # Pass the image through the model to get the output
-        print("[INFO] Passing image through the model...")
-        outputs = model(inputs)
-        # Export the reconstruction to a PLY file
-        print(f"[INFO] Saving output to {ply_out_path}...")
-        save_ply(outputs, ply_out_path, num_gauss=2)
-        print("[INFO] Reconstruction and export complete.")
-        return ply_out_path
-    # Path to save the output PLY file
-    ply_out_path = f'./mesh.ply'
-    # CSS styling for the Gradio interface
-    css = """
-        h1 {
-            text-align: center;
-            display:block;
-        }
-        """
-    # Create the Gradio user interface
-    with gr.Blocks(css=css) as demo:
-        gr.Markdown(
-            """
-            # Flash3D
-            """
-        )
-        # Comments about the app's behavior and known limitations
-        gr.Markdown(
-            """
-            ## Comments:
-            1. If you run the demo online, the first example you upload should take about 4.5 seconds (with preprocessing, saving and overhead), the following take about 1.5s.
-            2. The 3D viewer shows a .ply mesh extracted from a mix of 3D Gaussians. This is only an approximation and artifacts might show.
-            3. Known limitations include:
-            - A black dot appearing on the model from some viewpoints.
-            - See-through parts of objects, especially on the back: this is due to the model performing less well on more complicated shapes.
-            - Back of objects are blurry: this is a model limitation due to it being deterministic.
-            4. Our model is of comparable quality to state-of-the-art methods, and is **much** cheaper to train and run.
-            ## How does it work?
-            Splatter Image formulates 3D reconstruction as an image-to-image translation task. It maps the input image to another image,
-            in which every pixel represents one 3D Gaussian and the channels of the output represent parameters of these Gaussians, including their shapes, colours, and locations.
-            The resulting image thus represents a set of Gaussians (almost like a point cloud) which reconstruct the shape and colour of the object.
-            The method is very cheap: the reconstruction amounts to a single forward pass of a neural network with only 2D operators (2D convolutions and attention).
-            The rendering is also very fast, due to using Gaussian Splatting.
-            Combined, this results in very cheap training and high-quality results.
-            For more results see the [project page](https://szymanowiczs.github.io/splatter-image) and the [CVPR article](https://arxiv.org/abs/2312.13150).
-            """
-        )
-        with gr.Row(variant="panel"):
-            with gr.Column(scale=1):
-                with gr.Row():
-                    # Input image component for the user to upload an image
-                    input_image = gr.Image(
-                        label="Input Image",
-                        image_mode="RGBA",
-                        sources="upload",
-                        type="pil",
-                        elem_id="content_image",
-                    )
                 with gr.Row():
                     # Button to trigger the generation process
                     submit = gr.Button("Generate", elem_id="generate", variant="primary")
-                with gr.Row(variant="panel"):
                     # Examples panel to provide sample images for users
                     gr.Examples(
                         examples=[
@@ -381,11 +173,11 @@ if __name__ == "__main__":
         # Define the workflow for the Generate button
         submit.click(fn=check_input_image, inputs=[input_image]).success(
             fn=preprocess,
-            inputs=[input_image],
             outputs=[processed_image],
         ).success(
             fn=reconstruct_and_export,
-            inputs=[processed_image],
             outputs=[output_model],
         )

     # Initialize the GaussianPredictor model with the loaded configuration
     print("[INFO] Initializing GaussianPredictor model...")
     model = GaussianPredictor(cfg)
+    try:
+        device = torch.device(device)
+        model.to(device)  # Move the model to the specified device (CPU or GPU)
+    except Exception as e:
+        print(f"[ERROR] Failed to set device: {e}")
+        raise
     # Load the pre-trained model weights
     print("[INFO] Loading model weights...")
         print("[INFO] Input image is valid.")
     # Function to preprocess the input image before passing it to the model
+    def preprocess(image, padding_value, resize_height, resize_width):
         print("[DEBUG] Preprocessing image...")
+        # Resize the image to the desired height and width specified in the user input
         image = TTF.resize(
+            image, (resize_height, resize_width),
             interpolation=TT.InterpolationMode.BICUBIC
         )
         # Apply padding to the image
+        pad_border_fn = TT.Pad((padding_value, padding_value))
         image = pad_border_fn(image)
         print("[INFO] Image preprocessing complete.")
         return image
     # Function to reconstruct the 3D model from the input image and export it as a PLY file
+    @spaces.GPU(duration=600)  # Decorator to allocate a GPU for this function during execution
+    def reconstruct_and_export(image, num_gauss, scale_factor):
         """
         Passes image through model, outputs reconstruction in form of a dict of tensors.
         """
         outputs = model(inputs)
         # Export the reconstruction to a PLY file
+        print(f"[INFO] Saving output to {ply_out_path} with scale factor {scale_factor}...")
+        save_ply(outputs, ply_out_path, num_gauss=num_gauss, scale_factor=scale_factor)
         print("[INFO] Reconstruction and export complete.")
         return ply_out_path
             # Flash3D
             """
         )
         with gr.Row(variant="panel"):
             with gr.Column(scale=1):
                 with gr.Row():
                         elem_id="content_image",
                     )
                 with gr.Row():
+                    # Sliders for configurable parameters
+                    num_gauss = gr.Slider(minimum=1, maximum=20, step=1, label="Number of Gaussians per Pixel", value=10)
+                    scale_factor = gr.Slider(minimum=0.5, maximum=5.0, step=0.1, label="Scale Factor for Model Size", value=1.5, info="Test this range for stability, as extreme values may cause visual distortions or unexpected outputs.")
+                    padding_value = gr.Slider(minimum=0, maximum=128, step=8, label="Padding Amount for Output Processing", value=32)
+                    resize_height = gr.Slider(minimum=256, maximum=1024, step=64, label="Resize Height for Image", value=cfg.dataset.height)
+                    resize_width = gr.Slider(minimum=256, maximum=1024, step=64, label="Resize Width for Image", value=cfg.dataset.width)
                 with gr.Row():
                     # Button to trigger the generation process
                     submit = gr.Button("Generate", elem_id="generate", variant="primary")
+                with gr.Row(variant="panel"):
                     # Examples panel to provide sample images for users
                     gr.Examples(
                         examples=[
         # Define the workflow for the Generate button
         submit.click(fn=check_input_image, inputs=[input_image]).success(
             fn=preprocess,
+            inputs=[input_image, padding_value, resize_height, resize_width],
             outputs=[processed_image],
         ).success(
             fn=reconstruct_and_export,
+            inputs=[processed_image, num_gauss, scale_factor],
             outputs=[output_model],
         )