Spaces:

Ryukijano
/

Flash3d

Sleeping

App Files Community

Ryukijano committed on Oct 25, 2024

Commit

8403619

verified ·

1 Parent(s): f2e0064

Demake app.py

Browse files

We wait for now.

Files changed (1) hide show

app.py +71 -21

app.py CHANGED Viewed

@@ -13,9 +13,9 @@ import numpy as np
 from networks.gaussian_predictor import GaussianPredictor
 from util.vis3d import save_ply
 def main():
     print("[INFO] Starting main function...")
     if torch.cuda.is_available():
         device = "cuda:0"
         print("[INFO] CUDA is available. Using GPU device.")
@@ -23,25 +23,33 @@ def main():
         device = "cpu"
         print("[INFO] CUDA is not available. Using CPU device.")
     print("[INFO] Downloading model configuration...")
-    model_cfg_path = hf_hub_download(repo_id="einsafutdinov/flash3d", filename="config_re10k_v1.yaml")
     print("[INFO] Downloading model weights...")
-    model_path = hf_hub_download(repo_id="einsafutdinov/flash3d", filename="model_re10k_v1.pth")
     print("[INFO] Loading model configuration...")
     cfg = OmegaConf.load(model_cfg_path)
     print("[INFO] Initializing GaussianPredictor model...")
     model = GaussianPredictor(cfg)
     device = torch.device(device)
-    model.to(device)
     print("[INFO] Loading model weights...")
     model.load_model(model_path)
-    pad_border_fn = TT.Pad((cfg.dataset.pad_border_aug, cfg.dataset.pad_border_aug))
-    to_tensor = TT.ToTensor()
     def check_input_image(input_image):
         print("[DEBUG] Checking input image...")
         if input_image is None:
@@ -49,27 +57,47 @@ def main():
             raise gr.Error("No image uploaded!")
         print("[INFO] Input image is valid.")
-    def preprocess(image, resolution):
         print("[DEBUG] Preprocessing image...")
-        image = TTF.resize(image, (resolution, resolution), interpolation=TT.InterpolationMode.BICUBIC)
         image = pad_border_fn(image)
         print("[INFO] Image preprocessing complete.")
         return image
-    @spaces.GPU(duration=120)
-    def reconstruct_and_export(image, num_gauss):
         print("[DEBUG] Starting reconstruction and export...")
         image = to_tensor(image).to(device).unsqueeze(0)
-        inputs = {("color_aug", 0, 0): image}
         print("[INFO] Passing image through the model...")
         outputs = model(inputs)
         print(f"[INFO] Saving output to {ply_out_path}...")
-        save_ply(outputs, ply_out_path, num_gauss=num_gauss)
         print("[INFO] Reconstruction and export complete.")
         return ply_out_path
     ply_out_path = f'./mesh.ply'
     css = """
         h1 {
             text-align: center;
@@ -77,15 +105,30 @@ def main():
         }
         """
     with gr.Blocks(css=css) as demo:
-        gr.Markdown("# Flash3D")
         with gr.Row(variant="panel"):
             with gr.Column(scale=1):
                 with gr.Row():
-                    input_image = gr.Image(label="Input Image", image_mode="RGBA", sources="upload", type="pil", elem_id="content_image")
                 with gr.Row():
                     submit = gr.Button("Generate", elem_id="generate", variant="primary")
                 with gr.Row(variant="panel"):
                     gr.Examples(
                         examples=[
                             './demo_examples/bedroom_01.png',
@@ -100,29 +143,36 @@ def main():
                         label="Examples",
                         examples_per_page=20,
                     )
                 with gr.Row():
                     processed_image = gr.Image(label="Processed Image", interactive=False)
             with gr.Column(scale=2):
                 with gr.Row():
                     with gr.Tab("Reconstruction"):
-                        output_model = gr.Model3D(height=512, label="Output Model", interactive=False)
-                with gr.Row():
-                    resolution = gr.Slider(minimum=256, maximum=1024, step=64, label="Image Resolution", value=cfg.dataset.height)
-                    num_gauss = gr.Slider(minimum=1, maximum=10, step=1, label="Number of Gaussian Components", value=2)
         submit.click(fn=check_input_image, inputs=[input_image]).success(
             fn=preprocess,
-            inputs=[input_image, resolution],
             outputs=[processed_image],
         ).success(
             fn=reconstruct_and_export,
-            inputs=[processed_image, num_gauss],
             outputs=[output_model],
         )
     demo.queue(max_size=1)
     print("[INFO] Launching Gradio demo...")
-    demo.launch(share=True)
 if __name__ == "__main__":
     print("[INFO] Running application...")

 from networks.gaussian_predictor import GaussianPredictor
 from util.vis3d import save_ply
 def main():
     print("[INFO] Starting main function...")
+    # Determine if CUDA (GPU) is available and set the device accordingly
     if torch.cuda.is_available():
         device = "cuda:0"
         print("[INFO] CUDA is available. Using GPU device.")
         device = "cpu"
         print("[INFO] CUDA is not available. Using CPU device.")
+    # Download model configuration and weights from Hugging Face Hub
     print("[INFO] Downloading model configuration...")
+    model_cfg_path = hf_hub_download(repo_id="einsafutdinov/flash3d",
+                                     filename="config_re10k_v1.yaml")
     print("[INFO] Downloading model weights...")
+    model_path = hf_hub_download(repo_id="einsafutdinov/flash3d",
+                                 filename="model_re10k_v1.pth")
+    # Load model configuration using OmegaConf
     print("[INFO] Loading model configuration...")
     cfg = OmegaConf.load(model_cfg_path)
+    # Initialize the GaussianPredictor model with the loaded configuration
     print("[INFO] Initializing GaussianPredictor model...")
     model = GaussianPredictor(cfg)
     device = torch.device(device)
+    model.to(device)  # Move the model to the specified device (CPU or GPU)
+    # Load the pre-trained model weights
     print("[INFO] Loading model weights...")
     model.load_model(model_path)
+    # Define transformation functions for image preprocessing
+    pad_border_fn = TT.Pad((cfg.dataset.pad_border_aug, cfg.dataset.pad_border_aug))  # Padding to augment the image borders
+    to_tensor = TT.ToTensor()  # Convert image to tensor
+    # Function to check if an image is uploaded by the user
     def check_input_image(input_image):
         print("[DEBUG] Checking input image...")
         if input_image is None:
             raise gr.Error("No image uploaded!")
         print("[INFO] Input image is valid.")
+    # Function to preprocess the input image before passing it to the model
+    def preprocess(image):
         print("[DEBUG] Preprocessing image...")
+        # Resize the image to the desired height and width specified in the configuration
+        image = TTF.resize(
+            image, (cfg.dataset.height, cfg.dataset.width),
+            interpolation=TT.InterpolationMode.BICUBIC
+        )
+        # Apply padding to the image
         image = pad_border_fn(image)
         print("[INFO] Image preprocessing complete.")
         return image
+    # Function to reconstruct the 3D model from the input image and export it as a PLY file
+    @spaces.GPU(duration=120)  # Decorator to allocate a GPU for this function during execution
+    def reconstruct_and_export(image):
+        """
+        Passes image through model, outputs reconstruction in form of a dict of tensors.
+        """
         print("[DEBUG] Starting reconstruction and export...")
+        # Convert the preprocessed image to a tensor and move it to the specified device
         image = to_tensor(image).to(device).unsqueeze(0)
+        inputs = {
+            ("color_aug", 0, 0): image,
+        }
+        # Pass the image through the model to get the output
         print("[INFO] Passing image through the model...")
         outputs = model(inputs)
+        # Export the reconstruction to a PLY file
         print(f"[INFO] Saving output to {ply_out_path}...")
+        save_ply(outputs, ply_out_path, num_gauss=2)
         print("[INFO] Reconstruction and export complete.")
         return ply_out_path
+    # Path to save the output PLY file
     ply_out_path = f'./mesh.ply'
+    # CSS styling for the Gradio interface
     css = """
         h1 {
             text-align: center;
         }
         """
+    # Create the Gradio user interface
     with gr.Blocks(css=css) as demo:
+        gr.Markdown(
+            """
+            # Flash3D
+            """
+        )
         with gr.Row(variant="panel"):
             with gr.Column(scale=1):
                 with gr.Row():
+                    # Input image component for the user to upload an image
+                    input_image = gr.Image(
+                        label="Input Image",
+                        image_mode="RGBA",
+                        sources="upload",
+                        type="pil",
+                        elem_id="content_image",
+                    )
                 with gr.Row():
+                    # Button to trigger the generation process
                     submit = gr.Button("Generate", elem_id="generate", variant="primary")
                 with gr.Row(variant="panel"):
+                    # Examples panel to provide sample images for users
                     gr.Examples(
                         examples=[
                             './demo_examples/bedroom_01.png',
                         label="Examples",
                         examples_per_page=20,
                     )
                 with gr.Row():
+                    # Display the preprocessed image (after resizing and padding)
                     processed_image = gr.Image(label="Processed Image", interactive=False)
             with gr.Column(scale=2):
                 with gr.Row():
                     with gr.Tab("Reconstruction"):
+                        # 3D model viewer to display the reconstructed model
+                        output_model = gr.Model3D(
+                            height=512,
+                            label="Output Model",
+                            interactive=False
+                        )
+        # Define the workflow for the Generate button
         submit.click(fn=check_input_image, inputs=[input_image]).success(
             fn=preprocess,
+            inputs=[input_image],
             outputs=[processed_image],
         ).success(
             fn=reconstruct_and_export,
+            inputs=[processed_image],
             outputs=[output_model],
         )
+    # Queue the requests to handle them sequentially (to avoid GPU resource conflicts)
     demo.queue(max_size=1)
     print("[INFO] Launching Gradio demo...")
+    demo.launch(share=True)  # Launch the Gradio interface and allow public sharing
 if __name__ == "__main__":
     print("[INFO] Running application...")