+
EditP23: 3D Editing via Propagation of Image Prompts to Multi-View
+
+
+ This is the official Gradio demo for EditP23, a method for fast, mask-free 3D editing that propagates 2D image edits to multi-view representations in a 3D-consistent manner. The edit is guided by an image pair, allowing users to leverage any preferred 2D editing tool, from manual painting to generative pipelines.
+
+
+"""
+
+HOW_TO_USE_TEXT = """
+
+
+
EditP23 requires three specific images to perform an edit. This demo automates the process, but understanding each component is key.
+
+ - Original Multi-View Image (`src_mv.png`): This is a 2x3 grid of six different views of the original, unedited object. The model uses this as the base to apply the edit consistently across all angles.
+ - Source Condition (`src.png`): This is a single, frontal view of the original object. It acts as the "before" image for the edit.
+ - Target Condition (`edited.png`): This is the "after" image. It's the same view as `src.png`, but with your desired 2D modification applied. The difference between this image and `src.png` is what guides the 3D edit.
+
+
+
How to Prepare Your Own Images
+
You can generate the required input images using the helper scripts provided in our GitHub repository.
+
Step 1: Generate `src.png` and `src_mv.png`
+
You have two options for creating the initial views of your object.
+
+
Step 2: Create edited.png
+
Use any 2D image editor to modify your `src.png`. This is where your creativity comes in! For quick edits, we recommend these online tools:
+
+ - FlowEdit: Excellent for global, structural edits.
+ - Flux-Inpainting: Great for local modifications and inpainting.
+
+
+
Understanding the Parameters
+
+ - `n_max`: Controls how many denoising steps are influenced by your edit. Higher values are needed for more significant geometric changes.
+ - `tar_guidance_scale`: Determines the strength of your edit. Increase this for more dramatic changes, but be aware that very high values can sometimes introduce artifacts.
+ - `src_guidance_scale`: Controls how strongly the model adheres to the original object's identity. This can usually be left at its default value.
+
+
+
Reconstructing a 3D Model
+
After this demo generates an edited multi-view image, you can use the `scripts/recon.py` script from our repository to convert it back into a 3D model (`.obj` file).
+
python scripts/recon.py path/to/instant-mesh-large.yaml --input_file "path/to/edited_mv.png" --output_dir "path/to/output/"
+
+"""
+
+# --- Gradio UI Layout ---
+# Create a custom theme to match the website's color
+theme = gr.themes.Base(
+ primary_hue=gr.themes.colors.blue,
+ secondary_hue=gr.themes.colors.blue,
+ font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
+).set(
+ button_primary_background_fill="*primary_500",
+ button_primary_background_fill_hover="*primary_600",
+)
+
+# Custom CSS for better layout and fixing UI quirks
+CUSTOM_CSS = """
+.gradio-container { max-width: 95% !important; }
+.label-wrap { padding-top: 6px !import ant; } /* Fix label overlap */
+.help-text { color: #9CA3AF; font-size: 0.9rem; margin-top: 4px; margin-bottom: 12px; }
+.link-button { text-decoration: none; color: white; padding: 8px 16px; border-radius: 8px; font-weight: bold; transition: background-color 0.2s ease; }
+.link-button:hover { background-color: #4a5568 !important; }
+#action-buttons { margin-top: 1rem; }
+
+/* --- CSS Rules for the Examples Table --- */
+
+/* 1. CRITICAL FIX: Target the image's wrapper to prevent clipping. */
+#example-table td > div {
+ overflow: visible !important; /* This is the key to stop cropping. */
+ display: flex;
+ justify-content: center;
+ align-items: center;
+}
+
+/* 2. General cell styling for alignment and spacing */
+#example-table td {
+ vertical-align: middle !important;
+ padding: 8px !important;
+}
+
+/* 3. Force parameter columns (4-7) to have the same width */
+#example-table th:nth-child(n+4):nth-child(-n+7),
+#example-table td:nth-child(n+4):nth-child(-n+7) {
+ width: 85px !important;
+ max-width: 85px !important;
+ text-align: center;
+ word-break: break-word;
+}
+
+/* 4. Enlarge multi-view image (Col 1) with a 3:2 height:width ratio */
+#example-table td:nth-child(1) img {
+ height: 180px !important;
+ width: 120px !important; /* 180px / 120px = 3:2 ratio */
+ object-fit: contain !important; /* Ensures the whole image is visible */
+}
+
+/* 5. Enlarge condition images (Col 2 & 3) */
+#example-table td:nth-child(2) img,
+#example-table td:nth-child(3) img {
+ height: 150px !important;
+ width: 150px !important;
+ object-fit: contain !important;
+}
+"""
+
+
+with gr.Blocks(theme=theme, css=CUSTOM_CSS) as demo:  # top-level app object, launched as `demo` at the end of the script
+ gr.Markdown(ABOUT_TEXT)
+
+ with gr.Tabs() as tabs:
+ with gr.TabItem("Interactive Demo", id=0):
+ with gr.Row(variant="panel", equal_height=False):
+ # Column 1: Inputs -- the multi-view grid plus the source/target condition images (all delivered as numpy arrays)
+ with gr.Column(scale=1):
+ gr.Markdown("### 1. Input Images")
+ gr.Markdown(
+ 'See the "How to Use" tab for details on generating the **Multi-View Image** and creating your own **Edited Condition**.',
+ elem_classes="help-text",
+ )
+ original_mv_image = gr.Image(
+ type="numpy",
+ label="Original Multi-View Image (src_mv.png)",
+ height=675,
+ width=450,
+ )
+ with gr.Row():
+ src_cond_image = gr.Image(
+ type="numpy",
+ label="Source Condition (src.png)",
+ height=350,
+ width=350,
+ )
+ tgt_cond_image = gr.Image(
+ type="numpy",
+ label="Target Condition (edited.png)",
+ height=350,
+ width=350,
+ )
+
+ # Column 2: Parameters & Action -- the sampling sliders and the Clear / Generate buttons
+ with gr.Column(scale=1, min_width=300):
+ gr.Markdown("### 2. Parameters")
+ with gr.Accordion("Advanced Parameters", open=True):
+ t_steps = gr.Slider(
+ minimum=1,
+ maximum=100,
+ value=50,
+ step=1,
+ label="T_steps",
+ info="Total number of denoising steps.",
+ )
+ n_max = gr.Slider(
+ minimum=1,
+ maximum=50,  # NOTE(review): fixed bound, independent of T_steps (1-100); presumably n_max should not exceed T_steps -- confirm
+ value=31,
+ step=1,
+ label="n_max",
+ info="Number of scheduler steps for edit-aware guidance. Increase for more significant edits.",
+ )
+ src_gs = gr.Slider(
+ minimum=1.0,
+ maximum=10.0,
+ value=3.5,
+ step=0.1,
+ label="Source CFG",
+ info="Guidance scale for the source condition. Can typically remain constant.",
+ )
+ tar_gs = gr.Slider(
+ minimum=1.0,
+ maximum=30.0,
+ value=5.0,
+ step=0.1,
+ label="Target CFG",
+ info="Guidance scale for the target condition. Increase for more significant edits.",
+ )
+ seed = gr.Slider(
+ minimum=0,
+ maximum=10000,
+ value=18,
+ step=1,
+ label="Seed",
+ info="Random seed for reproducibility.",
+ )
+
+ with gr.Row(elem_id="action-buttons"):  # id targeted by the #action-buttons CSS rule
+ clear_button = gr.Button("Clear", variant="secondary", scale=1)
+ run_button = gr.Button("Generate", variant="primary", scale=2)
+
+ # Column 3: Output -- read-only (interactive=False) view of the edited result, delivered as a PIL image
+ with gr.Column(scale=2, min_width=350):
+ gr.Markdown("### 3. Output Image")
+ output_image = gr.Image(
+ type="pil",
+ label="Edited Result",
+ height=450,
+ width=450,
+ interactive=False,
+ )
+ gr.Markdown(
+ 'After generating, you can use the `recon.py` script to create a 3D model. See the "How to Use" tab for the full command.',
+ elem_classes="help-text",
+ )
+
+ # --- Examples Section --- (only built when the EXAMPLES_PATH directory exists)
+ if os.path.exists(EXAMPLES_PATH):
+ gr.Markdown("---")
+ gr.Markdown("### Click an Example to Load")
+
+ example_inputs = [
+ original_mv_image,
+ src_cond_image,
+ tgt_cond_image,
+ t_steps,
+ n_max,
+ src_gs,
+ tar_gs,
+ ]
+
+ example_data = [
+ [
+ os.path.join(EXAMPLES_PATH, "bike_vintage", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "bike_vintage", "src.png"),
+ os.path.join(EXAMPLES_PATH, "bike_vintage", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "robot_sunglasses", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "robot_sunglasses", "src.png"),
+ os.path.join(EXAMPLES_PATH, "robot_sunglasses", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "stormtrooper_donut", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "stormtrooper_donut", "src.png"),
+ os.path.join(EXAMPLES_PATH, "stormtrooper_donut", "edited.png"),
+ 50,
+ 42,
+ 3.5,
+ 12.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "figure_zombie", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "figure_zombie", "src.png"),
+ os.path.join(EXAMPLES_PATH, "figure_zombie", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "deer_pixar", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "deer_pixar", "src.png"),
+ os.path.join(EXAMPLES_PATH, "deer_pixar", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "german-shep_plush", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "german-shep_plush", "src.png"),
+ os.path.join(EXAMPLES_PATH, "german-shep_plush", "edited.png"),
+ 50,
+ 41,
+ 3.5,
+ 6.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "deer_wings", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "deer_wings", "src.png"),
+ os.path.join(EXAMPLES_PATH, "deer_wings", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 21.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "lego-car_spoiler", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "lego-car_spoiler", "src.png"),
+ os.path.join(EXAMPLES_PATH, "lego-car_spoiler", "edited.png"),
+ 50,
+ 42,
+ 3.5,
+ 12.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "batman_jetpack", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "batman_jetpack", "src.png"),
+ os.path.join(EXAMPLES_PATH, "batman_jetpack", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "bike_sport", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "bike_sport", "src.png"),
+ os.path.join(EXAMPLES_PATH, "bike_sport", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "red-dragon_tail", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "red-dragon_tail", "src.png"),
+ os.path.join(EXAMPLES_PATH, "red-dragon_tail", "edited.png"),
+ 50,
+ 41,
+ 3.5,
+ 6.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "cake_oreo", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "cake_oreo", "src.png"),
+ os.path.join(EXAMPLES_PATH, "cake_oreo", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "bike_harley", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "bike_harley", "src.png"),
+ os.path.join(EXAMPLES_PATH, "bike_harley", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "bike_modern", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "bike_modern", "src.png"),
+ os.path.join(EXAMPLES_PATH, "bike_modern", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "bmw_speedy", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "bmw_speedy", "src.png"),
+ os.path.join(EXAMPLES_PATH, "bmw_speedy", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "batman_backpack", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "batman_backpack", "src.png"),
+ os.path.join(EXAMPLES_PATH, "batman_backpack", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "figure_backpack", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "figure_backpack", "src.png"),
+ os.path.join(EXAMPLES_PATH, "figure_backpack", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "car_cartoon", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "car_cartoon", "src.png"),
+ os.path.join(EXAMPLES_PATH, "car_cartoon", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "car_engine", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "car_engine", "src.png"),
+ os.path.join(EXAMPLES_PATH, "car_engine", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "car_steampunk", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "car_steampunk", "src.png"),
+ os.path.join(EXAMPLES_PATH, "car_steampunk", "edited.png"),
+ 50,
+ 41,
+ 3.5,
+ 6.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "green-dragon_skirt", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "green-dragon_skirt", "src.png"),
+ os.path.join(EXAMPLES_PATH, "green-dragon_skirt", "edited.png"),
+ 50,
+ 41,
+ 3.5,
+ 6.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "gazebo_pagoda", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_pagoda", "src.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_pagoda", "edited.png"),
+ 50,
+ 41,
+ 3.5,
+ 6.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "oasis_magical", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "oasis_magical", "src.png"),
+ os.path.join(EXAMPLES_PATH, "oasis_magical", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "cabin_alpine", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "cabin_alpine", "src.png"),
+ os.path.join(EXAMPLES_PATH, "cabin_alpine", "edited.png"),
+ 50,
+ 42,
+ 3.5,
+ 12.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "cabin_gothic", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "cabin_gothic", "src.png"),
+ os.path.join(EXAMPLES_PATH, "cabin_gothic", "edited.png"),
+ 50,
+ 42,
+ 3.5,
+ 12.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "fox_tuxedo", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "fox_tuxedo", "src.png"),
+ os.path.join(EXAMPLES_PATH, "fox_tuxedo", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "cabin_haunted", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "cabin_haunted", "src.png"),
+ os.path.join(EXAMPLES_PATH, "cabin_haunted", "edited.png"),
+ 50,
+ 42,
+ 3.5,
+ 12.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "fox_eyes", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "fox_eyes", "src.png"),
+ os.path.join(EXAMPLES_PATH, "fox_eyes", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "gazebo_disney", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_disney", "src.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_disney", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "desk_wizard", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "desk_wizard", "src.png"),
+ os.path.join(EXAMPLES_PATH, "desk_wizard", "edited.png"),
+ 50,
+ 42,
+ 3.5,
+ 12.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "gazebo_light", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_light", "src.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_light", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "gazebo_roof", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_roof", "src.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_roof", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "grogu_earphones", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_earphones", "src.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_earphones", "edited.png"),
+ 50,
+ 41,
+ 3.5,
+ 6.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "gazebo_rust", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_rust", "src.png"),
+ os.path.join(EXAMPLES_PATH, "gazebo_rust", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "german-shep_pixar", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "german-shep_pixar", "src.png"),
+ os.path.join(EXAMPLES_PATH, "german-shep_pixar", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "grogu_kimono", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_kimono", "src.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_kimono", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 21.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "ship_fantasy", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "ship_fantasy", "src.png"),
+ os.path.join(EXAMPLES_PATH, "ship_fantasy", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "grogu_lego-fig", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_lego-fig", "src.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_lego-fig", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 21.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "lego-car_spoiler", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "lego-car_spoiler", "src.png"),
+ os.path.join(EXAMPLES_PATH, "lego-car_spoiler", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "nurse_sporty", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "nurse_sporty", "src.png"),
+ os.path.join(EXAMPLES_PATH, "nurse_sporty", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "r2d2_golden", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "r2d2_golden", "src.png"),
+ os.path.join(EXAMPLES_PATH, "r2d2_golden", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "grogu_the-force", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_the-force", "src.png"),
+ os.path.join(EXAMPLES_PATH, "grogu_the-force", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 21.0,
+ 18,
+ ],
+
+ [
+ os.path.join(EXAMPLES_PATH, "spiderbot_chrome", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "spiderbot_chrome", "src.png"),
+ os.path.join(EXAMPLES_PATH, "spiderbot_chrome", "edited.png"),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(
+ EXAMPLES_PATH, "spiderbot_steampunk", "src_mv.png"
+ ),
+ os.path.join(EXAMPLES_PATH, "spiderbot_steampunk", "src.png"),
+ os.path.join(
+ EXAMPLES_PATH, "spiderbot_steampunk", "edited.png"
+ ),
+ 50,
+ 31,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ [
+ os.path.join(EXAMPLES_PATH, "superman_crossed", "src_mv.png"),
+ os.path.join(EXAMPLES_PATH, "superman_crossed", "src.png"),
+ os.path.join(EXAMPLES_PATH, "superman_crossed", "edited.png"),
+ 50,
+ 39,
+ 3.5,
+ 5.0,
+ 18,
+ ],
+ ]
+
+ gr.Examples(
+ examples=example_data,
+ inputs=example_inputs,
+ label="Example Edits",
+ examples_per_page=10,
+ elem_id="example-table"
+
+ )
+
+ with gr.TabItem("How to Use", id=1):  # second tab: static usage guide
+ gr.Markdown(HOW_TO_USE_TEXT)  # renders the HOW_TO_USE_TEXT markdown string as-is
+
+ # Define button actions
+ run_button.click(
+ fn=run_main_script,
+ inputs=[
+ src_cond_image,
+ tgt_cond_image,
+ original_mv_image,
+ t_steps,
+ n_max,
+ src_gs,
+ tar_gs,
+ seed,
+ ],
+ outputs=output_image,
+ )
+
+ clear_button.click(
+ fn=clear_inputs,
+ inputs=[],
+ outputs=[
+ original_mv_image,
+ src_cond_image,
+ tgt_cond_image,
+ t_steps,
+ n_max,
+ src_gs,
+ tar_gs,
+ seed,
+ output_image,
+ ],
+ )
+
+if __name__ == "__main__":  # start the app only when this file is run as a script
+ demo.launch(share=True)  # share=True additionally requests a public Gradio share link
diff --git a/assets/stormtrooper.glb b/assets/stormtrooper.glb
new file mode 100644
index 0000000000000000000000000000000000000000..a61e292d4e58e8f90da0c3889e12f1ce9590ea8b
--- /dev/null
+++ b/assets/stormtrooper.glb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:921f8656fac3332d100969f40724455b7d34565625cf2a29d9e36f6c81b1d1c9
+size 1659928
diff --git a/examples/batman_backpack/edited.png b/examples/batman_backpack/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..e549a6372d87bcb2a169a7860c65db347a5e54f8
--- /dev/null
+++ b/examples/batman_backpack/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b931e0b36421524569533d2076f7885050f376c8a89978ee5b6c5fb91364ebbf
+size 211819
diff --git a/examples/batman_backpack/src.png b/examples/batman_backpack/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..7b9cc64faf353aee3c7e2d9cecdcda73506fe663
--- /dev/null
+++ b/examples/batman_backpack/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0829cda6608dbcc1de82f45c3887fdcfbdf4f5271976ca67b3d3d815ccebf018
+size 94721
diff --git a/examples/batman_backpack/src_mv.png b/examples/batman_backpack/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..0b069016e67984c8f504a0521b4f62fcceb1f4ee
--- /dev/null
+++ b/examples/batman_backpack/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6309e40375714b89f843d3623e00150c79add3d417a8b04317539fcaf3d997e0
+size 171122
diff --git a/examples/batman_jetpack/edited.png b/examples/batman_jetpack/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..5f671ba33a2eb7e56ee894692948ba9dae119e9a
--- /dev/null
+++ b/examples/batman_jetpack/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b312bfaeb098b1fe45ed61824b6e0a9a956e6f8969d880347eb4ac17512a0ac6
+size 223972
diff --git a/examples/batman_jetpack/src.png b/examples/batman_jetpack/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..7b9cc64faf353aee3c7e2d9cecdcda73506fe663
--- /dev/null
+++ b/examples/batman_jetpack/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0829cda6608dbcc1de82f45c3887fdcfbdf4f5271976ca67b3d3d815ccebf018
+size 94721
diff --git a/examples/batman_jetpack/src_mv.png b/examples/batman_jetpack/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..0b069016e67984c8f504a0521b4f62fcceb1f4ee
--- /dev/null
+++ b/examples/batman_jetpack/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6309e40375714b89f843d3623e00150c79add3d417a8b04317539fcaf3d997e0
+size 171122
diff --git a/examples/bike_harley/edited.png b/examples/bike_harley/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..ce97c8c8db1e513b5c6a6e6e37463d42ff4ae969
--- /dev/null
+++ b/examples/bike_harley/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b810cc2f73c107c698f03c2bd8ad4d5e13646ba5a327818670a98c1c7cb96da4
+size 164991
diff --git a/examples/bike_harley/src.png b/examples/bike_harley/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..6392df028a8211c8e3b97e85bf75cfc19e9e3ec5
--- /dev/null
+++ b/examples/bike_harley/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d122fa2e465bf5cfe1856cf913e029fee66d2229ea7b8a5f77254f0acaa44622
+size 180509
diff --git a/examples/bike_harley/src_mv.png b/examples/bike_harley/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..69da597de7ffbf963a1e434ea36bcdabd7d8ad09
--- /dev/null
+++ b/examples/bike_harley/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:343367a134e514ac65e3d06181e61a944ddf86112b52054ba2719383129e1862
+size 356471
diff --git a/examples/bike_modern/edited.png b/examples/bike_modern/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..5ba510dfb1d6b13adbf757414ed49b2ea3f29bce
--- /dev/null
+++ b/examples/bike_modern/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39e655a517446a880a7d9063fe271469bca3e6f7f0a8795f4e2c6d4596b195b9
+size 163977
diff --git a/examples/bike_modern/src.png b/examples/bike_modern/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..6392df028a8211c8e3b97e85bf75cfc19e9e3ec5
--- /dev/null
+++ b/examples/bike_modern/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d122fa2e465bf5cfe1856cf913e029fee66d2229ea7b8a5f77254f0acaa44622
+size 180509
diff --git a/examples/bike_modern/src_mv.png b/examples/bike_modern/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..69da597de7ffbf963a1e434ea36bcdabd7d8ad09
--- /dev/null
+++ b/examples/bike_modern/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:343367a134e514ac65e3d06181e61a944ddf86112b52054ba2719383129e1862
+size 356471
diff --git a/examples/bike_sport/edited.png b/examples/bike_sport/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..60d18e3f77d73749767f6c38e2710456a929d7d7
--- /dev/null
+++ b/examples/bike_sport/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:337d0f6dbe4df38020aa3de73d6acce82801f7efdcaedd37f297d9ec223e6cc4
+size 101239
diff --git a/examples/bike_sport/src.png b/examples/bike_sport/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..416489aa955a408046eb41474123ddd9f47c6412
--- /dev/null
+++ b/examples/bike_sport/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a161a0125c23afc5f79dced0ff10055722fe6379c54e9324aeed894a09ca613c
+size 91702
diff --git a/examples/bike_sport/src_mv.png b/examples/bike_sport/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..5fef5a894cdab38080e16f7a4b2ac94e98ea39b2
--- /dev/null
+++ b/examples/bike_sport/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f2c13a76e416db634e873ecd9b5c492c42e14e4c3310ee1583851c829582b32
+size 290624
diff --git a/examples/bike_vintage/edited.png b/examples/bike_vintage/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..6116d5530c6659504e387c9957d6a804177262c4
--- /dev/null
+++ b/examples/bike_vintage/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8230a205104ffa813e9231068e805be17bbe6c5efb8e41a4dc4a489c5e9206c
+size 171361
diff --git a/examples/bike_vintage/src.png b/examples/bike_vintage/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..6392df028a8211c8e3b97e85bf75cfc19e9e3ec5
--- /dev/null
+++ b/examples/bike_vintage/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d122fa2e465bf5cfe1856cf913e029fee66d2229ea7b8a5f77254f0acaa44622
+size 180509
diff --git a/examples/bike_vintage/src_mv.png b/examples/bike_vintage/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..69da597de7ffbf963a1e434ea36bcdabd7d8ad09
--- /dev/null
+++ b/examples/bike_vintage/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:343367a134e514ac65e3d06181e61a944ddf86112b52054ba2719383129e1862
+size 356471
diff --git a/examples/bmw_speedy/edited.png b/examples/bmw_speedy/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..821408fa220720ca1b13c1b86e77ccb7ad9a8a6b
--- /dev/null
+++ b/examples/bmw_speedy/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d45865fba033e4e599ab4a35198aac39e00bc00404147531bf37bdf25f5cf598
+size 91057
diff --git a/examples/bmw_speedy/src.png b/examples/bmw_speedy/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..0347ce26045ba3becca8a287a477d3ed2e39e71f
--- /dev/null
+++ b/examples/bmw_speedy/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0b7c8e768928d36db07363dd2a5cc7d0df6895118a4a57d1ad417f2dca9cccd
+size 108813
diff --git a/examples/bmw_speedy/src_mv.png b/examples/bmw_speedy/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..22512abbf452fa813779398ca0692094a0be436c
--- /dev/null
+++ b/examples/bmw_speedy/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d914de8ed583b5cf016dc4b601dc72b62f6999885c289a745ee09d86903dbda
+size 178455
diff --git a/examples/cabin_alpine/edited.png b/examples/cabin_alpine/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..a9c7aecbe26966206d3ee6967670f870315d88f4
--- /dev/null
+++ b/examples/cabin_alpine/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be31a56b0aeea55dc8d71c23919017f8ad0c9a1d1c0b67370f37f427df93271c
+size 859027
diff --git a/examples/cabin_alpine/src.png b/examples/cabin_alpine/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..313357f8d4f1bfe27f4df2a417b8c339fcea7d58
--- /dev/null
+++ b/examples/cabin_alpine/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0655a2cca2e8b5e9590228252c239079963425d5f6d9f5ccc698fd78ed02687a
+size 304497
diff --git a/examples/cabin_alpine/src_mv.png b/examples/cabin_alpine/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..3e6ea4588be0de350c07e03c53ac8dea49955a34
--- /dev/null
+++ b/examples/cabin_alpine/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b211a612b264cb26d0b18f2a80f5273716f4d7d5b86a669f4c4ab8cf4167ff6
+size 519695
diff --git a/examples/cabin_gothic/edited.png b/examples/cabin_gothic/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..3c8878e47107524d8d33f9ccbe5c54281ca2f9c0
--- /dev/null
+++ b/examples/cabin_gothic/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7efc56a9f254cda00ec54ce93bec15335ecfe5f6bf10911a262fcd783f2bb8d
+size 881495
diff --git a/examples/cabin_gothic/src.png b/examples/cabin_gothic/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..313357f8d4f1bfe27f4df2a417b8c339fcea7d58
--- /dev/null
+++ b/examples/cabin_gothic/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0655a2cca2e8b5e9590228252c239079963425d5f6d9f5ccc698fd78ed02687a
+size 304497
diff --git a/examples/cabin_gothic/src_mv.png b/examples/cabin_gothic/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..3e6ea4588be0de350c07e03c53ac8dea49955a34
--- /dev/null
+++ b/examples/cabin_gothic/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b211a612b264cb26d0b18f2a80f5273716f4d7d5b86a669f4c4ab8cf4167ff6
+size 519695
diff --git a/examples/cabin_haunted/edited.png b/examples/cabin_haunted/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..446e3e524dfce77f47b745389d569634d4c338a0
--- /dev/null
+++ b/examples/cabin_haunted/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ce7a3b5abda615bd3f1e9694ba0d8925c912ef249b3f0127e7228f29a9ece44
+size 270283
diff --git a/examples/cabin_haunted/src.png b/examples/cabin_haunted/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..313357f8d4f1bfe27f4df2a417b8c339fcea7d58
--- /dev/null
+++ b/examples/cabin_haunted/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0655a2cca2e8b5e9590228252c239079963425d5f6d9f5ccc698fd78ed02687a
+size 304497
diff --git a/examples/cabin_haunted/src_mv.png b/examples/cabin_haunted/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..3e6ea4588be0de350c07e03c53ac8dea49955a34
--- /dev/null
+++ b/examples/cabin_haunted/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b211a612b264cb26d0b18f2a80f5273716f4d7d5b86a669f4c4ab8cf4167ff6
+size 519695
diff --git a/examples/cake_oreo/edited.png b/examples/cake_oreo/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..5c9d7ad6fd6fe8c835067fcd7200c82468946e8b
--- /dev/null
+++ b/examples/cake_oreo/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe262f65660d656341c7780a5ce444eae965980abbba45d44754fde07e6d976f
+size 475666
diff --git a/examples/cake_oreo/src.png b/examples/cake_oreo/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..37db4cb3dce06e1b170948d5d309a312caedfb32
--- /dev/null
+++ b/examples/cake_oreo/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bab8e40296d3df9d572c43b494a3cf7de0374c9209a69765a91e00f4c4c2cba3
+size 251020
diff --git a/examples/cake_oreo/src_mv.png b/examples/cake_oreo/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..f8639de197db8713c8fcadcd47c07a308e7c8be1
--- /dev/null
+++ b/examples/cake_oreo/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8b8961a2dad370cf1dc548aed6d8e563c84f895027095d95872c565c3a7673a
+size 427688
diff --git a/examples/car_cartoon/edited.png b/examples/car_cartoon/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..ca726c7ea8fa9a7f8e1e05749da69049c22db29c
--- /dev/null
+++ b/examples/car_cartoon/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2f31141552cb72b4a5e7b204926d3c6cd7e36651edc2924a3d5d8e10437694e
+size 93554
diff --git a/examples/car_cartoon/src.png b/examples/car_cartoon/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..242c60181c683b7705b74026fc69534ed789c509
--- /dev/null
+++ b/examples/car_cartoon/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10dbdbdfe282d8f232caf66c6d84f9116639bcc50b8e70b2cde2a0f352fdccfe
+size 102456
diff --git a/examples/car_cartoon/src_mv.png b/examples/car_cartoon/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..69972374bec1d6e783771d935fea1cc3ee4fd8bf
--- /dev/null
+++ b/examples/car_cartoon/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0bf471ec55d12cac52b73620503439b3910fc2e31c8f7569d28c80beb57b1bb
+size 183102
diff --git a/examples/car_engine/edited.png b/examples/car_engine/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..884ce8c832896fda3308698e742055d956282308
--- /dev/null
+++ b/examples/car_engine/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d506435b214bb037f0c65312b8c29b356c79c31db7abc1c5e9776c38a1890fca
+size 254825
diff --git a/examples/car_engine/src.png b/examples/car_engine/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..242c60181c683b7705b74026fc69534ed789c509
--- /dev/null
+++ b/examples/car_engine/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10dbdbdfe282d8f232caf66c6d84f9116639bcc50b8e70b2cde2a0f352fdccfe
+size 102456
diff --git a/examples/car_engine/src_mv.png b/examples/car_engine/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..69972374bec1d6e783771d935fea1cc3ee4fd8bf
--- /dev/null
+++ b/examples/car_engine/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0bf471ec55d12cac52b73620503439b3910fc2e31c8f7569d28c80beb57b1bb
+size 183102
diff --git a/examples/car_steampunk/edited.png b/examples/car_steampunk/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..372e6cf1bc4e36061e02e03e4e973fc35a99fb98
--- /dev/null
+++ b/examples/car_steampunk/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f1f047c038f82081928c1c008b71529fe683bdbedf23814729f07156bb04405
+size 94126
diff --git a/examples/car_steampunk/src.png b/examples/car_steampunk/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..242c60181c683b7705b74026fc69534ed789c509
--- /dev/null
+++ b/examples/car_steampunk/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10dbdbdfe282d8f232caf66c6d84f9116639bcc50b8e70b2cde2a0f352fdccfe
+size 102456
diff --git a/examples/car_steampunk/src_mv.png b/examples/car_steampunk/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..69972374bec1d6e783771d935fea1cc3ee4fd8bf
--- /dev/null
+++ b/examples/car_steampunk/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0bf471ec55d12cac52b73620503439b3910fc2e31c8f7569d28c80beb57b1bb
+size 183102
diff --git a/examples/deer_pixar/edited.png b/examples/deer_pixar/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..8f7c748b007b86e03d40df13931167bfe683c2af
--- /dev/null
+++ b/examples/deer_pixar/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3851883c3f4985b580179430dd59ecabb2ab307d2fb2fac9b1d098ab8335c3d9
+size 124923
diff --git a/examples/deer_pixar/src.png b/examples/deer_pixar/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..4ded9777d65029234e51fd1e959a4b652bb8ee57
--- /dev/null
+++ b/examples/deer_pixar/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4283b7e23c6a7173982dd16f6943aaa3fc1367b0b20a4736c5b4b4ffeee0a1d5
+size 106593
diff --git a/examples/deer_pixar/src_mv.png b/examples/deer_pixar/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..42d97bb41764280f8e6f01a0f6c88c2ab252cb26
--- /dev/null
+++ b/examples/deer_pixar/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:402f558bf62ec8a269997bba9a5dc783d9ab8fafd963da897681b8b1494f6634
+size 215417
diff --git a/examples/deer_wings/edited.png b/examples/deer_wings/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..ec01357c4034b300cab66f57228d438b1f4fdf54
--- /dev/null
+++ b/examples/deer_wings/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5a7f916061401398a1af50bf95e2c6d85195ae9869bf32ed7853d75edcb6566
+size 440500
diff --git a/examples/deer_wings/output/expected.png b/examples/deer_wings/output/expected.png
new file mode 100644
index 0000000000000000000000000000000000000000..c2d7c7f21c573e28f36d245346ad2fecba8e12e7
--- /dev/null
+++ b/examples/deer_wings/output/expected.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b12c9a3b96215d667329d4909e6d1bf5f3ee785be3781f10aae0fa930e4cb62
+size 556132
diff --git a/examples/deer_wings/src.png b/examples/deer_wings/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..4ded9777d65029234e51fd1e959a4b652bb8ee57
--- /dev/null
+++ b/examples/deer_wings/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4283b7e23c6a7173982dd16f6943aaa3fc1367b0b20a4736c5b4b4ffeee0a1d5
+size 106593
diff --git a/examples/deer_wings/src_mv.png b/examples/deer_wings/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..42d97bb41764280f8e6f01a0f6c88c2ab252cb26
--- /dev/null
+++ b/examples/deer_wings/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:402f558bf62ec8a269997bba9a5dc783d9ab8fafd963da897681b8b1494f6634
+size 215417
diff --git a/examples/desk_wizard/edited.png b/examples/desk_wizard/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..4fce7eb49ccf8bce0f0778ac6dd54d8652d2b3f9
--- /dev/null
+++ b/examples/desk_wizard/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b100e3edafef41e5414531e7f76b1391caca1f2d4de9685af0173c4b90f8c0d
+size 185536
diff --git a/examples/desk_wizard/src.png b/examples/desk_wizard/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..0eeacde0d3b28f2ae21014aea4409d6e5ed3df95
--- /dev/null
+++ b/examples/desk_wizard/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:beed78eccce5a36a39038ff7d21ced98b350104ef169bb721ecfcdff29aef400
+size 202095
diff --git a/examples/desk_wizard/src_mv.png b/examples/desk_wizard/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..588fde4139229d5570200b6782a6408265b70ec9
--- /dev/null
+++ b/examples/desk_wizard/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88b8c9f1a4f84b29da0990d22ee2bb1a3c4449b607ceb87783b31f9ec8e9d7d4
+size 424371
diff --git a/examples/figure_backpack/edited.png b/examples/figure_backpack/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..6de12f814f8101323831b87ec10e979fbc48ca68
--- /dev/null
+++ b/examples/figure_backpack/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12a43505bed404a07dfdde643bd81a4fe38c252c1ff530220ff05ab296fbe8ff
+size 281040
diff --git a/examples/figure_backpack/src.png b/examples/figure_backpack/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..5e77fa42bbe939067c9baa054809d828f8b4defe
--- /dev/null
+++ b/examples/figure_backpack/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57c0dbfffe6e3d1a6cfc052b90bf5f5bb97cd70b88550bae77e074213ba85eb5
+size 105256
diff --git a/examples/figure_backpack/src_mv.png b/examples/figure_backpack/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..a67870bb8659246d346817b499fd64f52de0ae35
--- /dev/null
+++ b/examples/figure_backpack/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e1c207dcf2e859f679828925c30760b21fb4a370f724f4c07bf005736e21410
+size 188907
diff --git a/examples/figure_zombie/edited.png b/examples/figure_zombie/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..f384768b56ec97e5fe1726cd8a39994cc99571b6
--- /dev/null
+++ b/examples/figure_zombie/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae4b387a3e566dccf7719d8f69d4580278e1e06819cd97fa3926bf47c0eacb84
+size 265916
diff --git a/examples/figure_zombie/src.png b/examples/figure_zombie/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..fbcbc0eb00d68a908783ce2ffab4ecfc0cd0c514
--- /dev/null
+++ b/examples/figure_zombie/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea5358bfb25731dd565ff9326680d73fa61fb2144a1b1a58f891e3d03dacac9a
+size 109657
diff --git a/examples/figure_zombie/src_mv.png b/examples/figure_zombie/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..92480d9cbc4499cd6f2feae70a05fafd371c92e5
--- /dev/null
+++ b/examples/figure_zombie/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8241b6618e97e27f8abb40157dc39c038c8576a56a4300990eef85dd93ee8bd
+size 189195
diff --git a/examples/fox_eyes/edited.png b/examples/fox_eyes/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..a02d3c3bf8ef79bf7d4d24d1dc4bdbca9e859fb2
--- /dev/null
+++ b/examples/fox_eyes/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ddca6411e9231de47e0492fb5f840abf0afdfec610f045e375a1864c13a8849
+size 220288
diff --git a/examples/fox_eyes/src.png b/examples/fox_eyes/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..00873d1c5ddac03d0d3d017c28b26d6e191233a5
--- /dev/null
+++ b/examples/fox_eyes/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cb23e9ccbea032367aa721444814d5772f27c3bf31cc0814a6159ec71dd5fa9
+size 129432
diff --git a/examples/fox_eyes/src_mv.png b/examples/fox_eyes/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..789ac505a31a85d157b3d35005848f30684e47a3
--- /dev/null
+++ b/examples/fox_eyes/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5d086775b1bba7851c4e6867cf95518024f9abe2df7290694c74f244da5e6aa
+size 202552
diff --git a/examples/fox_tuxedo/edited.png b/examples/fox_tuxedo/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..a02d3c3bf8ef79bf7d4d24d1dc4bdbca9e859fb2
--- /dev/null
+++ b/examples/fox_tuxedo/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ddca6411e9231de47e0492fb5f840abf0afdfec610f045e375a1864c13a8849
+size 220288
diff --git a/examples/fox_tuxedo/src.png b/examples/fox_tuxedo/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..00873d1c5ddac03d0d3d017c28b26d6e191233a5
--- /dev/null
+++ b/examples/fox_tuxedo/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cb23e9ccbea032367aa721444814d5772f27c3bf31cc0814a6159ec71dd5fa9
+size 129432
diff --git a/examples/fox_tuxedo/src_mv.png b/examples/fox_tuxedo/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..789ac505a31a85d157b3d35005848f30684e47a3
--- /dev/null
+++ b/examples/fox_tuxedo/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5d086775b1bba7851c4e6867cf95518024f9abe2df7290694c74f244da5e6aa
+size 202552
diff --git a/examples/gazebo_disney/edited.png b/examples/gazebo_disney/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..83027f11c67ed6e9802b5f29fb9c4f20e91637a3
--- /dev/null
+++ b/examples/gazebo_disney/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7ac343b7a29a30a8c18ff8d532f8faf484edfc038596584b168be81f6c48a0a
+size 208864
diff --git a/examples/gazebo_disney/src.png b/examples/gazebo_disney/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..55353c6ef3439304933aa6d3316dbf64dd42b5fb
--- /dev/null
+++ b/examples/gazebo_disney/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:631fa35574af0eeb1f58ea7379d144ef5b6bd81e7862ac3b7e8b07d3de423f79
+size 252424
diff --git a/examples/gazebo_disney/src_mv.png b/examples/gazebo_disney/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3016cd0cee241c07c09518964ee69e76cc935d8
--- /dev/null
+++ b/examples/gazebo_disney/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a305b4297c9a16340ce738978222d3fe784938e57fda6281550e7b804c011ebb
+size 470796
diff --git a/examples/gazebo_light/edited.png b/examples/gazebo_light/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..a249a7e75b051e9eb737935ab3f76c5082cd7bf7
--- /dev/null
+++ b/examples/gazebo_light/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1e249be567fa40011a4a182b342e5279e8ac07dabf580f09e23d0ead7168980
+size 227478
diff --git a/examples/gazebo_light/src.png b/examples/gazebo_light/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..55353c6ef3439304933aa6d3316dbf64dd42b5fb
--- /dev/null
+++ b/examples/gazebo_light/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:631fa35574af0eeb1f58ea7379d144ef5b6bd81e7862ac3b7e8b07d3de423f79
+size 252424
diff --git a/examples/gazebo_light/src_mv.png b/examples/gazebo_light/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3016cd0cee241c07c09518964ee69e76cc935d8
--- /dev/null
+++ b/examples/gazebo_light/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a305b4297c9a16340ce738978222d3fe784938e57fda6281550e7b804c011ebb
+size 470796
diff --git a/examples/gazebo_pagoda/edited.png b/examples/gazebo_pagoda/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..bf8c54a3f7de0ac7a32201328f8287d9ded4db59
--- /dev/null
+++ b/examples/gazebo_pagoda/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42445e7493b114bd187da22176ce4f7197da4e629b2d13c7b0e3470c00f30a9e
+size 640285
diff --git a/examples/gazebo_pagoda/src.png b/examples/gazebo_pagoda/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..55353c6ef3439304933aa6d3316dbf64dd42b5fb
--- /dev/null
+++ b/examples/gazebo_pagoda/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:631fa35574af0eeb1f58ea7379d144ef5b6bd81e7862ac3b7e8b07d3de423f79
+size 252424
diff --git a/examples/gazebo_pagoda/src_mv.png b/examples/gazebo_pagoda/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3016cd0cee241c07c09518964ee69e76cc935d8
--- /dev/null
+++ b/examples/gazebo_pagoda/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a305b4297c9a16340ce738978222d3fe784938e57fda6281550e7b804c011ebb
+size 470796
diff --git a/examples/gazebo_roof/edited.png b/examples/gazebo_roof/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..22c8e34ae73b0730b88eb304ac7d6129efc1db10
--- /dev/null
+++ b/examples/gazebo_roof/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3df85b8c94f29f514bfe07d0fb6705584c7c1b5b1c69fbe8255c027429b4ccab
+size 471945
diff --git a/examples/gazebo_roof/src.png b/examples/gazebo_roof/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..55353c6ef3439304933aa6d3316dbf64dd42b5fb
--- /dev/null
+++ b/examples/gazebo_roof/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:631fa35574af0eeb1f58ea7379d144ef5b6bd81e7862ac3b7e8b07d3de423f79
+size 252424
diff --git a/examples/gazebo_roof/src_mv.png b/examples/gazebo_roof/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3016cd0cee241c07c09518964ee69e76cc935d8
--- /dev/null
+++ b/examples/gazebo_roof/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a305b4297c9a16340ce738978222d3fe784938e57fda6281550e7b804c011ebb
+size 470796
diff --git a/examples/gazebo_rust/edited.png b/examples/gazebo_rust/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..c6bc296a9196f6aac7df688084c15c32b25417f6
--- /dev/null
+++ b/examples/gazebo_rust/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a54def20e1899cdf42f83375d0f45bcf2c66710e76038a1e7f902117b9fc2f6e
+size 234300
diff --git a/examples/gazebo_rust/src.png b/examples/gazebo_rust/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..55353c6ef3439304933aa6d3316dbf64dd42b5fb
--- /dev/null
+++ b/examples/gazebo_rust/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:631fa35574af0eeb1f58ea7379d144ef5b6bd81e7862ac3b7e8b07d3de423f79
+size 252424
diff --git a/examples/gazebo_rust/src_mv.png b/examples/gazebo_rust/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3016cd0cee241c07c09518964ee69e76cc935d8
--- /dev/null
+++ b/examples/gazebo_rust/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a305b4297c9a16340ce738978222d3fe784938e57fda6281550e7b804c011ebb
+size 470796
diff --git a/examples/german-shep_pixar/edited.png b/examples/german-shep_pixar/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..fcc857306304761aaeb2bab50f7ce5d0a2d7c5b0
--- /dev/null
+++ b/examples/german-shep_pixar/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:536b7efc741d1800a8f1e4d293c9673051e9125e20464c28d4821da3fa35cbdc
+size 87295
diff --git a/examples/german-shep_pixar/src.png b/examples/german-shep_pixar/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..252cbfebfb89f45c66dabd334e4843ed9630c3f0
--- /dev/null
+++ b/examples/german-shep_pixar/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67f6b29caff5c68f8c8f177ebd71b54870bec69a1e2f2221b54f23cb5a321fb5
+size 105010
diff --git a/examples/german-shep_pixar/src_mv.png b/examples/german-shep_pixar/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..7e83f5b7848b3a3565ecb9fbcfd46c2e114bee38
--- /dev/null
+++ b/examples/german-shep_pixar/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce6937a6d716fc2920e60477de5d3bc46f361bc283e12abd4c2bd717cbe14356
+size 197799
diff --git a/examples/german-shep_plush/edited.png b/examples/german-shep_plush/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..f6c81cf38809e25b3100be4a5c15a3621a2691ae
--- /dev/null
+++ b/examples/german-shep_plush/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:041a871f13a42f277121dcd95544a8aa3f64dcec2d9372bfbebae5c419feaf75
+size 120424
diff --git a/examples/german-shep_plush/src.png b/examples/german-shep_plush/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..252cbfebfb89f45c66dabd334e4843ed9630c3f0
--- /dev/null
+++ b/examples/german-shep_plush/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67f6b29caff5c68f8c8f177ebd71b54870bec69a1e2f2221b54f23cb5a321fb5
+size 105010
diff --git a/examples/german-shep_plush/src_mv.png b/examples/german-shep_plush/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..7e83f5b7848b3a3565ecb9fbcfd46c2e114bee38
--- /dev/null
+++ b/examples/german-shep_plush/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce6937a6d716fc2920e60477de5d3bc46f361bc283e12abd4c2bd717cbe14356
+size 197799
diff --git a/examples/green-dragon_skirt/edited.png b/examples/green-dragon_skirt/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..d2128a7f490506f5b6415a73405ed57f31d7b587
--- /dev/null
+++ b/examples/green-dragon_skirt/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71a744084e3a388c1dd1c71abd9a9808d1a2bd1295ec89e4915176175f4c6010
+size 323566
diff --git a/examples/green-dragon_skirt/src.png b/examples/green-dragon_skirt/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..e2f102f2262d8172a0346903405d75aecd194624
--- /dev/null
+++ b/examples/green-dragon_skirt/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45e7272e980a9ed09065f7ab59c9b10a75b81f3ea5fcce7a90b075e1b5735d13
+size 157831
diff --git a/examples/green-dragon_skirt/src_mv.png b/examples/green-dragon_skirt/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..6aad0c36e278d23212656e26fc9170e73d5f0d49
--- /dev/null
+++ b/examples/green-dragon_skirt/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9169a7378a7ae079f392ccf5a15e440de0c93b35dd1ab4c732817ad4371c40e
+size 325560
diff --git a/examples/grogu_earphones/edited.png b/examples/grogu_earphones/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..d65a477bc3e60958f45e82ff509022b499d6878a
--- /dev/null
+++ b/examples/grogu_earphones/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f09740650379238901fc0eb63ade8a8ddc2b9a7cba0c929bff6d8dc17c202397
+size 761794
diff --git a/examples/grogu_earphones/src.png b/examples/grogu_earphones/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..5734d814868fc2781ba999b0f0d3ca1631074ab8
--- /dev/null
+++ b/examples/grogu_earphones/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3e9ff66c27255e28a80a910096fcc9f461c9c72a5854898e1f9385abca342ce
+size 301766
diff --git a/examples/grogu_earphones/src_mv.png b/examples/grogu_earphones/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..d3382d4ce2ffcc5a4742fa19a5aac7b3aa1b1ad2
--- /dev/null
+++ b/examples/grogu_earphones/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49ac30331d658c10a45e7be48433ab7224ab0430a8e30370a512e46b5928f71e
+size 532855
diff --git a/examples/grogu_kimono/edited.png b/examples/grogu_kimono/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..a8888c24b5a5d1187ce0d15aa9d435c9f6ca9c3c
--- /dev/null
+++ b/examples/grogu_kimono/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b80bbd8d906c0d92a3e56f7f962ae1790ef187c28c3676eef42a7b88dd49796
+size 541011
diff --git a/examples/grogu_kimono/src.png b/examples/grogu_kimono/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..5734d814868fc2781ba999b0f0d3ca1631074ab8
--- /dev/null
+++ b/examples/grogu_kimono/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3e9ff66c27255e28a80a910096fcc9f461c9c72a5854898e1f9385abca342ce
+size 301766
diff --git a/examples/grogu_kimono/src_mv.png b/examples/grogu_kimono/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..d3382d4ce2ffcc5a4742fa19a5aac7b3aa1b1ad2
--- /dev/null
+++ b/examples/grogu_kimono/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49ac30331d658c10a45e7be48433ab7224ab0430a8e30370a512e46b5928f71e
+size 532855
diff --git a/examples/grogu_lego-fig/edited.png b/examples/grogu_lego-fig/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..ec4f1387aa3e0bfc38d885a90c0611313990c87a
--- /dev/null
+++ b/examples/grogu_lego-fig/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63c9b367e5a01a8fcec4ab2cb8d3638fe70d012d3ebd07f0ef40769858fd1029
+size 278338
diff --git a/examples/grogu_lego-fig/src.png b/examples/grogu_lego-fig/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..5734d814868fc2781ba999b0f0d3ca1631074ab8
--- /dev/null
+++ b/examples/grogu_lego-fig/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3e9ff66c27255e28a80a910096fcc9f461c9c72a5854898e1f9385abca342ce
+size 301766
diff --git a/examples/grogu_lego-fig/src_mv.png b/examples/grogu_lego-fig/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..d3382d4ce2ffcc5a4742fa19a5aac7b3aa1b1ad2
--- /dev/null
+++ b/examples/grogu_lego-fig/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49ac30331d658c10a45e7be48433ab7224ab0430a8e30370a512e46b5928f71e
+size 532855
diff --git a/examples/grogu_the-force/edited.png b/examples/grogu_the-force/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..9467dacad281beee48aa5407fb2ab435a1e8b105
--- /dev/null
+++ b/examples/grogu_the-force/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41d9b992b2dc5e5e0b7a58ca9a2acdcd3c2104504442bd3b47c1292d3c6f7af8
+size 710198
diff --git a/examples/grogu_the-force/src.png b/examples/grogu_the-force/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..5734d814868fc2781ba999b0f0d3ca1631074ab8
--- /dev/null
+++ b/examples/grogu_the-force/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3e9ff66c27255e28a80a910096fcc9f461c9c72a5854898e1f9385abca342ce
+size 301766
diff --git a/examples/grogu_the-force/src_mv.png b/examples/grogu_the-force/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..d3382d4ce2ffcc5a4742fa19a5aac7b3aa1b1ad2
--- /dev/null
+++ b/examples/grogu_the-force/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49ac30331d658c10a45e7be48433ab7224ab0430a8e30370a512e46b5928f71e
+size 532855
diff --git a/examples/lego-car_spoiler/edited.png b/examples/lego-car_spoiler/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..c8b6dbed5f64c6f219900c2a0454b796c51b32e8
--- /dev/null
+++ b/examples/lego-car_spoiler/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d0356a19c21059941e58f4a4da071ca75c9ecec7311cfdac2e99b1672862da6
+size 314538
diff --git a/examples/lego-car_spoiler/src.png b/examples/lego-car_spoiler/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..0c86acf4508a666e09487093240e6ada436e4d35
--- /dev/null
+++ b/examples/lego-car_spoiler/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86e13c3942880537e9facb8b5361671398e70c7d95249a9735c30fccf877907a
+size 156501
diff --git a/examples/lego-car_spoiler/src_mv.png b/examples/lego-car_spoiler/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..aae7a927f1c1211e8137e699cdada1305af289c4
--- /dev/null
+++ b/examples/lego-car_spoiler/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12ee65894bb3ea07aa8850c94342b0ae059d7aed0c7bca5d51b046580b49510e
+size 256467
diff --git a/examples/nurse_sporty/edited.png b/examples/nurse_sporty/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..2c72cfc51f79b0f1867888d422960cf550bf9fd1
--- /dev/null
+++ b/examples/nurse_sporty/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d09b6a9bb40c97f0317c6fb1460bccf73bcf02dba72b91ba05bfc0fbd5ffcd0
+size 94325
diff --git a/examples/nurse_sporty/src.png b/examples/nurse_sporty/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..b806340b3528e4f587f69d046a23423597d9f78a
--- /dev/null
+++ b/examples/nurse_sporty/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a97558a8532d476b436a258240cb5ad1d9303efd945851ed69f897f2aa573030
+size 84455
diff --git a/examples/nurse_sporty/src_mv.png b/examples/nurse_sporty/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..0974376bbbbab36414c71fcd721a21d4b700de85
--- /dev/null
+++ b/examples/nurse_sporty/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97a9e32374ce4fab1e12063a78156a6bcd134488a68d2f9d92fb3e2703fde132
+size 146028
diff --git a/examples/oasis_magical/edited.png b/examples/oasis_magical/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..822dc1ec8c88731edd965541c29095705377bc6e
--- /dev/null
+++ b/examples/oasis_magical/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ca245181688fa4d777f95c93ab26c8c432bb7c1e49df17706828bd720161ee0
+size 198404
diff --git a/examples/oasis_magical/src.png b/examples/oasis_magical/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..33b03c8a72617369bd98a406b2c7f364746d6045
--- /dev/null
+++ b/examples/oasis_magical/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6de9c44ea90542208df5a24933ea2177201460a9fbd5e715ffa394715568d5fb
+size 220033
diff --git a/examples/oasis_magical/src_mv.png b/examples/oasis_magical/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..8ddd65dd68da26c1c9384111ded9ec0f4de346b8
--- /dev/null
+++ b/examples/oasis_magical/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:397b18085dcb8ffc4daa6b05554f1533f3c072602f4fc74ec6caf08303cd83f2
+size 340066
diff --git a/examples/r2d2_golden/edited.png b/examples/r2d2_golden/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..5347a2ebe623f0614703160889f0354334a16a15
--- /dev/null
+++ b/examples/r2d2_golden/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a689f6d7f47b7b66205c40ba6fbed2d570f4cae712ecb7ca14fff199beb6bdd
+size 215874
diff --git a/examples/r2d2_golden/src.png b/examples/r2d2_golden/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..fb9d09162edc802d18713e96d3fcc19718983b1d
--- /dev/null
+++ b/examples/r2d2_golden/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfaefd28dc01be8b9b2b27a67979d2ce06812a75283ed2077cd219e045095c57
+size 231139
diff --git a/examples/r2d2_golden/src_mv.png b/examples/r2d2_golden/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..5cc933f06a1ccd835d8c54702397c98530bd4ed3
--- /dev/null
+++ b/examples/r2d2_golden/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c7ea59150542e22b01b34b643bb6cd28d50dc2b0701dcfc4c0c946303f023d3
+size 421552
diff --git a/examples/red-dragon_tail/edited.png b/examples/red-dragon_tail/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..6ada7e3739dde1f380a779034ff5277728e6b970
--- /dev/null
+++ b/examples/red-dragon_tail/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4a599d457f4e1e901fd8eacf6e865b15a0227f7deddcf0ce7756e74b5809ec5
+size 282077
diff --git a/examples/red-dragon_tail/src.png b/examples/red-dragon_tail/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..a56934b2cde7172665f9b784ff0dca7c9762085d
--- /dev/null
+++ b/examples/red-dragon_tail/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f416ee4d117c2eb7741d8c99b606a688eaaa1fae4cf4c307cce031053c58a1f
+size 125444
diff --git a/examples/red-dragon_tail/src_mv.png b/examples/red-dragon_tail/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..aab5feffc89d8886ec0ce99de492cc7cd48227f6
--- /dev/null
+++ b/examples/red-dragon_tail/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c0e0be7dc35d10839871541d6f85b365e0c7da0e7b64065ac0955686eced885
+size 241876
diff --git a/examples/robot_sunglasses/edited.png b/examples/robot_sunglasses/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..0928c0c161324a36a00f68363f94fbb749d5f7ec
--- /dev/null
+++ b/examples/robot_sunglasses/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1baecfc567dcd06459a677c7d254fade2e73c9f52405abdaa42fb2665d5153eb
+size 439986
diff --git a/examples/robot_sunglasses/output/expected.png b/examples/robot_sunglasses/output/expected.png
new file mode 100644
index 0000000000000000000000000000000000000000..27934677ce93790e6750868a573dd2694795dcc1
--- /dev/null
+++ b/examples/robot_sunglasses/output/expected.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1024252ad60faf364ebc3de593c483dea358daf4b30b7664892cba4ced799ef
+size 485565
diff --git a/examples/robot_sunglasses/src.png b/examples/robot_sunglasses/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..7ce63f43896eb27bd200e062ef6dc386fbe2a2a8
--- /dev/null
+++ b/examples/robot_sunglasses/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61bb64b6314a7c36f08d87b7fc97f44b7b34b99a8635a907572319ef05ee5274
+size 215827
diff --git a/examples/robot_sunglasses/src_mv.png b/examples/robot_sunglasses/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..b338a757a412aaf9d3f265e9144c5644bb35fbd7
--- /dev/null
+++ b/examples/robot_sunglasses/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f8161aa18b17a26b31bf6c59f8b518107928af0220ba789c4405e6adf49a2d6
+size 387049
diff --git a/examples/ship_fantasy/edited.png b/examples/ship_fantasy/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..67eb897032ec2b5216ab9ae8bd811d49dbbb5271
--- /dev/null
+++ b/examples/ship_fantasy/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5d1e1fa480e8c2a6907d3e54ae2294abff64edf5e2bffb23d58aeee21c8c729
+size 203636
diff --git a/examples/ship_fantasy/src.png b/examples/ship_fantasy/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..aec97a75ad905ca012216c4eaca115510a725cee
--- /dev/null
+++ b/examples/ship_fantasy/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72dd2048af05effb5d12cb45322b91ae419a19ade7f8b738fa5c25f00201fa32
+size 197493
diff --git a/examples/ship_fantasy/src_mv.png b/examples/ship_fantasy/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..b02af274a2ce27ebf8a75e967228c4ded2e5b0dd
--- /dev/null
+++ b/examples/ship_fantasy/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1dddd093e479c05f86d12dd0991bbbe0aa8474eadf74fe4c008957e869cb6c2
+size 366609
diff --git a/examples/spiderbot_chrome/edited.png b/examples/spiderbot_chrome/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..aa225500eed933720b8b51ebe238d1b57aa90128
--- /dev/null
+++ b/examples/spiderbot_chrome/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:679c255f8a83a36a12bf51b09b2c858bdc36eac1fa0b042f7a2b980e66d62fa1
+size 177923
diff --git a/examples/spiderbot_chrome/src.png b/examples/spiderbot_chrome/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..c0e37031b187d3d73cb67f958d13f18699e98e52
--- /dev/null
+++ b/examples/spiderbot_chrome/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9171f3b894e12f8c9f5a821ee22a265e0189605f44639bf1d9d24fe1ce2b3596
+size 189699
diff --git a/examples/spiderbot_chrome/src_mv.png b/examples/spiderbot_chrome/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..13b663701bfe2876b321204c01695cde99a5a9a3
--- /dev/null
+++ b/examples/spiderbot_chrome/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:693d830ad2c15b7e7d34ce7a4669642c8a7635c8eb7aa83daafe66e29d1889f2
+size 363038
diff --git a/examples/spiderbot_steampunk/edited.png b/examples/spiderbot_steampunk/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..42a095b3c9a6bc194e0e8f2f82f52757092dae78
--- /dev/null
+++ b/examples/spiderbot_steampunk/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:466fa790fb3e8671e0981f3c3e6e2c4e141d2f14184aaa89563cae78087db236
+size 173473
diff --git a/examples/spiderbot_steampunk/src.png b/examples/spiderbot_steampunk/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..c0e37031b187d3d73cb67f958d13f18699e98e52
--- /dev/null
+++ b/examples/spiderbot_steampunk/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9171f3b894e12f8c9f5a821ee22a265e0189605f44639bf1d9d24fe1ce2b3596
+size 189699
diff --git a/examples/spiderbot_steampunk/src_mv.png b/examples/spiderbot_steampunk/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..13b663701bfe2876b321204c01695cde99a5a9a3
--- /dev/null
+++ b/examples/spiderbot_steampunk/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:693d830ad2c15b7e7d34ce7a4669642c8a7635c8eb7aa83daafe66e29d1889f2
+size 363038
diff --git a/examples/stormtrooper_donut/edited.png b/examples/stormtrooper_donut/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..890046731a9beeb6e18cd1e6d68e2f562d684e18
--- /dev/null
+++ b/examples/stormtrooper_donut/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25b618f0adb791ef54b3e8a1721703a17934bd07d1c37a7f3e77d0a0b96b181f
+size 234289
diff --git a/examples/stormtrooper_donut/src.png b/examples/stormtrooper_donut/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..2fa852ec04d2c269361b57d6f30239a7a1394438
--- /dev/null
+++ b/examples/stormtrooper_donut/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f301631774f0c6a471ceb0be1dc3395cab330034d5eb2cb7b4470ed9af03c414
+size 115832
diff --git a/examples/stormtrooper_donut/src_mv.png b/examples/stormtrooper_donut/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..63d254c766929d7e4a3ca0d5c729c06398121d85
--- /dev/null
+++ b/examples/stormtrooper_donut/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:389e58719d4054061a5d9437857a952ca76d81c72484fe8092c300c26034c7d6
+size 203964
diff --git a/examples/superman_crossed/edited.png b/examples/superman_crossed/edited.png
new file mode 100644
index 0000000000000000000000000000000000000000..e1c95f17feb986739a2e1367c407f49e6d272c76
--- /dev/null
+++ b/examples/superman_crossed/edited.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6cff6b2d03937c94e86f9d2a767abcf35d353739375266d11f8ffc7864a4b77
+size 218512
diff --git a/examples/superman_crossed/src.png b/examples/superman_crossed/src.png
new file mode 100644
index 0000000000000000000000000000000000000000..056e5942048104645162208b27a423162becd0d1
--- /dev/null
+++ b/examples/superman_crossed/src.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c02def0b51c7948e60ed2d15f39f27ba2ff6db1f25a466ba1708226fe420420
+size 113692
diff --git a/examples/superman_crossed/src_mv.png b/examples/superman_crossed/src_mv.png
new file mode 100644
index 0000000000000000000000000000000000000000..8fadc3b4c62ec39cf042064fdbfcb01b75737bd2
--- /dev/null
+++ b/examples/superman_crossed/src_mv.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9001135f9c58eb2f5d5485d7994623b279bc075c02d4185c5f5233f7e585971
+size 184764
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..88fadec2914b79f46eb5b4e7eec0f9cca3a99a63
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,10 @@
+torch
+torchvision
+diffusers==0.30.1
+transformers
+accelerate
+pillow
+huggingface_hub
+numpy
+tqdm
+gradio
\ No newline at end of file
diff --git a/resources/mv-gallery/1/edit/0.png b/resources/mv-gallery/1/edit/0.png
new file mode 100644
index 0000000000000000000000000000000000000000..dc9ce253c1f9cd270c2cb511e5908e821e82d1ca
--- /dev/null
+++ b/resources/mv-gallery/1/edit/0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2bf89561c1d1a0b3edbf26c436f3dd465c6a606b516ae05760505464d221fa9
+size 45127
diff --git a/resources/mv-gallery/1/edit/1.png b/resources/mv-gallery/1/edit/1.png
new file mode 100644
index 0000000000000000000000000000000000000000..6eb65e9516178287cb67cc2b7159f13e1f1296dc
--- /dev/null
+++ b/resources/mv-gallery/1/edit/1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20188d7a128cdc1fbf9127fa1bf84bbbed95bdf320fd6b51f49413d7e4b12861
+size 50163
diff --git a/resources/mv-gallery/1/edit/2.png b/resources/mv-gallery/1/edit/2.png
new file mode 100644
index 0000000000000000000000000000000000000000..554b4d8325317ed5bdea68e2996294916c43b9a0
--- /dev/null
+++ b/resources/mv-gallery/1/edit/2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ecd5ff28dccb9d4f9aff53fb84dc384750e1bfccd25bbc9b523f4efa89d19ad
+size 51846
diff --git a/resources/mv-gallery/1/edit/prompt.png b/resources/mv-gallery/1/edit/prompt.png
new file mode 100644
index 0000000000000000000000000000000000000000..af8089b410c4714f97c38c76023cc7be54414884
--- /dev/null
+++ b/resources/mv-gallery/1/edit/prompt.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32ec575e7468054f8bd57f1db3a1a2831b99405887d4951d45011cb179644633
+size 40538
diff --git a/resources/mv-gallery/1/edit2/0.png b/resources/mv-gallery/1/edit2/0.png
new file mode 100644
index 0000000000000000000000000000000000000000..83d6f61013a69341d6863756f545676453d6ecbb
--- /dev/null
+++ b/resources/mv-gallery/1/edit2/0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86d128a57586533fbb9df0d03d73ca8cd5e74e19056df11391286feeb25800d5
+size 56827
diff --git a/resources/mv-gallery/1/edit2/1.png b/resources/mv-gallery/1/edit2/1.png
new file mode 100644
index 0000000000000000000000000000000000000000..1cc12f7c2194d1724a513af541411a4115435561
--- /dev/null
+++ b/resources/mv-gallery/1/edit2/1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29154ac07b0a441fcbb9bce5bd97919682e79567704e3c62e8895fa237559bf0
+size 56044
diff --git a/resources/mv-gallery/1/edit2/2.png b/resources/mv-gallery/1/edit2/2.png
new file mode 100644
index 0000000000000000000000000000000000000000..58a6e12e57dd89f89a2fba14c2ae4c693ccb6c06
--- /dev/null
+++ b/resources/mv-gallery/1/edit2/2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef7cbab69b050b10a87de160009773a5c70e8e2318912672a9beaef7eeb1ee5e
+size 48090
diff --git a/resources/mv-gallery/1/edit2/prompt.png b/resources/mv-gallery/1/edit2/prompt.png
new file mode 100644
index 0000000000000000000000000000000000000000..432b5b52257dac250234432004030c94affe91dc
--- /dev/null
+++ b/resources/mv-gallery/1/edit2/prompt.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e08897c4137140d422d738399f262d9aeb8456f77b068375aaaa350b3bc912cc
+size 53369
diff --git a/resources/mv-gallery/1/src/0.png b/resources/mv-gallery/1/src/0.png
new file mode 100644
index 0000000000000000000000000000000000000000..26d49181272e8ce82838d1e9d45fb5e18aac3434
--- /dev/null
+++ b/resources/mv-gallery/1/src/0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5e6c50641e3a4648d9991375108fe70e08f279b16412a3629117ae28a47557b
+size 30347
diff --git a/resources/mv-gallery/1/src/1.png b/resources/mv-gallery/1/src/1.png
new file mode 100644
index 0000000000000000000000000000000000000000..915d442d7c58ebe7b15bbf01ce42c97b96eb48bc
--- /dev/null
+++ b/resources/mv-gallery/1/src/1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b672ff174e56f2f3376c0351cf86f2a3f302c931bb54aabc91b9c9b13c72ca0
+size 34623
diff --git a/resources/mv-gallery/1/src/2.png b/resources/mv-gallery/1/src/2.png
new file mode 100644
index 0000000000000000000000000000000000000000..723bcbcc757373e09a58e3ebd6e5c7c05b8dc396
--- /dev/null
+++ b/resources/mv-gallery/1/src/2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a189c0e2665b627a5520ca84b1b8be6e6d0e3444f8f9aa86b6cb376d689ae00
+size 34818
diff --git a/resources/mv-gallery/1/src/prompt.png b/resources/mv-gallery/1/src/prompt.png
new file mode 100644
index 0000000000000000000000000000000000000000..00c566b332e528b4de40f9d2f4a8d5dd44fc4f40
--- /dev/null
+++ b/resources/mv-gallery/1/src/prompt.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0d5fa8487834b51ffa89fb42b825fedb55e9902f3c4509c460c6aeab2c4d0d7
+size 34392
diff --git a/resources/mv-gallery/2/edit/0.png b/resources/mv-gallery/2/edit/0.png
new file mode 100644
index 0000000000000000000000000000000000000000..2a2896bd2e86e2687e81eec08d78748bd16d6e40
--- /dev/null
+++ b/resources/mv-gallery/2/edit/0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7defdf226cfda007f9566572ad4f10edee8b806b97491323ece3a831f9a7962
+size 38885
diff --git a/resources/mv-gallery/2/edit/1.png b/resources/mv-gallery/2/edit/1.png
new file mode 100644
index 0000000000000000000000000000000000000000..38d5d83734390591e4a7a7d0107ca961439a6192
--- /dev/null
+++ b/resources/mv-gallery/2/edit/1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d27c761282a7130e85c30e08b3406e640a21186c0cbafe94f576ebbe062acf37
+size 43875
diff --git a/resources/mv-gallery/2/edit/2.png b/resources/mv-gallery/2/edit/2.png
new file mode 100644
index 0000000000000000000000000000000000000000..43a345337e5493ee175f4e2e01406924794cda29
--- /dev/null
+++ b/resources/mv-gallery/2/edit/2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:796bc56899c91f7c7bfa08c8c3278e2c8b5932a257c2737e1245b99d44416574
+size 32073
diff --git a/resources/mv-gallery/2/edit/prompt.png b/resources/mv-gallery/2/edit/prompt.png
new file mode 100644
index 0000000000000000000000000000000000000000..cc5af485413e2f175a74352f9b9649ae1d4c9c67
--- /dev/null
+++ b/resources/mv-gallery/2/edit/prompt.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f194ba3af273b6f70fd59da0af7df80801c7add0406efb654c09e153cb091e8
+size 38996
diff --git a/resources/mv-gallery/2/edit2/0.png b/resources/mv-gallery/2/edit2/0.png
new file mode 100644
index 0000000000000000000000000000000000000000..22ef4ee58d1ff361de837984503308a76101b092
--- /dev/null
+++ b/resources/mv-gallery/2/edit2/0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:984cb2a4301a7a75126071dd2e4a8a07b66401ac7035b15a7771f5a92f7ecf05
+size 36494
diff --git a/resources/mv-gallery/2/edit2/1.png b/resources/mv-gallery/2/edit2/1.png
new file mode 100644
index 0000000000000000000000000000000000000000..fcabc9593db476657c496b9230cde1b23db36b76
--- /dev/null
+++ b/resources/mv-gallery/2/edit2/1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bd8d641de2b78ce883decdddfa744f14008421d7b9a597bca837225c04b1ca4
+size 42852
diff --git a/resources/mv-gallery/2/edit2/2.png b/resources/mv-gallery/2/edit2/2.png
new file mode 100644
index 0000000000000000000000000000000000000000..7be7a7bccaf51eb2e1d6e20703356f0f74a5a72e
--- /dev/null
+++ b/resources/mv-gallery/2/edit2/2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f982d22a2f0fc154207fa5c8f5ad7d3c1abcd651a5686d71f9f388217bc5b97
+size 32548
diff --git a/resources/mv-gallery/2/edit2/prompt.png b/resources/mv-gallery/2/edit2/prompt.png
new file mode 100644
index 0000000000000000000000000000000000000000..e3edd6c78ecb702bb1722f956e8f5a2af10d1c61
--- /dev/null
+++ b/resources/mv-gallery/2/edit2/prompt.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbdde3c1562dc3406dd4ce50a61a966b4817178d907b9ca5c4ffd32d9366495c
+size 33971
diff --git a/resources/mv-gallery/2/src/0.png b/resources/mv-gallery/2/src/0.png
new file mode 100644
index 0000000000000000000000000000000000000000..11a2c348ff4e8f76d1a31b7e0936e7709d83bf87
--- /dev/null
+++ b/resources/mv-gallery/2/src/0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8204713b64298624d1c703288c2342f0f25f4bc44b330f5fcbddf6fe39b15bb
+size 27094
diff --git a/resources/mv-gallery/2/src/1.png b/resources/mv-gallery/2/src/1.png
new file mode 100644
index 0000000000000000000000000000000000000000..11a2c348ff4e8f76d1a31b7e0936e7709d83bf87
--- /dev/null
+++ b/resources/mv-gallery/2/src/1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8204713b64298624d1c703288c2342f0f25f4bc44b330f5fcbddf6fe39b15bb
+size 27094
diff --git a/resources/mv-gallery/2/src/2.png b/resources/mv-gallery/2/src/2.png
new file mode 100644
index 0000000000000000000000000000000000000000..aa75c36bae555d5bfdf98fd41e2c66e63ea8fe81
--- /dev/null
+++ b/resources/mv-gallery/2/src/2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dd5c079793b69790c889dd3968ed607b2eb292d5f671d0d3358c8c7e0153b65
+size 30177
diff --git a/resources/mv-gallery/2/src/prompt.png b/resources/mv-gallery/2/src/prompt.png
new file mode 100644
index 0000000000000000000000000000000000000000..46f29a29751c9be35466105df7c56e20532a9fff
--- /dev/null
+++ b/resources/mv-gallery/2/src/prompt.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:338c3769e7e5661c7c6fbfbbb070ff964dfc2dd3bbc966760f40260e6cbb8c31
+size 34068
diff --git a/scripts/img2mv.py b/scripts/img2mv.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f14f54dcb615399223084663a376ec2f5e08469
--- /dev/null
+++ b/scripts/img2mv.py
@@ -0,0 +1,97 @@
+import argparse
+import sys
+from pathlib import Path
+from typing import Optional
+
+# --- Start of the "Messy" but Effective Path Setup ---
+# This block ensures that imports work correctly without modifying the src directory.
+# It adds both the project root and the src directory to the Python path.
+try:
+ # Get the project root directory (which is the parent of the 'scripts' directory)
+ project_root = Path(__file__).resolve().parent.parent
+ # Get the source code directory
+ src_dir = project_root / "src"
+
+ # Add both directories to the system path
+ sys.path.insert(0, str(project_root))
+ sys.path.insert(0, str(src_dir))
+except IndexError:
+ # Fallback for when the script is run in a way that __file__ is not defined
+ print("Could not determine project root. Please run from the 'scripts' directory.")
+ sys.exit(1)
+# --- End of Path Setup ---
+
+import torch
+from PIL import Image
+
+from pipeline import Zero123PlusPipeline # This now works because src/ is on the path
+from utils import add_white_bg, load_z123_pipe
+
+
+def generate_from_single_view(
+ input_path: Path,
+ output_path: Path,
+ device_number: int = 0,
+ pipeline: Optional[Zero123PlusPipeline] = None,
+) -> None:
+ """
+ Generates a multi-view image grid from a single input image.
+
+ Args:
+ input_path: Path to the single input image.
+ output_path: Path to save the generated multi-view .png file.
+ device_number: The GPU device number to use.
+ pipeline: An optional pre-loaded pipeline instance.
+ """
+ if not input_path.is_file():
+ raise FileNotFoundError(f"Input image not found at: {input_path}")
+
+ print(f"Loading pipeline on device {device_number}...")
+ if pipeline is None:
+ pipeline = load_z123_pipe(device_number)
+
+ print(f"Processing input image: {input_path}")
+ cond_image = Image.open(input_path)
+ cond_image = add_white_bg(cond_image)
+
+ print("Generating multi-view grid...")
+ result = pipeline(cond_image, num_inference_steps=75).images[0]
+
+ output_path.parent.mkdir(parents=True, exist_ok=True)
+ result.save(output_path)
+ print(f"Successfully saved multi-view grid to: {output_path}")
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(
+ description="Generate a multi-view image grid from a single input view using Zero123++."
+ )
+ parser.add_argument(
+ "--input_image",
+ type=Path,
+ required=True,
+ help="Path to the single input image file (e.g., examples/robot_sunglasses/src.png)."
+ )
+ parser.add_argument(
+ "--output_path",
+ type=Path,
+ required=True,
+ help="Path to save the output multi-view grid (e.g., examples/robot_sunglasses/src_mv.png)."
+ )
+ parser.add_argument(
+ "--device_number",
+ type=int,
+ default=0,
+ help="GPU device number to use for generation."
+ )
+ args = parser.parse_args()
+
+ try:
+ generate_from_single_view(
+ input_path=args.input_image,
+ output_path=args.output_path,
+ device_number=args.device_number
+ )
+ except Exception as e:
+ print(f"An error occurred: {e}")
+ sys.exit(1)
diff --git a/scripts/recon.py b/scripts/recon.py
new file mode 100644
index 0000000000000000000000000000000000000000..6bdf641aaec6bbd1be0e19fcbd79bde956d57f7d
--- /dev/null
+++ b/scripts/recon.py
@@ -0,0 +1,219 @@
+import os
+import argparse
+import numpy as np
+import torch
+from PIL import Image
+from torchvision.transforms import v2
+from omegaconf import OmegaConf
+from einops import rearrange
+from tqdm import tqdm
+from huggingface_hub import hf_hub_download
+import sys
+
+script_dir = os.path.dirname(os.path.abspath(__file__))
+submodule_path = os.path.join(script_dir, "..", "external", "instant-mesh")
+sys.path.insert(0, submodule_path)
+
+from src.utils.camera_util import (
+ get_circular_camera_poses,
+ get_zero123plus_input_cameras,
+ FOV_to_intrinsics,
+)
+from src.utils.train_util import instantiate_from_config
+from src.utils.mesh_util import save_obj
+from src.utils.infer_util import save_video
+
+
+def get_render_cameras(
+ batch_size=1, M=120, radius=4.0, elevation=20.0, is_flexicubes=False
+):
+ c2ws = get_circular_camera_poses(M=M, radius=radius, elevation=elevation)
+ if is_flexicubes:
+ cameras = torch.linalg.inv(c2ws)
+ cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1, 1)
+ else:
+ extrinsics = c2ws.flatten(-2)
+ intrinsics = (
+ FOV_to_intrinsics(30.0).unsqueeze(0).repeat(M, 1, 1).float().flatten(-2)
+ )
+ cameras = torch.cat([extrinsics, intrinsics], dim=-1)
+ cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1)
+ return cameras
+
+
+def render_frames(
+ model, planes, render_cameras, render_size=512, chunk_size=1, is_flexicubes=False
+):
+ frames = []
+ for i in tqdm(range(0, render_cameras.shape[1], chunk_size)):
+ if is_flexicubes:
+ frame = model.forward_geometry(
+ planes, render_cameras[:, i : i + chunk_size], render_size=render_size
+ )["img"]
+ else:
+ frame = model.forward_synthesizer(
+ planes, render_cameras[:, i : i + chunk_size], render_size=render_size
+ )["images_rgb"]
+ frames.append(frame)
+ frames = torch.cat(frames, dim=1)[0]
+ return frames
+
+def main(args):
+ """
+ Main function to run the 3D mesh generation process.
+ """
+ # ============================
+ # CONFIG
+ # ============================
+ print("š Starting 3D mesh generation...")
+ config = OmegaConf.load(args.config)
+ config_name = os.path.basename(args.config).replace(".yaml", "")
+ model_config = config.model_config
+ infer_config = config.infer_config
+ IS_FLEXICUBES = config_name.startswith("instant-mesh")
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print(f"Using device: {device}")
+
+ # ============================
+ # SETUP OUTPUT DIRECTORY
+ # ============================
+ os.makedirs(args.output_dir, exist_ok=True)
+ base_name = os.path.splitext(os.path.basename(args.input_file))[0]
+ mesh_path = os.path.join(args.output_dir, "recon.obj")
+ video_path = os.path.join(args.output_dir, "recon.mp4")
+
+ # ============================
+ # LOAD RECONSTRUCTION MODEL
+ # ============================
+ print("Loading reconstruction model...")
+ model = instantiate_from_config(model_config)
+
+ # Download model checkpoint if it doesn't exist
+ model_ckpt_path = (
+ infer_config.model_path
+ if os.path.exists(infer_config.model_path)
+ else hf_hub_download(
+ repo_id="TencentARC/InstantMesh",
+ filename=f"{config_name.replace('-', '_')}.ckpt",
+ repo_type="model",
+ )
+ )
+
+ # Load the state dictionary
+ state_dict = torch.load(model_ckpt_path, map_location="cpu")["state_dict"]
+ state_dict = {
+ k[14:]: v for k, v in state_dict.items() if k.startswith("lrm_generator.")
+ }
+ model.load_state_dict(state_dict, strict=True)
+ model = model.to(device).eval()
+
+ if IS_FLEXICUBES:
+ model.init_flexicubes_geometry(device, fovy=30.0)
+
+ # ============================
+ # PREPARE DATA
+ # ============================
+ print(f"Processing input file: {args.input_file}")
+
+ # Load and preprocess the input image
+ input_image = Image.open(args.input_file).convert("RGB")
+ images = np.asarray(input_image, dtype=np.float32) / 255.0
+ images = torch.from_numpy(images).permute(2, 0, 1).contiguous().float()
+ # Rearrange from (C, H, W) to (B, C, H, W) where B is the number of views
+ images = rearrange(images, "c (n h) (m w) -> (n m) c h w", n=3, m=2)
+ images = images.unsqueeze(0).to(device)
+ images = v2.functional.resize(images, size=320, interpolation=3, antialias=True).clamp(0, 1)
+
+ input_cameras = get_zero123plus_input_cameras(batch_size=1, radius=4.0 * args.scale).to(device)
+
+ # ============================
+ # RUN INFERENCE AND SAVE OUTPUT
+ # ============================
+ with torch.no_grad():
+ # Generate 3D mesh
+ planes = model.forward_planes(images, input_cameras)
+ mesh_out = model.extract_mesh(planes, use_texture_map=False, **infer_config)
+
+ # Save the mesh
+ vertices, faces, vertex_colors = mesh_out
+ save_obj(vertices, faces, vertex_colors, mesh_path)
+ print(f"ā
Mesh saved to {mesh_path}")
+
+ # Render and save video if enabled
+ if args.save_video:
+ print("š„ Rendering video...")
+ render_size = infer_config.render_resolution
+ chunk_size = 20 if IS_FLEXICUBES else 1
+ render_cameras = get_render_cameras(
+ batch_size=1,
+ M=120,
+ radius=args.distance,
+ elevation=20.0,
+ is_flexicubes=IS_FLEXICUBES,
+ ).to(device)
+
+ frames = render_frames(
+ model=model,
+ planes=planes,
+ render_cameras=render_cameras,
+ render_size=render_size,
+ chunk_size=chunk_size,
+ is_flexicubes=IS_FLEXICUBES,
+ )
+ save_video(frames, video_path, fps=30)
+ print(f"ā
Video saved to {video_path}")
+
+ print("⨠Process complete.")
+
+if __name__ == "__main__":
+ # ============================
+ # SCRIPT ARGUMENTS
+ # ============================
+ parser = argparse.ArgumentParser(
+ description="Generate a 3D mesh and video from a single multi-view PNG file."
+ )
+
+ # Positional argument for config file
+ parser.add_argument(
+ "config",
+ type=str,
+ help="Path to the model config file (.yaml)."
+ )
+
+ # Required file paths
+ parser.add_argument(
+ "--input_file",
+ type=str,
+ required=True,
+ help="Path to the input PNG file."
+ )
+ parser.add_argument(
+ "--output_dir",
+ type=str,
+ default="outputs/",
+ help="Directory to save the output .obj and .mp4 files. Defaults to 'outputs/'."
+ )
+
+ # Optional parameters for model and rendering
+ parser.add_argument(
+ "--scale",
+ type=float,
+ default=1.0,
+ help="Scale of the input cameras."
+ )
+ parser.add_argument(
+ "--distance",
+ type=float,
+ default=4.5,
+ help="Camera distance for rendering the output video."
+ )
+ parser.add_argument(
+ "--no_video",
+ dest="save_video",
+ action="store_false",
+ help="If set, disables saving the output .mp4 video."
+ )
+
+ parsed_args = parser.parse_args()
+ main(parsed_args)
diff --git a/scripts/render_mesh.py b/scripts/render_mesh.py
new file mode 100644
index 0000000000000000000000000000000000000000..26dd7999249c35464cde9e26a83be5bddadb6eca
--- /dev/null
+++ b/scripts/render_mesh.py
@@ -0,0 +1,388 @@
+# This script is borrowed from https://github.com/allenai/objaverse-rendering
+import argparse
+import math
+import os
+from pathlib import Path
+import shutil
+from typing import Dict, Literal, Tuple
+
+import bpy
+from mathutils import Vector
+from PIL import Image
+
+
+
+# --- Blender Setup Functions ---
+def global_settings():
+    """Apply this script's render and Cycles settings to the current scene.
+
+    Returns:
+        The scene taken from ``bpy.context`` after the settings were applied.
+    """
+    scene = bpy.context.scene
+    render = scene.render
+
+    # Output format: 512x512 RGBA PNG rendered with Cycles.
+    render.engine = "CYCLES"
+    image_settings = render.image_settings
+    image_settings.file_format = "PNG"
+    image_settings.color_mode = "RGBA"
+    render.resolution_x = render.resolution_y = 512
+    render.resolution_percentage = 100
+
+    # Cycles options, applied in this fixed order.
+    cycles_settings = (
+        ("device", "GPU"),
+        ("samples", 32),
+        ("diffuse_bounces", 1),
+        ("glossy_bounces", 1),
+        ("transparent_max_bounces", 3),
+        ("transmission_bounces", 3),
+        ("filter_width", 0.01),
+        ("use_denoising", True),
+    )
+    for attribute, value in cycles_settings:
+        setattr(scene.cycles, attribute, value)
+
+    render.film_transparent = True
+    return scene
+
+
+def add_lighting() -> None:
+    """Replace the object named "Light" (if any) with two area lights."""
+    # Drop the default light when it is present.
+    if "Light" in bpy.data.objects:
+        bpy.data.objects["Light"].select_set(True)
+        bpy.ops.object.delete()
+
+    # Main light: a strong area light, scaled up on all three axes.
+    bpy.ops.object.light_add(type="AREA")
+    bpy.data.lights["Area"].energy = 30000
+    area_object = bpy.data.objects["Area"]
+    area_object.location[2] = 0.5
+    for axis in range(3):
+        area_object.scale[axis] = 100
+
+    # Fill light: a weaker area light placed at z = 2.
+    bpy.ops.object.light_add(type="AREA", location=(0, 0, 2))
+    fill_light = bpy.context.active_object
+    fill_light.data.energy = 2000
+    fill_light.scale = (10, 10, 10)
+
+
+def reset_scene() -> None:
+    """Reset the scene by deleting all objects and purging common data-blocks.
+
+    Objects are removed through the delete operator. Meshes, materials,
+    textures, images, lights, cameras, curves and worlds are then removed from
+    ``bpy.data``, and a new world named "World" is created and assigned to the
+    current scene.
+    """
+    # Delete all objects
+    bpy.ops.object.select_all(action='SELECT')
+    bpy.ops.object.delete()
+
+    # Data-block collections to purge, in the order they are emptied.
+    data_collections = (
+        bpy.data.meshes,
+        bpy.data.materials,
+        bpy.data.textures,
+        bpy.data.images,
+        bpy.data.lights,
+        bpy.data.cameras,
+        bpy.data.curves,
+        bpy.data.worlds,
+    )
+    for collection in data_collections:
+        # Snapshot with list() first: removing entries from a collection while
+        # iterating over that same collection is not safe and can skip items.
+        for block in list(collection):
+            collection.remove(block, do_unlink=True)
+
+    # Create a new default world
+    bpy.context.scene.world = bpy.data.worlds.new("World")
+    bpy.context.view_layer.update()
+
+
+def load_object(object_path: str) -> None:
+    """Import a 3D model into the scene, chosen by its file extension.
+
+    The extension is matched case-insensitively, and path-like objects are
+    accepted in addition to plain strings.
+
+    Args:
+        object_path: Path to a ``.glb`` or ``.fbx`` file.
+
+    Raises:
+        ValueError: If the path does not end in a supported extension.
+    """
+    path_str = os.fspath(object_path)
+    lowered = path_str.lower()
+    if lowered.endswith(".glb"):
+        bpy.ops.import_scene.gltf(filepath=path_str, merge_vertices=True)
+    elif lowered.endswith(".fbx"):
+        bpy.ops.import_scene.fbx(filepath=path_str)
+    else:
+        raise ValueError(f"Unsupported file type: {object_path}")
+
+
+# --- Scene Normalization and Utility Functions ---
+def scene_bbox(single_obj=None, ignore_matrix=False):
+    """Return the (min, max) corners of a bounding box as two Vectors.
+
+    Args:
+        single_obj: When given, only this object's bound box is used;
+            otherwise every object yielded by ``scene_meshes()`` contributes.
+        ignore_matrix: When true, bound-box corners are used as-is instead of
+            being multiplied by the object's ``matrix_world``.
+
+    Raises:
+        RuntimeError: If there is no object to measure.
+    """
+    targets = [single_obj] if single_obj is not None else scene_meshes()
+    lows = (math.inf,) * 3
+    highs = (-math.inf,) * 3
+    seen_any = False
+
+    for obj in targets:
+        seen_any = True
+        for corner in obj.bound_box:
+            point = Vector(corner)
+            if not ignore_matrix:
+                point = obj.matrix_world @ point
+            lows = tuple(min(a, b) for a, b in zip(lows, point))
+            highs = tuple(max(a, b) for a, b in zip(highs, point))
+
+    if not seen_any:
+        raise RuntimeError("No objects in scene to compute bounding box for")
+    return Vector(lows), Vector(highs)
+
+
+def scene_root_objects():
+    """Yield each object of the current scene that has no parent."""
+    for candidate in bpy.context.scene.objects.values():
+        if candidate.parent:
+            continue
+        yield candidate
+
+
+def scene_meshes():
+    """Yield each object of the current scene whose data is a Mesh."""
+    for candidate in bpy.context.scene.objects.values():
+        if not isinstance(candidate.data, bpy.types.Mesh):
+            continue
+        yield candidate
+
+
+def normalize_scene(target_scale=1.0):
+    """Rescale the scene so its largest extent is ``target_scale``, then center it.
+
+    Raises:
+        ValueError: If the scene's bounding box has zero size on every axis.
+    """
+    # Pass 1: uniform scale applied to every root object.
+    low, high = scene_bbox()
+    extent = high - low
+    longest_side = max(extent.x, extent.y, extent.z)
+    if longest_side == 0:
+        raise ValueError("Model has zero size. Cannot normalize.")
+
+    factor = target_scale / longest_side
+    for root in scene_root_objects():
+        root.scale = root.scale * factor
+    bpy.context.view_layer.update()
+
+    # Pass 2: recompute the box after scaling and move its center to the origin.
+    low, high = scene_bbox()
+    midpoint = (low + high) * 0.5
+    for root in scene_root_objects():
+        root.location -= midpoint
+    bpy.context.view_layer.update()
+
+
+# --- Camera and Lighting Setup ---
+def setup_camera(scene):
+    """Position the scene's "Camera" object and give it a TRACK_TO constraint.
+
+    Returns:
+        A ``(camera_object, constraint)`` pair; the constraint has no target yet.
+    """
+    camera = scene.objects["Camera"]
+    camera.location = (0, 1.2, 0)
+
+    optics = camera.data
+    optics.lens = 35
+    optics.sensor_width = 32
+
+    tracking = camera.constraints.new(type="TRACK_TO")
+    tracking.track_axis = "TRACK_NEGATIVE_Z"
+    tracking.up_axis = "UP_Y"
+    return camera, tracking
+
+
+def _create_light(
+    name: str,
+    light_type: Literal["POINT", "SUN", "SPOT", "AREA"],
+    location: Tuple[float, float, float],
+    rotation: Tuple[float, float, float],
+    energy: float,
+    use_shadow: bool = False,
+    specular_factor: float = 1.0,
+) -> bpy.types.Object:
+    """Create a light, link it to the active collection, and return its object.
+
+    Args:
+        name: Name used for both the light data-block and the object.
+        light_type: Blender light type.
+        location: Object location.
+        rotation: Euler rotation assigned to ``rotation_euler``.
+        energy: Light energy.
+        use_shadow: Value for the light's ``use_shadow`` flag.
+        specular_factor: Value for the light's ``specular_factor``.
+    """
+    data_block = bpy.data.lights.new(name=name, type=light_type)
+    light = bpy.data.objects.new(name, data_block)
+    bpy.context.collection.objects.link(light)
+
+    # Placement lives on the object ...
+    light.location = location
+    light.rotation_euler = rotation
+
+    # ... while the photometric properties live on the light data-block.
+    data_block.energy = energy
+    data_block.use_shadow = use_shadow
+    data_block.specular_factor = specular_factor
+
+    return light
+
+
+def create_lighting() -> Dict[str, bpy.types.Object]:
+    """Delete existing lights and add four fixed sun lights.
+
+    Returns:
+        A dict mapping ``key_light``, ``fill_light``, ``rim_light`` and
+        ``bottom_light`` to the created light objects.
+    """
+    # Clear out whatever lights are currently in the scene.
+    bpy.ops.object.select_all(action="DESELECT")
+    bpy.ops.object.select_by_type(type="LIGHT")
+    bpy.ops.object.delete()
+
+    # (dict key, object name, euler rotation in radians, energy)
+    sun_specs = (
+        ("key_light", "Key_Light", (0.785398, 0, -0.785398), 0.5),  # 45°, -45°
+        ("fill_light", "Fill_Light", (0.785398, 0, 2.35619), 0.3),  # 45°, 135°
+        ("rim_light", "Rim_Light", (-0.785398, 0, -3.92699), 0.5),  # -45°, -225°
+        ("bottom_light", "Bottom_Light", (3.14159, 0, 0), 0.2),  # 180° (from below)
+    )
+
+    lights = {}
+    for key, light_name, euler, strength in sun_specs:
+        lights[key] = _create_light(
+            name=light_name,
+            light_type="SUN",
+            location=(0, 0, 0),
+            rotation=euler,
+            energy=strength,
+        )
+    return lights
+
+
+# --- Main Rendering and Image Processing Functions ---
+def render_object(
+    object_file: str,
+    output_dir: str,
+    camera_views=(
+        (30, 30, 1.5),
+        (90, -20, 1.5),
+        (150, 30, 1.5),
+        (210, -20, 1.5),
+        (270, 30, 1.5),
+        (330, -20, 1.5),
+    ),
+    background_color=(255, 255, 255)
+) -> None:
+    """Render an object from several camera views into numbered PNG files.
+
+    Args:
+        object_file: Path of the model to load (see ``load_object``).
+        output_dir: Directory that receives ``00.png``, ``01.png``, ...; it is
+            created if missing.
+        camera_views: Iterable of ``(azimuth_deg, elevation_deg, distance)``
+            triples, one per rendered image. The default is an immutable tuple
+            so it cannot be shared and mutated across calls.
+        background_color: World color as 0-255 RGB channel values.
+    """
+    scene = global_settings()
+    os.makedirs(output_dir, exist_ok=True)
+    reset_scene()
+
+    # Create and set up a new camera
+    bpy.ops.object.camera_add()
+    camera = bpy.context.object
+    camera.name = "Camera"
+    scene.collection.objects.link(camera)
+    scene.camera = camera
+
+    scene.view_settings.view_transform = 'Standard'
+
+    # Set background color: a flat world color with film transparency turned
+    # off, overriding the transparent film that global_settings() enabled.
+    world = bpy.data.worlds["World"]
+    world.use_nodes = False
+    world.color = tuple(channel / 255 for channel in background_color)
+    scene.render.film_transparent = False
+    scene.world = world
+
+    # Load, normalize, and light the object
+    load_object(object_file)
+    normalize_scene()
+    create_lighting()
+    cam, cam_constraint = setup_camera(scene)
+
+    # Create an empty object for the camera to track
+    empty = bpy.data.objects.new("Empty", None)
+    scene.collection.objects.link(empty)
+    cam_constraint.target = empty
+
+    for i, (azim, elev, camera_dist) in enumerate(camera_views):
+        # Spherical -> Cartesian: azimuth in the XY plane, elevation toward +Z.
+        theta = math.radians(azim)
+        phi = math.radians(elev)
+        cam.location = (
+            camera_dist * math.cos(phi) * math.cos(theta),
+            camera_dist * math.cos(phi) * math.sin(theta),
+            camera_dist * math.sin(phi),
+        )
+
+        # Render the image
+        scene.render.filepath = os.path.join(output_dir, f"{i:02d}.png")
+        bpy.ops.render.render(write_still=True)
+
+
+def create_tiled_grid(
+    image_paths=("00.png", "01.png", "02.png", "03.png", "04.png", "05.png"),
+    output_path="tiled_grid.png",
+    tile_width=320,
+    tile_height=320,
+    background_color=(255, 255, 255),
+):
+    """Tile six images into a grid two tiles wide and three tiles high.
+
+    Images are placed row by row (index 0 and 1 in the first row, and so on).
+    RGBA tiles are composited onto ``background_color`` before being pasted.
+
+    Args:
+        image_paths: Exactly six image paths. The default is an immutable
+            tuple so it cannot be shared and mutated across calls.
+        output_path: Where the grid image is written.
+        tile_width: Width each image is resized to.
+        tile_height: Height each image is resized to.
+        background_color: RGB fill for the grid and for transparent tiles.
+
+    Returns:
+        None. If the number of paths is not six, an error is printed and
+        nothing is written.
+    """
+    if len(image_paths) != 6:
+        print("Error: Exactly 6 image paths are required.")
+        return
+
+    grid_width = tile_width * 2
+    grid_height = tile_height * 3
+    grid_image = Image.new("RGB", (grid_width, grid_height), background_color)
+
+    for i, image_path in enumerate(image_paths):
+        # Close the source file as soon as the resized copy exists.
+        with Image.open(image_path) as source:
+            img = source.resize((tile_width, tile_height))
+
+        # Handle transparency by pasting onto a solid background
+        if img.mode == "RGBA":
+            background = Image.new("RGB", (tile_width, tile_height), background_color)
+            background.paste(img, (0, 0), img)
+            img = background
+
+        x = (i % 2) * tile_width
+        y = (i // 2) * tile_height
+        grid_image.paste(img, (x, y))
+
+    grid_image.save(output_path)
+    print(f"Tiled grid image saved to: {output_path}")
+
+
+
+# --- Main Execution Block ---
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Render a 3D object into a multi-view and source image format for EditP23.")
+    parser.add_argument("--mesh_path", type=str, required=True, help="Path to the input .glb or .fbx file.")
+    parser.add_argument("--output_dir", type=str, required=True, help="Directory to save the output src.png and src_mv.png.")
+    parser.add_argument("--camera_dist", type=float, default=1.35, help="Camera distance from the object.")
+    parser.add_argument("--azim_offset", type=float, default=0, help="Azimuthal offset for camera views in degrees.")
+    args = parser.parse_args()
+
+    RENDERS_SUBDIR = "all_renders"
+    BACKGROUND_COLOR = (255, 255, 255)
+
+    output_dir = Path(args.output_dir)
+    renders_path = output_dir / RENDERS_SUBDIR
+
+    # Six grid views: azimuths 30, 90, ..., 330 degrees (plus the offset),
+    # alternating between the two elevations.
+    ELEV_1 = 20
+    ELEV_2 = -10
+    elevs = [ELEV_1, ELEV_2] * 3
+    azims = [(30 + 60 * i + args.azim_offset) % 360 for i in range(6)]
+    # A seventh view (index 6) at azimuth 0 + offset becomes the conditioning image.
+    camera_views = [(azim, elev, args.camera_dist) for azim, elev in zip(azims, elevs)] + [
+        ((0 + args.azim_offset) % 360, ELEV_1, args.camera_dist)
+    ]
+
+    # Render the object from different views
+    render_object(
+        args.mesh_path,
+        output_dir=str(renders_path),
+        camera_views=camera_views,
+        background_color=BACKGROUND_COLOR,
+    )
+
+    # --- Create Final Outputs ---
+    image_paths_for_grid = [renders_path / f"{i:02d}.png" for i in range(6)]
+
+    create_tiled_grid(
+        image_paths=image_paths_for_grid,
+        output_path=str(output_dir/"src_mv.png"),
+        background_color=BACKGROUND_COLOR,
+    )
+
+    shutil.copy(renders_path / "06.png", output_dir / "src.png")
+
+    # src.png and src_mv.png are written to output_dir; renders_path only
+    # holds the individual per-view renders.
+    print(
+        f"Saved conditioning view and multi-view grid to {output_dir} "
+        f"(individual renders in {renders_path})."
+    )
diff --git a/src/edit_mv.py b/src/edit_mv.py
new file mode 100644
index 0000000000000000000000000000000000000000..0781bfe83acd0fbc692ce59134b40dbffe443723
--- /dev/null
+++ b/src/edit_mv.py
@@ -0,0 +1,177 @@
+import numpy as np
+import torch
+from PIL import Image
+from tqdm import tqdm
+
+from diffusers import DDPMScheduler
+from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import retrieve_timesteps
+from pipeline import Zero123PlusPipeline
+from utils import add_white_bg, load_z123_pipe
+from typing import Optional
+
+class VAEProcessor:
+    """Encode PIL images into scaled VAE latents and decode latents back."""
+
+    def __init__(self, pipeline: Zero123PlusPipeline):
+        self.pipe = pipeline
+        self.image_processor = pipeline.image_processor
+        self.vae = pipeline.vae
+
+        # Normalization constants; encode() and decode() apply them in
+        # opposite directions.
+        self.latent_shift_factor = 0.22
+        self.latent_scale_factor = 0.75
+        self.image_scale_factor = 0.5 / 0.8
+
+    def encode(self, image: Image.Image) -> torch.Tensor:
+        """Preprocess an image, encode it with the VAE, and scale the latents."""
+        pixels = self.image_processor.preprocess(image)
+        pixels = pixels.to(self.vae.device).half()
+        with torch.autocast("cuda"):
+            with torch.inference_mode():
+                pixels *= self.image_scale_factor
+                # Uses the mode of the latent distribution rather than a sample.
+                latents = self.vae.encode(pixels).latent_dist.mode()
+                latents *= self.vae.config.scaling_factor
+                return (latents - self.latent_shift_factor) * self.latent_scale_factor
+
+    def decode(self, latents: torch.Tensor) -> Image.Image:
+        """Undo the latent scaling, decode with the VAE, and post-process."""
+        with torch.autocast("cuda"):
+            with torch.inference_mode():
+                unscaled = latents / self.latent_scale_factor + self.latent_shift_factor
+                decoded = self.vae.decode(
+                    unscaled / self.vae.config.scaling_factor, return_dict=False
+                )
+                image = decoded[0]
+                image /= self.image_scale_factor
+                return self.image_processor.postprocess(image)
+
+
+class EditAwareDenoiser:
+ """Encapsulates the entire Edit-Aware Denoising process.
+
+ `denoise` starts from a copy of the source latents and, for each of the
+ last `n_max` scheduler timesteps, adds `dt` times the difference between
+ the target-conditioned and source-conditioned model predictions.
+ """
+ def __init__(self, pipe: Zero123PlusPipeline, scheduler: DDPMScheduler, T_steps: int, src_gs: float, tar_gs: float, n_max: int):
+ """Initializes the denoiser with the pipeline and configuration.
+
+ Args:
+ pipe: Pipeline that is called to obtain model predictions.
+ scheduler: Scheduler used for timesteps and for noising the latents.
+ T_steps: Number of inference steps requested from the scheduler.
+ src_gs: CFG scale applied to the source-conditioned prediction.
+ tar_gs: CFG scale applied to the target-conditioned prediction.
+ n_max: How many of the final timesteps are processed by `denoise`.
+ """
+ self.pipe = pipe
+ self.scheduler = scheduler
+ self.T_steps = T_steps
+ self.src_guidance_scale = src_gs
+ self.tar_guidance_scale = tar_gs
+ self.n_max = n_max
+
+ @staticmethod
+ def _mix_cfg(cond: torch.Tensor, uncond: torch.Tensor, cfg: float) -> torch.Tensor:
+ """Mixes conditional and unconditional predictions.
+
+ Returns `uncond + cfg * (cond - uncond)`.
+ """
+ return uncond + cfg * (cond - uncond)
+
+ def _get_differential_edit_direction(self, t: torch.Tensor, zt_src: torch.Tensor, zt_tar: torch.Tensor) -> torch.Tensor:
+ """Computes the differential edit direction (delta v) for a timestep.
+
+ Returns the CFG-mixed target prediction minus the CFG-mixed source
+ prediction.
+ """
+ # One noise sample is drawn and reused for both condition latents.
+ condition_noise = torch.randn_like(self.src_cond_lat)
+
+ # NOTE(review): this uses self.pipe.scheduler while _propagate_for_timestep
+ # uses self.scheduler; run_editp23 passes the same object for both.
+ noisy_src_cond_lat = self.pipe.scheduler.scale_model_input(
+ self.pipe.scheduler.add_noise(self.src_cond_lat, condition_noise, t), t
+ )
+ vt_src_uncond, vt_src_cond = self._calc_v_zero(self.src_cond_img, zt_src, t, noisy_src_cond_lat)
+ vt_src = self._mix_cfg(vt_src_cond, vt_src_uncond, self.src_guidance_scale)
+
+ noisy_tar_cond_lat = self.pipe.scheduler.scale_model_input(
+ self.pipe.scheduler.add_noise(self.tar_cond_lat, condition_noise, t), t
+ )
+ vt_tar_uncond, vt_tar_cond = self._calc_v_zero(self.tar_cond_img, zt_tar, t, noisy_tar_cond_lat)
+ vt_tar = self._mix_cfg(vt_tar_cond, vt_tar_uncond, self.tar_guidance_scale)
+
+ return vt_tar - vt_src
+
+ def _propagate_for_timestep(self, zt_edit: torch.Tensor, t: torch.Tensor, dt: torch.Tensor) -> torch.Tensor:
+ """Performs a single propagation step for the edit.
+
+ Returns the updated edit latents, cast to the dtype of the model
+ prediction difference.
+ """
+ # The same forward noise is added to the source latents and to the
+ # current edit latents.
+ fwd_noise = torch.randn_like(self.x_src)
+ zt_src = self.scheduler.scale_model_input(self.scheduler.add_noise(self.x_src, fwd_noise, t), t)
+ zt_tar = self.scheduler.scale_model_input(self.scheduler.add_noise(zt_edit, fwd_noise, t), t)
+
+ diff_v = self._get_differential_edit_direction(t, zt_src, zt_tar)
+
+ # The update is accumulated in float32 and cast back afterwards.
+ zt_edit_change = dt * diff_v
+ zt_edit = zt_edit.to(torch.float32) + zt_edit_change
+ return zt_edit.to(diff_v.dtype)
+
+ def _calc_v_zero(self, condition_image: Image.Image, noisy_latent: torch.Tensor, t: torch.Tensor, noised_condition: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+ """Calculates the unconditional and conditional v-prediction from the UNet.
+
+ The pipeline is run for one inference step at timestep `t`; a forward
+ hook on `self.pipe.unet` records the first element of its output, which
+ is split into two chunks and returned. The pipeline's own return value
+ is discarded.
+ """
+ # Passed to the pipeline as guidance_scale; the CFG mixing that matters
+ # here is done by the caller through _mix_cfg.
+ DUMMY_GUIDANCE_SCALE = 2
+ model_output = {}
+
+ def hook_fn(module, args, output):
+ # Overwritten on every hooked forward call, so the last call wins.
+ model_output['v_pred'] = output[0]
+
+ hook_handle = self.pipe.unet.register_forward_hook(hook_fn)
+
+ try:
+ self.pipe(
+ condition_image,
+ latents=noisy_latent,
+ num_inference_steps=1,
+ guidance_scale=DUMMY_GUIDANCE_SCALE,
+ timesteps=[t.item()],
+ output_type="latent",
+ noisy_cond_lat=noised_condition,
+ )
+ finally:
+ # Always detach the hook, even if the pipeline call raised.
+ hook_handle.remove()
+
+ return model_output['v_pred'].chunk(2)
+
+ @torch.no_grad()
+ def denoise(self, x_src: torch.Tensor, src_cond_img: Image.Image, tar_cond_img: Image.Image) -> torch.Tensor:
+ """Public method to run the entire denoising process.
+
+ Args:
+ x_src: Source latents; left unmodified (a clone is edited).
+ src_cond_img: Condition image for the source.
+ tar_cond_img: Condition image for the target (edited) view.
+
+ Returns:
+ The edited latents after the last processed timestep.
+ """
+ self.x_src = x_src
+ self.src_cond_img = src_cond_img
+ self.tar_cond_img = tar_cond_img
+
+ timesteps, _ = retrieve_timesteps(self.scheduler, self.T_steps, self.x_src.device)
+ zt_edit = self.x_src.clone()
+
+ self.src_cond_lat = self.pipe.make_condition_lat(self.src_cond_img, guidance_scale=2.0)
+ self.tar_cond_lat = self.pipe.make_condition_lat(self.tar_cond_img, guidance_scale=2.0)
+
+ # Only the last n_max timesteps are processed; earlier ones are skipped.
+ start_index = max(0, len(timesteps) - self.n_max)
+
+ for i in tqdm(range(start_index, len(timesteps))):
+ t = timesteps[i]
+ # Timesteps are divided by 1000; the step after the last one is
+ # treated as time zero.
+ t_i = t / 1000.0
+ t_im1 = timesteps[i + 1] / 1000.0 if i + 1 < len(timesteps) else torch.zeros_like(t_i)
+ dt = t_im1 - t_i
+
+ zt_edit = self._propagate_for_timestep(zt_edit, t, dt)
+
+ return zt_edit
+
+
+def run_editp23(
+    src_condition_path: str,
+    tgt_condition_path: str,
+    original_mv: str,
+    save_path: str,
+    device_number: int = 0,
+    T_steps: int = 50,
+    n_max: int = 31,
+    src_guidance_scale: float = 3.5,
+    tar_guidance_scale: float = 5.0,
+    seed: int = 18,
+    pipeline: Optional[Zero123PlusPipeline] = None,
+) -> None:
+    """Run one EditP23 edit end to end and save the resulting image.
+
+    Args:
+        src_condition_path: Path of the original condition image.
+        tgt_condition_path: Path of the edited condition image.
+        original_mv: Path of the original multi-view image.
+        save_path: Where the edited multi-view image is written.
+        device_number: GPU index, used only when ``pipeline`` is not given.
+        T_steps: Number of scheduler inference steps.
+        n_max: Number of final timesteps the denoiser processes.
+        src_guidance_scale: CFG scale for the source condition.
+        tar_guidance_scale: CFG scale for the target condition.
+        seed: Seed for torch and numpy random generators.
+        pipeline: Preloaded pipeline; one is loaded when this is ``None``.
+    """
+    pipe = pipeline if pipeline is not None else load_z123_pipe(device_number)
+
+    # Seed both generators before any latent or noise is produced.
+    torch.manual_seed(seed)
+    np.random.seed(seed)
+
+    codec = VAEProcessor(pipe)
+
+    # All three inputs are composited onto white before use.
+    source_condition = add_white_bg(Image.open(src_condition_path))
+    target_condition = add_white_bg(Image.open(tgt_condition_path))
+    source_multiview = add_white_bg(Image.open(original_mv))
+    source_latents = codec.encode(source_multiview)
+
+    editor = EditAwareDenoiser(
+        pipe=pipe,
+        scheduler=pipe.scheduler,
+        T_steps=T_steps,
+        src_gs=src_guidance_scale,
+        tar_gs=tar_guidance_scale,
+        n_max=n_max,
+    )
+    edited_latents = editor.denoise(source_latents, source_condition, target_condition)
+
+    decoded_images = codec.decode(edited_latents)
+    decoded_images[0].save(save_path)
+    print(f"Successfully saved result to {save_path}")
\ No newline at end of file
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..28fba16d6ce346ebc9dfeddfb375ba661a7e1d2f
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,72 @@
+import argparse
+import sys
+from pathlib import Path
+from edit_mv import run_editp23, load_z123_pipe
+
+def main(args: argparse.Namespace) -> None:
+    """Set up and run the EditP23 process for a single experiment directory.
+
+    The directory must contain ``src.png``, ``edited.png`` and ``src_mv.png``.
+    The result is written to ``<exp_dir>/output/``, in a file named after the
+    target guidance scale and ``n_max``.
+
+    Args:
+        args: Parsed command-line arguments (``exp_dir``, ``device_number``,
+            ``seed``, ``T_steps``, ``n_max``, ``src_guidance_scale``,
+            ``tar_guidance_scale``).
+
+    Raises:
+        SystemExit: With status 1 if any required input file is missing.
+    """
+    exp_dir = Path(args.exp_dir)
+    input_files = {
+        "src_path": exp_dir / "src.png",
+        "edited_path": exp_dir / "edited.png",
+        "src_mv_path": exp_dir / "src_mv.png",
+    }
+
+    # Pre-run validation: report every missing input (on stderr) before
+    # exiting, so the user can fix them all in one go.
+    missing = [path for path in input_files.values() if not path.is_file()]
+    if missing:
+        for path in missing:
+            print(f"Error: Input file not found at {path}", file=sys.stderr)
+        sys.exit(1)
+
+    output_dir = exp_dir / "output"
+    output_dir.mkdir(exist_ok=True)
+    save_path = output_dir / f"result_tgs_{args.tar_guidance_scale}_nmax_{args.n_max}.png"
+
+    print(f"Running edit for experiment: {args.exp_dir}")
+    print(f"Saving to: {save_path}")
+
+    pipeline = load_z123_pipe(args.device_number)
+
+    run_editp23(
+        src_condition_path=str(input_files["src_path"]),
+        tgt_condition_path=str(input_files["edited_path"]),
+        original_mv=str(input_files["src_mv_path"]),
+        save_path=str(save_path),
+        device_number=args.device_number,
+        T_steps=args.T_steps,
+        n_max=args.n_max,
+        src_guidance_scale=args.src_guidance_scale,
+        tar_guidance_scale=args.tar_guidance_scale,
+        seed=args.seed,
+        pipeline=pipeline,
+    )
+
+if __name__ == "__main__":
+    # The description keeps its line breaks thanks to RawTextHelpFormatter.
+    parser = argparse.ArgumentParser(
+        description=(
+            "Run EditP23 for 3D object editing.\n"
+            "Paper presets for (tar_guidance_scale, n_max):\n"
+            "- Mild: (5, 31)\n"
+            "- Medium: (6, 41), (12, 42)\n"
+            "- Hard: (21, 39)"
+        ),
+        formatter_class=argparse.RawTextHelpFormatter,
+    )
+
+    # Experiment location and runtime environment.
+    parser.add_argument(
+        "--exp_dir",
+        type=str,
+        required=True,
+        help="Path to the experiment directory. Expects src.png, edited.png, and src_mv.png in this directory.",
+    )
+    parser.add_argument("--device_number", type=int, default=0, help="GPU device number to use.")
+    parser.add_argument("--seed", type=int, default=18, help="Random seed for reproducibility.")
+
+    # Denoising schedule.
+    parser.add_argument("--T_steps", type=int, default=50, help="Total number of denoising steps.")
+    parser.add_argument(
+        "--n_max",
+        type=int,
+        default=31,
+        help="Number of scheduler steps for edit-aware guidance. Increase up to T_steps for more significant edits.",
+    )
+
+    # Guidance strengths.
+    parser.add_argument(
+        "--src_guidance_scale",
+        type=float,
+        default=3.5,
+        help="CFG scale for the source condition. Can typically remain constant.",
+    )
+    parser.add_argument(
+        "--tar_guidance_scale",
+        type=float,
+        default=5.0,
+        help="CFG scale for the target condition. Increase for more significant edits.",
+    )
+
+    args = parser.parse_args()
+    main(args)
\ No newline at end of file
diff --git a/src/pipeline.py b/src/pipeline.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d223713fae0ab97e62e489374137b6f9a686b4e
--- /dev/null
+++ b/src/pipeline.py
@@ -0,0 +1,512 @@
+import os
+from typing import Any, Dict, Optional
+from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.schedulers import KarrasDiffusionSchedulers
+
+import numpy
+import torch
+import torch.nn as nn
+import transformers
+from collections import OrderedDict
+from PIL import Image
+from torchvision import transforms
+from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
+
+import diffusers
+from diffusers import (
+ AutoencoderKL,
+ DDPMScheduler,
+ DiffusionPipeline,
+ EulerAncestralDiscreteScheduler,
+ UNet2DConditionModel,
+ ImagePipelineOutput,
+)
+from diffusers.image_processor import VaeImageProcessor
+from diffusers.models.attention_processor import (
+ Attention,
+ AttnProcessor,
+ XFormersAttnProcessor,
+ AttnProcessor2_0,
+)
+from diffusers.utils.import_utils import is_xformers_available
+
+
+def to_rgb_image(maybe_rgba: Image.Image):
+    """Return an RGB image for an RGB or RGBA input.
+
+    RGB inputs are returned unchanged. RGBA inputs are pasted, using their
+    alpha channel as the mask, onto a canvas whose pixels are all 127.
+
+    Raises:
+        ValueError: For any other image mode.
+    """
+    mode = maybe_rgba.mode
+    if mode == "RGB":
+        return maybe_rgba
+    if mode != "RGBA":
+        raise ValueError("Unsupported image type.", maybe_rgba.mode)
+
+    width, height = maybe_rgba.size[0], maybe_rgba.size[1]
+    # randint's upper bound is exclusive, so every canvas value is 127.
+    canvas = numpy.random.randint(127, 128, size=[height, width, 3], dtype=numpy.uint8)
+    flattened = Image.fromarray(canvas, "RGB")
+    flattened.paste(maybe_rgba, mask=maybe_rgba.getchannel("A"))
+    return flattened
+
+
+class ReferenceOnlyAttnProc(torch.nn.Module):
+ """Attention-processor wrapper that shares reference states via `ref_dict`.
+
+ The attention computation itself is delegated to `chained_proc`. When the
+ wrapper is enabled it either stores this layer's `encoder_hidden_states`
+ in `ref_dict` under `self.name`, or concatenates previously stored states
+ onto them, depending on `mode`.
+ """
+ def __init__(self, chained_proc, enabled=False, name=None) -> None:
+ super().__init__()
+ self.enabled = enabled
+ self.chained_proc = chained_proc
+ self.name = name
+
+ def __call__(
+ self,
+ attn: Attention,
+ hidden_states,
+ encoder_hidden_states=None,
+ attention_mask=None,
+ mode="w",
+ ref_dict: dict = None,
+ is_cfg_guidance=False,
+ ) -> Any:
+ # Without explicit encoder states the hidden states attend to themselves.
+ if encoder_hidden_states is None:
+ encoder_hidden_states = hidden_states
+ if self.enabled and is_cfg_guidance:
+ # The first batch entry is processed on its own, without any reference
+ # states, and is re-attached to the result at the end.
+ res0 = self.chained_proc(
+ attn, hidden_states[:1], encoder_hidden_states[:1], attention_mask
+ )
+ hidden_states = hidden_states[1:]
+ encoder_hidden_states = encoder_hidden_states[1:]
+ if self.enabled:
+ if mode == "w":
+ # Write: remember this layer's states for a later pass.
+ ref_dict[self.name] = encoder_hidden_states
+ elif mode == "r":
+ # Read: append the stored states along dim 1 and remove them from ref_dict.
+ encoder_hidden_states = torch.cat(
+ [encoder_hidden_states, ref_dict.pop(self.name)], dim=1
+ )
+ elif mode == "m":
+ # Same as "r" but the stored states stay in ref_dict.
+ encoder_hidden_states = torch.cat(
+ [encoder_hidden_states, ref_dict[self.name]], dim=1
+ )
+ else:
+ # Unknown mode; note that asserts are stripped under `python -O`.
+ assert False, mode
+ res = self.chained_proc(
+ attn, hidden_states, encoder_hidden_states, attention_mask
+ )
+ if self.enabled and is_cfg_guidance:
+ res = torch.cat([res0, res])
+ return res
+
+
+class RefOnlyNoisedUNet(torch.nn.Module):
+ """UNet wrapper that runs a reference pass on a noised condition latent.
+
+ `forward` first calls the wrapped UNet on the (noised) condition latent in
+ write mode so that the `ReferenceOnlyAttnProc` processors fill `ref_dict`,
+ then calls the UNet on the actual sample in read mode.
+ """
+ def __init__(
+ self,
+ unet: UNet2DConditionModel,
+ train_sched: DDPMScheduler,
+ val_sched: EulerAncestralDiscreteScheduler,
+ ) -> None:
+ super().__init__()
+ self.unet = unet
+ self.train_sched = train_sched
+ self.val_sched = val_sched
+
+ # Wrap every attention processor; only those whose name ends with
+ # "attn1.processor" are enabled for reference sharing.
+ unet_lora_attn_procs = dict()
+ for name, _ in unet.attn_processors.items():
+ if torch.__version__ >= "2.0":
+ default_attn_proc = AttnProcessor2_0()
+ elif is_xformers_available():
+ default_attn_proc = XFormersAttnProcessor()
+ else:
+ default_attn_proc = AttnProcessor()
+ unet_lora_attn_procs[name] = ReferenceOnlyAttnProc(
+ default_attn_proc, enabled=name.endswith("attn1.processor"), name=name
+ )
+ unet.set_attn_processor(unet_lora_attn_procs)
+
+ def __getattr__(self, name: str):
+ # Attributes not found on the wrapper are looked up on the wrapped UNet.
+ try:
+ return super().__getattr__(name)
+ except AttributeError:
+ return getattr(self.unet, name)
+
+ def forward_cond(
+ self,
+ noisy_cond_lat,
+ timestep,
+ encoder_hidden_states,
+ class_labels,
+ ref_dict,
+ is_cfg_guidance,
+ **kwargs,
+ ):
+ """Run the UNet on the condition latent in write mode to fill `ref_dict`.
+
+ The UNet output is discarded; only the side effect on `ref_dict` is used.
+ """
+ if is_cfg_guidance:
+ # Drop the first batch entry of the conditioning inputs.
+ # NOTE(review): class_labels[1:] would fail if class_labels is None here.
+ encoder_hidden_states = encoder_hidden_states[1:]
+ class_labels = class_labels[1:]
+ self.unet(
+ noisy_cond_lat,
+ timestep,
+ encoder_hidden_states=encoder_hidden_states,
+ class_labels=class_labels,
+ cross_attention_kwargs=dict(mode="w", ref_dict=ref_dict),
+ **kwargs,
+ )
+
+ def forward(
+ self,
+ sample,
+ timestep,
+ encoder_hidden_states,
+ class_labels=None,
+ *args,
+ cross_attention_kwargs,
+ down_block_res_samples=None,
+ mid_block_res_sample=None,
+ **kwargs,
+ ):
+ """Run the reference pass, then the main UNet pass, and return its output.
+
+ `cross_attention_kwargs` must contain "cond_lat"; "noisy_cond_lat" and
+ "is_cfg_guidance" are optional.
+ """
+ cond_lat = cross_attention_kwargs["cond_lat"]
+ noisy_cond_lat = cross_attention_kwargs.get("noisy_cond_lat", None)
+ is_cfg_guidance = cross_attention_kwargs.get("is_cfg_guidance", False)
+ # Drawn on every call, even when a pre-noised condition latent is supplied
+ # and the noise goes unused; removing it would change the RNG sequence.
+ noise = torch.randn_like(cond_lat)
+ if noisy_cond_lat is None:
+ # No pre-noised latent given: noise the condition latent with the
+ # training or validation scheduler, depending on module mode.
+ if self.training:
+ noisy_cond_lat = self.train_sched.add_noise(cond_lat, noise, timestep)
+ noisy_cond_lat = self.train_sched.scale_model_input(
+ noisy_cond_lat, timestep
+ )
+ else:
+ noisy_cond_lat = self.val_sched.add_noise(
+ cond_lat, noise, timestep.reshape(-1)
+ )
+ noisy_cond_lat = self.val_sched.scale_model_input(
+ noisy_cond_lat, timestep.reshape(-1)
+ )
+ ref_dict = {}
+ self.forward_cond(
+ noisy_cond_lat,
+ timestep,
+ encoder_hidden_states,
+ class_labels,
+ ref_dict,
+ is_cfg_guidance,
+ **kwargs,
+ )
+ weight_dtype = self.unet.dtype
+ # Main pass: read mode consumes the states stored in ref_dict above;
+ # optional residuals are cast to the UNet's dtype first.
+ return self.unet(
+ sample,
+ timestep,
+ encoder_hidden_states,
+ *args,
+ class_labels=class_labels,
+ cross_attention_kwargs=dict(
+ mode="r", ref_dict=ref_dict, is_cfg_guidance=is_cfg_guidance
+ ),
+ down_block_additional_residuals=(
+ [sample.to(dtype=weight_dtype) for sample in down_block_res_samples]
+ if down_block_res_samples is not None
+ else None
+ ),
+ mid_block_additional_residual=(
+ mid_block_res_sample.to(dtype=weight_dtype)
+ if mid_block_res_sample is not None
+ else None
+ ),
+ **kwargs,
+ )
+
+
+def scale_latents(latents):
+    """Shift latents by -0.22, then multiply by 0.75."""
+    return (latents - 0.22) * 0.75
+
+
+def unscale_latents(latents):
+    """Divide latents by 0.75, then shift by +0.22 (inverse of scale_latents)."""
+    return latents / 0.75 + 0.22
+
+
+def scale_image(image):
+    """Multiply image values by 0.5 and then divide by 0.8."""
+    return image * 0.5 / 0.8
+
+
+def unscale_image(image):
+    """Divide image values by 0.5 and then multiply by 0.8 (inverse of scale_image)."""
+    return image / 0.5 * 0.8
+
+
+class DepthControlUNet(torch.nn.Module):
+ """Wraps a `RefOnlyNoisedUNet` with a ControlNet driven by a depth input.
+
+ `forward` runs the ControlNet on the sample and passes its residuals to
+ the wrapped UNet.
+ """
+ def __init__(
+ self,
+ unet: RefOnlyNoisedUNet,
+ controlnet: Optional[diffusers.ControlNetModel] = None,
+ conditioning_scale=1.0,
+ ) -> None:
+ super().__init__()
+ self.unet = unet
+ # Build a ControlNet from the inner UNet when none is supplied.
+ if controlnet is None:
+ self.controlnet = diffusers.ControlNetModel.from_unet(unet.unet)
+ else:
+ self.controlnet = controlnet
+ # Prefer the xformers processor when that package is available.
+ DefaultAttnProc = AttnProcessor2_0
+ if is_xformers_available():
+ DefaultAttnProc = XFormersAttnProcessor
+ self.controlnet.set_attn_processor(DefaultAttnProc())
+ self.conditioning_scale = conditioning_scale
+
+ def __getattr__(self, name: str):
+ # Attributes not found on the wrapper are looked up on the wrapped UNet.
+ try:
+ return super().__getattr__(name)
+ except AttributeError:
+ return getattr(self.unet, name)
+
+ def forward(
+ self,
+ sample,
+ timestep,
+ encoder_hidden_states,
+ class_labels=None,
+ *args,
+ cross_attention_kwargs: dict,
+ **kwargs,
+ ):
+ """Run the ControlNet, then the wrapped UNet with its residuals.
+
+ `cross_attention_kwargs` must contain "control_depth"; it is removed from
+ a copy of the dict before the remaining entries go to the UNet.
+ """
+ # Copy so the caller's dict is not mutated by the pop below.
+ cross_attention_kwargs = dict(cross_attention_kwargs)
+ control_depth = cross_attention_kwargs.pop("control_depth")
+ down_block_res_samples, mid_block_res_sample = self.controlnet(
+ sample,
+ timestep,
+ encoder_hidden_states=encoder_hidden_states,
+ controlnet_cond=control_depth,
+ conditioning_scale=self.conditioning_scale,
+ return_dict=False,
+ )
+ # NOTE(review): class_labels, *args and **kwargs are accepted above but
+ # are not forwarded to the wrapped UNet here — confirm this is intended.
+ return self.unet(
+ sample,
+ timestep,
+ encoder_hidden_states=encoder_hidden_states,
+ down_block_res_samples=down_block_res_samples,
+ mid_block_res_sample=mid_block_res_sample,
+ cross_attention_kwargs=cross_attention_kwargs,
+ )
+
+
+class ModuleListDict(torch.nn.Module):
+    """Store a dict of modules as a ``ModuleList`` ordered by sorted key."""
+
+    def __init__(self, procs: dict) -> None:
+        super().__init__()
+        ordered_keys = sorted(procs.keys())
+        self.keys = ordered_keys
+        self.values = torch.nn.ModuleList([procs[key] for key in ordered_keys])
+
+    def __getitem__(self, key):
+        """Return the module stored under ``key`` (ValueError if unknown)."""
+        position = self.keys.index(key)
+        return self.values[position]
+
+
+class SuperNet(torch.nn.Module):
+ """Container that holds named modules in a ModuleList with remapped keys.
+
+ The modules are stored as `layers.<index>` (sorted by name); hooks on
+ `state_dict()` and `load_state_dict()` translate between those indexed
+ keys and the original names.
+ """
+ def __init__(self, state_dict: Dict[str, torch.Tensor]):
+ super().__init__()
+ # Sort by key so the index assigned to each module is deterministic.
+ state_dict = OrderedDict((k, state_dict[k]) for k in sorted(state_dict.keys()))
+ self.layers = torch.nn.ModuleList(state_dict.values())
+ # index -> name, and name -> index
+ self.mapping = dict(enumerate(state_dict.keys()))
+ self.rev_mapping = {v: k for k, v in enumerate(state_dict.keys())}
+
+ # .processor for unet, .self_attn for text encoder
+ self.split_keys = [".processor", ".self_attn"]
+
+ # we add a hook to state_dict() and load_state_dict() so that the
+ # naming fits with `unet.attn_processors`
+ def map_to(module, state_dict, *args, **kwargs):
+ # Saving: rewrite "layers.<num>..." keys to "<original name>...".
+ new_state_dict = {}
+ for key, value in state_dict.items():
+ num = int(key.split(".")[1]) # 0 is always "layers"
+ new_key = key.replace(f"layers.{num}", module.mapping[num])
+ new_state_dict[new_key] = value
+
+ return new_state_dict
+
+ def remap_key(key, state_dict):
+ # Return the module-name prefix of a key: everything up to and
+ # including the first matching split key, else the first dotted part.
+ for k in self.split_keys:
+ if k in key:
+ return key.split(k)[0] + k
+ return key.split(".")[0]
+
+ def map_from(module, state_dict, *args, **kwargs):
+ # Loading: rewrite "<original name>..." keys back to "layers.<num>..."
+ # in place. Keys are snapshotted first because the dict is mutated.
+ all_keys = list(state_dict.keys())
+ for key in all_keys:
+ replace_key = remap_key(key, state_dict)
+ new_key = key.replace(
+ replace_key, f"layers.{module.rev_mapping[replace_key]}"
+ )
+ state_dict[new_key] = state_dict[key]
+ del state_dict[key]
+
+ self._register_state_dict_hook(map_to)
+ self._register_load_state_dict_pre_hook(map_from, with_module=True)
+
+
+class Zero123PlusPipeline(diffusers.StableDiffusionPipeline):
+ """Image-conditioned pipeline built on top of `StableDiffusionPipeline`.
+
+ `__call__` takes a condition image, derives prompt embeddings from the text
+ encoder plus a CLIP vision embedding, encodes the image into a condition
+ latent, and delegates the sampling loop to the parent pipeline.
+ """
+ tokenizer: transformers.CLIPTokenizer
+ text_encoder: transformers.CLIPTextModel
+ vision_encoder: transformers.CLIPVisionModelWithProjection
+
+ feature_extractor_clip: transformers.CLIPImageProcessor
+ unet: UNet2DConditionModel
+ scheduler: diffusers.schedulers.KarrasDiffusionSchedulers
+
+ vae: AutoencoderKL
+ ramping: nn.Linear
+
+ feature_extractor_vae: transformers.CLIPImageProcessor
+
+ # Converts a depth image to a tensor normalized with mean 0.5 and std 0.5.
+ depth_transforms_multi = transforms.Compose(
+ [transforms.ToTensor(), transforms.Normalize([0.5], [0.5])]
+ )
+
+ def __init__(
+ self,
+ vae: AutoencoderKL,
+ text_encoder: CLIPTextModel,
+ tokenizer: CLIPTokenizer,
+ unet: UNet2DConditionModel,
+ scheduler: KarrasDiffusionSchedulers,
+ vision_encoder: transformers.CLIPVisionModelWithProjection,
+ feature_extractor_clip: CLIPImageProcessor,
+ feature_extractor_vae: CLIPImageProcessor,
+ ramping_coefficients: Optional[list] = None,
+ safety_checker=None,
+ ):
+ # Calls DiffusionPipeline.__init__ directly rather than the immediate
+ # parent's __init__.
+ DiffusionPipeline.__init__(self)
+
+ # The safety checker is always registered as None, regardless of the
+ # `safety_checker` argument.
+ self.register_modules(
+ vae=vae,
+ text_encoder=text_encoder,
+ tokenizer=tokenizer,
+ unet=unet,
+ scheduler=scheduler,
+ safety_checker=None,
+ vision_encoder=vision_encoder,
+ feature_extractor_clip=feature_extractor_clip,
+ feature_extractor_vae=feature_extractor_vae,
+ )
+ self.register_to_config(ramping_coefficients=ramping_coefficients)
+ self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+ self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
+
+ def prepare(self):
+ """Wrap a plain UNet in `RefOnlyNoisedUNet` (no-op if already wrapped)."""
+ train_sched = DDPMScheduler.from_config(self.scheduler.config)
+ if isinstance(self.unet, UNet2DConditionModel):
+ self.unet = RefOnlyNoisedUNet(self.unet, train_sched, self.scheduler).eval()
+
+ def add_controlnet(
+ self,
+ controlnet: Optional[diffusers.ControlNetModel] = None,
+ conditioning_scale=1.0,
+ ):
+ """Wrap the UNet in `DepthControlUNet` and return a `SuperNet` holding the controlnet."""
+ self.prepare()
+ self.unet = DepthControlUNet(self.unet, controlnet, conditioning_scale)
+ return SuperNet(OrderedDict([("controlnet", self.unet.controlnet)]))
+
+ def encode_condition_image(self, image: torch.Tensor):
+ """Encode an image tensor with the VAE and return a sample of the latent distribution."""
+ image = self.vae.encode(image).latent_dist.sample()
+ return image
+
+ def make_condition_lat(
+ self,
+ local_cond_image,
+ num_images_per_prompt: Optional[int] = 1,
+ guidance_scale=4.0,
+ ):
+ """Build the condition latent for an image.
+
+ When `guidance_scale > 1`, the latent of an all-zero image is prepended
+ along the batch dimension as the negative condition.
+ """
+ local_cond_image = to_rgb_image(local_cond_image)
+ local_cond_image_f = self.feature_extractor_vae(
+ images=local_cond_image, return_tensors="pt"
+ ).pixel_values
+
+ image = local_cond_image_f.to(device=self.vae.device, dtype=self.vae.dtype)
+ cond_lat = self.encode_condition_image(image)
+ if guidance_scale > 1:
+ negative_lat = self.encode_condition_image(torch.zeros_like(image))
+ cond_lat = torch.cat([negative_lat, cond_lat])
+ if num_images_per_prompt > 1:
+ # Repeat and reshape so the batch dimension is multiplied by
+ # num_images_per_prompt.
+ bs_embed, *lat_shape = cond_lat.shape
+ assert len(lat_shape) == 3
+ cond_lat = cond_lat.repeat(1, num_images_per_prompt, 1, 1)
+ cond_lat = cond_lat.view(bs_embed * num_images_per_prompt, *lat_shape)
+ return cond_lat
+
+ @torch.no_grad()
+ def __call__(
+ self,
+ image: Image.Image = None,
+ prompt="",
+ *args,
+ num_images_per_prompt: Optional[int] = 1,
+ guidance_scale=4.0,
+ depth_image: Image.Image = None,
+ output_type: Optional[str] = "pil",
+ width=640,
+ height=960,
+ num_inference_steps=28,
+ return_dict=True,
+ noisy_cond_lat=None,
+ **kwargs,
+ ):
+ """Generate output conditioned on `image`.
+
+ `noisy_cond_lat`, when given, is forwarded to the UNet through
+ `cross_attention_kwargs`. Returns an `ImagePipelineOutput`, or a
+ one-element tuple when `return_dict` is false.
+
+ Raises:
+ ValueError: If `image` is None.
+ """
+ self.prepare()
+ if image is None:
+ raise ValueError(
+ "Inputting embeddings not supported for this pipeline. Please pass an image."
+ )
+ # The same input serves as the global (CLIP) image and the local
+ # (VAE condition) image.
+ global_image = image
+ local_image = image
+
+ assert not isinstance(image, torch.Tensor)
+ image = to_rgb_image(image)
+ global_image = to_rgb_image(global_image)
+ image_2 = self.feature_extractor_clip(
+ images=global_image, return_tensors="pt"
+ ).pixel_values
+
+ # Depth input is only prepared when the UNet exposes a controlnet.
+ if depth_image is not None and hasattr(self.unet, "controlnet"):
+ depth_image = to_rgb_image(depth_image)
+ depth_image = self.depth_transforms_multi(depth_image).to(
+ device=self.unet.controlnet.device, dtype=self.unet.controlnet.dtype
+ )
+ image_2 = image_2.to(device=self.vae.device, dtype=self.vae.dtype)
+
+ encoded = self.vision_encoder(image_2, output_hidden_states=False)
+ global_embeds = encoded.image_embeds
+ global_embeds = global_embeds.unsqueeze(-2)
+
+ # Use encode_prompt when the parent pipeline provides it, otherwise
+ # fall back to _encode_prompt.
+ if hasattr(self, "encode_prompt"):
+ encoder_hidden_states = self.encode_prompt(prompt, self.device, 1, False)[0]
+ else:
+ encoder_hidden_states = self._encode_prompt(prompt, self.device, 1, False)
+ # The CLIP image embedding, weighted by the configured ramping
+ # coefficients, is added to the text embeddings.
+ ramp = global_embeds.new_tensor(self.config.ramping_coefficients).unsqueeze(-1)
+ encoder_hidden_states = encoder_hidden_states + global_embeds * ramp
+ cond_lat = self.make_condition_lat(local_image, num_images_per_prompt, guidance_scale)
+
+ cak = dict(cond_lat=cond_lat, noisy_cond_lat=noisy_cond_lat)
+ if hasattr(self.unet, "controlnet"):
+ cak["control_depth"] = depth_image
+ # The parent pipeline always runs with output_type="latent"; decoding,
+ # if requested, happens below.
+ latents: torch.Tensor = (
+ super()
+ .__call__(
+ None,
+ *args,
+ cross_attention_kwargs=cak,
+ guidance_scale=guidance_scale,
+ num_images_per_prompt=num_images_per_prompt,
+ prompt_embeds=encoder_hidden_states,
+ num_inference_steps=num_inference_steps,
+ output_type="latent",
+ width=width,
+ height=height,
+ **kwargs,
+ )
+ .images
+ )
+ latents = unscale_latents(latents)
+ if not output_type == "latent":
+ image = unscale_image(
+ self.vae.decode(
+ latents / self.vae.config.scaling_factor, return_dict=False
+ )[0]
+ )
+ else:
+ # For latent output the unscaled latents are returned as the "image".
+ image = latents
+
+ image = self.image_processor.postprocess(image, output_type=output_type)
+ if not return_dict:
+ return (image,)
+
+ return ImagePipelineOutput(images=image)
diff --git a/src/utils.py b/src/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..103af98cd5b492305d62ab27f5619094be381c8d
--- /dev/null
+++ b/src/utils.py
@@ -0,0 +1,46 @@
+import torch
+import os
+from diffusers import DDPMScheduler
+from pipeline import Zero123PlusPipeline
+from huggingface_hub import hf_hub_download
+from PIL import Image
+
+
+def load_z123_pipe(device_number):
+    """Load the Zero123++ pipeline with a DDPM scheduler and custom UNet weights.
+
+    Args:
+        device_number: CUDA device index; ignored when CUDA is unavailable,
+            in which case the CPU is used.
+
+    Returns:
+        The pipeline, moved to the selected device.
+    """
+    device_name = f"cuda:{device_number}" if torch.cuda.is_available() else "cpu"
+    device = torch.device(device_name)
+
+    pipeline = Zero123PlusPipeline.from_pretrained(
+        "sudo-ai/zero123plus-v1.2", torch_dtype=torch.float16
+    )
+    # DDPM supports custom timesteps
+    pipeline.scheduler = DDPMScheduler.from_config(pipeline.scheduler.config)
+
+    # Custom white-background UNet: prefer a local checkpoint, otherwise
+    # download it from the Hugging Face Hub.
+    local_checkpoint = "ckpts/diffusion_pytorch_model.bin"
+    if not os.path.exists(local_checkpoint):
+        unet_ckpt_path = hf_hub_download(
+            repo_id="TencentARC/InstantMesh",
+            filename="diffusion_pytorch_model.bin",
+            repo_type="model",
+        )
+    else:
+        unet_ckpt_path = local_checkpoint
+
+    # NOTE(review): torch.load unpickles the checkpoint; only use trusted files.
+    weights = torch.load(unet_ckpt_path, map_location="cpu")
+    pipeline.unet.load_state_dict(weights, strict=True)
+
+    pipeline.to(device)
+    return pipeline
+
+
+def add_white_bg(image):
+    """Return an RGB image, compositing any transparency onto white.
+
+    Images with an alpha channel (``RGBA``/``LA``), and palette images that
+    carry transparency info, are pasted onto a white background using their
+    alpha channel as the mask. Other non-RGB modes are converted to RGB so
+    downstream RGB-only code does not fail. RGB inputs are returned unchanged
+    (same object).
+
+    Args:
+        image: A PIL image.
+
+    Returns:
+        A PIL image in ``RGB`` mode.
+    """
+    # Palette images can hide transparency in `info`; expose it as real alpha.
+    if image.mode == "P" and "transparency" in image.info:
+        image = image.convert("RGBA")
+
+    if image.mode in ("RGBA", "LA"):
+        # Create a white background image of the same size
+        white_bg = Image.new("RGB", image.size, (255, 255, 255))
+        # Paste original image onto white background using alpha channel as mask
+        white_bg.paste(image, mask=image.getchannel("A"))
+        return white_bg
+
+    if image.mode != "RGB":
+        return image.convert("RGB")
+
+    # Already RGB and opaque: return the original image
+    return image