alexnasa committed
Commit 24ee135 · verified · 1 Parent(s): 80bb1dc

Update app.py

Files changed (1): app.py (+76, -54)
app.py CHANGED
@@ -6,6 +6,7 @@ from pathlib import Path
 from PIL import Image
 import spaces
 
+
 # -----------------------------------------------------------------------------
 # CONFIGURE THESE PATHS TO MATCH YOUR PROJECT STRUCTURE
 # -----------------------------------------------------------------------------
@@ -18,13 +19,13 @@ OUTPUT_DIR = "inference_results/coz_vlmprompt"
 # -----------------------------------------------------------------------------
 
 @spaces.GPU()
-def run_with_upload(uploaded_image_path):
+def run_with_upload(uploaded_image_path, upscale_option):
     """
-    1) Clear out INPUT_DIR (so old samples don't linger).
-    2) Copy the uploaded image into INPUT_DIR.
-    3) Run your inference_coz.py command (which reads from -i INPUT_DIR).
-    4) After it finishes, find the most recently modified PNG in OUTPUT_DIR.
-    5) Return a PIL.Image, which Gradio will display.
+    1) Clear INPUT_DIR.
+    2) Save the uploaded file as input.png in INPUT_DIR.
+    3) Read `upscale_option` (e.g. "1x", "2x", "4x") and turn it into "1", "2", or "4".
+    4) Call inference_coz.py with `--upscale <that_value>`.
+    5) Stitch together (or otherwise collect) the resulting 1.png–4.png outputs.
     """
 
     # 1) Make sure INPUT_DIR exists; if it does, delete everything inside.
@@ -61,13 +62,15 @@ def run_with_upload(uploaded_image_path):
 
     # 3) Build and run your inference_coz.py command.
     #    This will block until it completes.
+    upscale_value = upscale_option.replace("x", "")  # e.g. "2x" → "2"
+
     cmd = [
         "python", "inference_coz.py",
         "-i", INPUT_DIR,
         "-o", OUTPUT_DIR,
         "--rec_type", "recursive_multiscale",
         "--prompt_type", "vlm",
-        "--upscale", "2",
+        "--upscale", upscale_value,
         "--lora_path", "ckpt/SR_LoRA/model_20001.pkl",
         "--vae_path", "ckpt/SR_VAE/vae_encoder_20001.pt",
         "--pretrained_model_name_or_path", "stabilityai/stable-diffusion-3-medium-diffusers",
@@ -81,41 +84,49 @@ def run_with_upload(uploaded_image_path):
         print("Inference failed:", err)
         return None
 
-    # 4) After it finishes, scan OUTPUT_DIR for .png files.
-
-    RECUSIVE_DIR = f'{OUTPUT_DIR}/recursive'
-
-    if not os.path.isdir(RECUSIVE_DIR):
-        return None
+    # -------------------------------------------------------------------------
+    # 4) After inference, look for the four numbered PNGs and stitch them
+    # -------------------------------------------------------------------------
+    per_sample_dir = os.path.join(OUTPUT_DIR, "per-sample", "input")
+    expected_files = [os.path.join(per_sample_dir, f"{i}.png") for i in range(1, 5)]
+    pil_images = []
+    for fp in expected_files:
+        if not os.path.isfile(fp):
+            print(f"Warning: expected file not found: {fp}")
+            return None
+        try:
+            img = Image.open(fp).convert("RGB")
+            pil_images.append(img)
+        except Exception as e:
+            print(f"Error opening {fp}: {e}")
+            return None
 
-    png_files = [
-        os.path.join(RECUSIVE_DIR, fn)
-        for fn in os.listdir(RECUSIVE_DIR)
-        if fn.lower().endswith(".png")
-    ]
-    if not png_files:
+    if len(pil_images) != 4:
+        print(f"Error: found {len(pil_images)} images, but need 4.")
         return None
 
-    # 5) Pick the most recently modified PNG
-    latest_png = max(png_files, key=os.path.getmtime)
+    widths, heights = zip(*(im.size for im in pil_images))
+    w, h = widths[0], heights[0]
 
-    # 6) Open and return a PIL.Image. Gradio will display it automatically.
-    try:
-        img = Image.open(latest_png).convert("RGB")
-    except Exception as e:
-        print(f"Error opening {latest_png}: {e}")
-        return None
+    grid_w = w * 2
+    grid_h = h * 2
+    # composite = Image.new("RGB", (grid_w, grid_h))
 
-    return img
+    # composite.paste(pil_images[0], (0, 0))
+    # composite.paste(pil_images[1], (w, 0))
+    # composite.paste(pil_images[2], (0, h))
+    # composite.paste(pil_images[3], (w, h))
 
-# -----------------------------------------------------------------------------
+    return [pil_images[0], pil_images[1], pil_images[2], pil_images[3]]
+
+# -------------------------------------------------------------
 # BUILD THE GRADIO INTERFACE
 # -----------------------------------------------------------------------------
 
 css="""
 #col-container {
     margin: 0 auto;
-    max-width: 720px;
+    max-width: 1024px;
 }
 """
 
@@ -138,32 +149,43 @@ with gr.Blocks(css=css) as demo:
 
     with gr.Column(elem_id="col-container"):
 
-        # 1) Image upload component. We set type="filepath" so the callback
-        #    (run_with_upload) will receive a local path to the uploaded file.
-        upload_image = gr.Image(
-            label="Upload your input image",
-            type="filepath"
-        )
-
-        # 2) A button that the user will click to launch inference.
-        run_button = gr.Button("Run Inference")
-
-        # 3) An output <Image> where we will show the final PNG.
-        output_image = gr.Image(
-            label="Inference Result",
-            type="pil"   # because run_with_upload() returns a PIL.Image
-        )
-
-        # Wire the button: when clicked, call run_with_upload(upload_image), put
-        #    its return value into output_image.
-        run_button.click(
-            fn=run_with_upload,
-            inputs=upload_image,
-            outputs=output_image
-        )
+        with gr.Row():
+
+            with gr.Column():
+                # 1) Image upload component. We set type="filepath" so the callback
+                #    (run_with_upload) will receive a local path to the uploaded file.
+                upload_image = gr.Image(
+                    label="Upload your input image",
+                    type="filepath"
+                )
+                # 2) Radio for choosing 1× / 2× / 4× upscaling
+                upscale_radio = gr.Radio(
+                    choices=["1x", "2x", "4x"],
+                    value="2x",
+                    show_label=False
+                )
+
+                # 3) A button that the user will click to launch inference.
+                run_button = gr.Button("Chain-of-Zoom it")
+
+            # 4) Gallery to display the multiple output images
+            output_gallery = gr.Gallery(
+                label="Inference Results",
+                show_label=True,
+                elem_id="gallery",
+                columns=[2], rows=[2]
+            )
+
+        # Wire the button: when clicked, call run_with_upload(upload_image, upscale_radio)
+        # and put its return value into output_gallery.
+        run_button.click(
+            fn=run_with_upload,
+            inputs=[upload_image, upscale_radio],
+            outputs=output_gallery
+        )
 
 # -----------------------------------------------------------------------------
 # START THE GRADIO SERVER
 # -----------------------------------------------------------------------------
 
-demo.launch(share=True)
+demo.launch(share=True)
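
Note that the new return path hands the four per-scale PNGs to the gr.Gallery as a list and leaves the 2×2 composite (the commented-out composite.paste lines) unused. For reference, a minimal sketch of that stitch, assuming four equally sized RGB images as loaded above; stitch_2x2 is a hypothetical helper name, not part of this commit:

    from PIL import Image

    def stitch_2x2(pil_images):
        # Paste four equally sized PIL images into one 2x2 composite
        # (hypothetical helper; the committed code returns the list instead).
        w, h = pil_images[0].size
        composite = Image.new("RGB", (w * 2, h * 2))
        composite.paste(pil_images[0], (0, 0))  # top-left
        composite.paste(pil_images[1], (w, 0))  # top-right
        composite.paste(pil_images[2], (0, h))  # bottom-left
        composite.paste(pil_images[3], (w, h))  # bottom-right
        return composite

Returning such a composite from run_with_upload would also mean switching the output component back to a single gr.Image instead of the gallery.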