Spaces:

alexnasa
/

Chain-of-Zoom

Running on Zero

App Files Files Community

alexnasa committed on May 31

Commit

f6e8319

verified ·

1 Parent(s): d73c075

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -174

app.py CHANGED Viewed

@@ -1,137 +1,89 @@
-import gradio as gr
-import subprocess
 import os
 import shutil
 from pathlib import Path
-from PIL import Image, ImageDraw
 import spaces
-# ------------------------------------------------------------------
-# CONFIGURE THESE PATHS TO MATCH YOUR PROJECT STRUCTURE
-# ------------------------------------------------------------------
 INPUT_DIR   = "samples"
 OUTPUT_DIR  = "inference_results/coz_vlmprompt"
-# ------------------------------------------------------------------
-# HELPER: Resize & center-crop to 512, preserving aspect ratio
-# ------------------------------------------------------------------
 def resize_and_center_crop(img: Image.Image, size: int) -> Image.Image:
-    """
-    Resize the input PIL image so that its shorter side == `size`,
-    then center-crop to exactly (size x size).
-    """
     w, h = img.size
     scale = size / min(w, h)
     new_w, new_h = int(w * scale), int(h * scale)
     img = img.resize((new_w, new_h), Image.LANCZOS)
     left = (new_w - size) // 2
     top  = (new_h - size) // 2
     return img.crop((left, top, left + size, top + size))
-# ------------------------------------------------------------------
-# HELPER: Draw four concentric, centered rectangles on a 512×512 image
-# ------------------------------------------------------------------
 def make_preview_with_boxes(image_path: str, scale_option: str) -> Image.Image:
-    """
-    1) Open the uploaded image from disk.
-    2) Resize & center-crop it to exactly 512×512.
-    3) Depending on scale_option ("1x","2x","4x"), compute four rectangle sizes:
-       - "1x": [512, 512, 512, 512]
-       - "2x": [256, 128, 64, 32]
-       - "4x": [128, 64, 32, 16]
-    4) Draw each of those four rectangles (outline only), all centered.
-    5) Return the modified PIL image.
-    """
     try:
         orig = Image.open(image_path).convert("RGB")
     except Exception as e:
-        # If something fails, return a plain 512×512 gray image as fallback
         fallback = Image.new("RGB", (512, 512), (200, 200, 200))
         draw = ImageDraw.Draw(fallback)
         draw.text((20, 20), f"Error:\n{e}", fill="red")
         return fallback
-    # 1. Resize & center-crop to 512×512
-    base = resize_and_center_crop(orig, 512)  # now `base.size == (512,512)`
-    # 2. Determine the four box sizes
-    scale_int = int(scale_option.replace("x", ""))  # e.g. "2x" -> 2
-    if scale_int == 1:
-        sizes = [512, 512, 512, 512]
-    else:
-        # For scale=2: sizes = [512//2, 512//(2*2), 512//(2*4), 512//(2*8)] -> [256,128,64,32]
-        # For scale=4: sizes = [512//4, 512//(4*2), 512//(4*4), 512//(4*8)] -> [128,64,32,16]
-        sizes = [512 // (scale_int * (2 ** i)) for i in range(4)]
     draw = ImageDraw.Draw(base)
-    # 3. Outline color cycle (you can change these or use just one color)
     colors = ["red", "lime", "cyan", "yellow"]
-    width = 3  # thickness of each rectangle’s outline
     for idx, s in enumerate(sizes):
-        # Compute top-left corner so that box is centered in 512×512
         x0 = (512 - s) // 2
         y0 = (512 - s) // 2
         x1 = x0 + s
         y1 = y0 + s
-        draw.rectangle([(x0, y0), (x1, y1)], outline=colors[idx % len(colors)], width=width)
     return base
-# ------------------------------------------------------------------
-# HELPER FUNCTIONS FOR INFERENCE & CAPTION (unchanged from your original)
-# ------------------------------------------------------------------
 @spaces.GPU(duration=120)
-def run_with_upload(uploaded_image_path, upscale_option):
     """
-    1) Clear INPUT_DIR
-    2) Save the uploaded file as input.png in INPUT_DIR
-    3) Read `upscale_option` (e.g. "1x", "2x", "4x") → turn it into "1","2","4"
-    4) Call inference_coz.py with `--upscale <that_value>`
-    5) Return the FOUR output‐PNG file‐paths as a Python list, so that Gradio's Gallery
-       can display them.
     """
-    # ————————————————————————————————————————————————————————————
-    # (Copy‐paste exactly your existing code here; no changes needed)
-    # ————————————————————————————————————————————————————————————
-    os.makedirs(INPUT_DIR, exist_ok=True)
-    for fn in os.listdir(INPUT_DIR):
-        full_path = os.path.join(INPUT_DIR, fn)
-        try:
-            if os.path.isfile(full_path) or os.path.islink(full_path):
-                os.remove(full_path)
-            elif os.path.isdir(full_path):
-                shutil.rmtree(full_path)
-        except Exception as e:
-            print(f"Warning: could not delete {full_path}: {e}")
     if uploaded_image_path is None:
         return []
     try:
         pil_img = Image.open(uploaded_image_path).convert("RGB")
-    except Exception as e:
-        print(f"Error: could not open uploaded image: {e}")
-        return []
-    save_path = Path(INPUT_DIR) / "input.png"
-    try:
         pil_img.save(save_path, format="PNG")
     except Exception as e:
-        print(f"Error: could not save as PNG: {e}")
         return []
-    upscale_value = upscale_option.replace("x", "")  # e.g. "2x" → "2"
     cmd = [
         "python", "inference_coz.py",
-        "-i", INPUT_DIR,
-        "-o", OUTPUT_DIR,
         "--rec_type", "recursive_multiscale",
         "--prompt_type", "vlm",
         "--upscale", upscale_value,
@@ -147,32 +99,23 @@ def run_with_upload(uploaded_image_path, upscale_option):
         print("Inference failed:", err)
         return []
-    per_sample_dir = os.path.join(OUTPUT_DIR, "per-sample", "input")
-    expected_files = [
-        os.path.join(per_sample_dir, f"{i}.png")
-        for i in range(1, 5)
-    ]
     for fp in expected_files:
         if not os.path.isfile(fp):
             print(f"Warning: expected file not found: {fp}")
             return []
     return expected_files
 def get_caption(src_gallery, evt: gr.SelectData):
-    """
-    Given a clicked‐on image in the gallery, read the corresponding .txt in
-    .../per-sample/input/txt and return its contents.
-    """
     if not src_gallery or not os.path.isfile(src_gallery[evt.index][0]):
         return "No caption available."
     selected_image_path = src_gallery[evt.index][0]
     base = os.path.basename(selected_image_path)  # e.g. "2.png"
     stem = os.path.splitext(base)[0]              # e.g. "2"
-    txt_folder = os.path.join(OUTPUT_DIR, "per-sample", "input", "txt")
     txt_path = os.path.join(txt_folder, f"{int(stem) - 1}.txt")
     if not os.path.isfile(txt_path):
         return f"Caption file not found: {int(stem) - 1}.txt"
     try:
@@ -182,11 +125,6 @@ def get_caption(src_gallery, evt: gr.SelectData):
     except Exception as e:
         return f"Error reading caption: {e}"
-# ------------------------------------------------------------------
-# BUILD THE GRADIO INTERFACE (with updated callbacks)
-# ------------------------------------------------------------------
 css = """
 #col-container {
     margin: 0 auto;
@@ -195,7 +133,6 @@ css = """
 """
 with gr.Blocks(css=css) as demo:
     gr.HTML(
         """
         <div style="text-align: center;">
@@ -212,98 +149,39 @@ with gr.Blocks(css=css) as demo:
     )
     with gr.Column(elem_id="col-container"):
         with gr.Row():
             with gr.Column():
-                # 1) Image upload component
-                upload_image = gr.Image(
-                    label="Upload your input image",
-                    type="filepath"
-                )
-                # 2) Radio for choosing 1× / 2× / 4× upscaling
-                upscale_radio = gr.Radio(
-                    choices=["1x", "2x", "4x"],
-                    value="2x",
-                    show_label=False
-                )
-                # 3) Button to launch inference
                 run_button = gr.Button("Chain-of-Zoom it")
-                # 4) Show the 512×512 preview with four centered rectangles
-                preview_with_box = gr.Image(
-                    label="Preview (512×512 with centered boxes)",
-                    type="pil",        # we’ll return a PIL.Image from our function
-                    interactive=False
-                )
             with gr.Column():
-                # 5) Gallery to display multiple output images
-                output_gallery = gr.Gallery(
-                    label="Inference Results",
-                    show_label=True,
-                    elem_id="gallery",
-                    columns=[2], rows=[2]
-                )
-                # 6) Textbox under the gallery for showing captions
-                caption_text = gr.Textbox(
-                    label="Caption",
-                    lines=4,
-                    placeholder="Click on any image above to see its caption here."
-                )
-        # ------------------------------------------------------------------
-        # CALLBACK #1: Whenever the user uploads or changes the radio, update preview
-        # ------------------------------------------------------------------
-        def update_preview(img_path, scale_opt):
-            """
-            If there's no image uploaded yet, return None (Gradio will show blank).
-            Otherwise, draw the resized 512×512 + four boxes and return it.
-            """
-            if img_path is None:
-                return None
-            return make_preview_with_boxes(img_path, scale_opt)
-        # When the user uploads a new file:
         upload_image.change(
-            fn=update_preview,
             inputs=[upload_image, upscale_radio],
             outputs=[preview_with_box]
         )
-        # Also trigger preview redraw if they switch 1×/2×/4× after uploading:
         upscale_radio.change(
-            fn=update_preview,
             inputs=[upload_image, upscale_radio],
             outputs=[preview_with_box]
         )
-        # ------------------------------------------------------------------
-        # CALLBACK #2: When “Chain-of-Zoom it” is clicked, run inference
-        # ------------------------------------------------------------------
         run_button.click(
             fn=run_with_upload,
-            inputs=[upload_image, upscale_radio],
             outputs=[output_gallery]
         )
-        # ------------------------------------------------------------------
-        # CALLBACK #3: When an image in the gallery is clicked, show its caption
-        # ------------------------------------------------------------------
         output_gallery.select(
             fn=get_caption,
             inputs=[output_gallery],
             outputs=[caption_text]
         )
-# ------------------------------------------------------------------
-# START THE GRADIO SERVER
-# ------------------------------------------------------------------
 demo.launch(share=True)

 import os
 import shutil
+import subprocess
 from pathlib import Path
+from PIL import Image
+import gradio as gr
 import spaces
 INPUT_DIR   = "samples"
 OUTPUT_DIR  = "inference_results/coz_vlmprompt"
 def resize_and_center_crop(img: Image.Image, size: int) -> Image.Image:
     """Scale `img` so its shorter side equals `size`, then center-crop a square.

     Args:
         img: Source PIL image.
         size: Edge length, in pixels, of the square result.

     Returns:
         The `size` x `size` region cut from the center of the resized image.

     Raises:
         ZeroDivisionError: If the image has a zero-length side.
     """
     w, h = img.size
     scale = size / min(w, h)
     # Float rounding can leave `side * scale` a hair below `size` (a 49 px side
     # scaled to 512 evaluates to 511.99999999999994 and truncates to 511).
     # Clamp both dimensions so the crop box never reaches past the resized image.
     new_w = max(size, int(w * scale))
     new_h = max(size, int(h * scale))
     img = img.resize((new_w, new_h), Image.LANCZOS)
     left = (new_w - size) // 2
     top = (new_h - size) // 2
     return img.crop((left, top, left + size, top + size))
 def make_preview_with_boxes(image_path: str, scale_option: str) -> Image.Image:
     """Render a 512x512 preview of an image with four centered zoom boxes.

     Args:
         image_path: Path of the image file to preview.
         scale_option: Zoom factor label such as "1x", "2x" or "4x".

     Returns:
         The center-cropped 512x512 image with four outlined squares drawn on
         it, or a gray 512x512 placeholder carrying the error text when the
         image cannot be opened.

     Raises:
         ValueError: If `scale_option` without its "x" is not an integer.
     """
     # Function-scope import: the file's top-level import only brings in `Image`.
     from PIL import ImageDraw
     preview_size = 512
     try:
         orig = Image.open(image_path).convert("RGB")
     except Exception as e:
         fallback = Image.new("RGB", (preview_size, preview_size), (200, 200, 200))
         ImageDraw.Draw(fallback).text((20, 20), f"Error:\n{e}", fill="red")
         return fallback
     base = resize_and_center_crop(orig, preview_size)
     scale_int = int(scale_option.replace("x", ""))
     if scale_int == 1:
         sizes = [preview_size] * 4
     else:
         # Each box is half the previous one, e.g. "2x" -> 256, 128, 64, 32.
         sizes = [preview_size // (scale_int * (2 ** i)) for i in range(4)]
     draw = ImageDraw.Draw(base)
     colors = ["red", "lime", "cyan", "yellow"]
     width = 3
     for s, color in zip(sizes, colors):
         if s < 1:
             # Nothing to outline once the box has shrunk below one pixel.
             continue
         x0 = y0 = (preview_size - s) // 2
         # Pillow treats both rectangle corners as inclusive, so a box that is
         # `s` pixels wide ends at `x0 + s - 1`. Using `x0 + s` pushed the "1x"
         # outline to coordinate 512, partly off the 512x512 canvas.
         x1 = y1 = x0 + s - 1
         draw.rectangle([(x0, y0), (x1, y1)], outline=color, width=width)
     return base
 @spaces.GPU(duration=120)
+def run_with_upload(uploaded_image_path, upscale_option, session_id=None):
     """
+    Each invocation creates/uses:
+      - samples/<session_id>/input.png   ← user’s uploaded image
+      - inference_results/coz_vlmprompt/<session_id>/per-sample/input/*.png ← inference outputs
     """
     if uploaded_image_path is None:
         return []
+    # 1) Prepare a per-session input directory
+    print(session_id)
+    session_folder = os.path.join(INPUT_DIR, str(session_id))
+    os.makedirs(session_folder, exist_ok=True)
+    # 2) Clear only this session’s folder
+    for fn in os.listdir(session_folder):
+        full_path = os.path.join(session_folder, fn)
+        if os.path.isfile(full_path) or os.path.islink(full_path):
+            os.remove(full_path)
+        elif os.path.isdir(full_path):
+            shutil.rmtree(full_path)
+    # 3) Save uploaded image to session_folder/input.png
     try:
         pil_img = Image.open(uploaded_image_path).convert("RGB")
+        save_path = Path(session_folder) / "input.png"
         pil_img.save(save_path, format="PNG")
     except Exception as e:
+        print(f"Error: could not save uploaded image: {e}")
         return []
+    # 4) Define a per-session output directory
+    session_output_dir = os.path.join(OUTPUT_DIR, str(session_id))
+    os.makedirs(session_output_dir, exist_ok=True)
+    # 5) Build and run the inference command
+    upscale_value = upscale_option.replace("x", "")
     cmd = [
         "python", "inference_coz.py",
+        "-i", session_folder,
+        "-o", session_output_dir,
         "--rec_type", "recursive_multiscale",
         "--prompt_type", "vlm",
         "--upscale", upscale_value,
         print("Inference failed:", err)
         return []
+    # 6) Gather output file paths (1.png through 4.png)
+    per_sample_dir = os.path.join(session_output_dir, "per-sample", "input")
+    expected_files = [os.path.join(per_sample_dir, f"{i}.png") for i in range(1, 5)]
     for fp in expected_files:
         if not os.path.isfile(fp):
             print(f"Warning: expected file not found: {fp}")
             return []
     return expected_files
 def get_caption(src_gallery, evt: gr.SelectData):
     if not src_gallery or not os.path.isfile(src_gallery[evt.index][0]):
         return "No caption available."
     selected_image_path = src_gallery[evt.index][0]
     base = os.path.basename(selected_image_path)  # e.g. "2.png"
     stem = os.path.splitext(base)[0]              # e.g. "2"
+    txt_folder = os.path.join(OUTPUT_DIR, str(evt.index), "per-sample", "input", "txt")
     txt_path = os.path.join(txt_folder, f"{int(stem) - 1}.txt")
     if not os.path.isfile(txt_path):
         return f"Caption file not found: {int(stem) - 1}.txt"
     try:
     except Exception as e:
         return f"Error reading caption: {e}"
 css = """
 #col-container {
     margin: 0 auto;
 """
 with gr.Blocks(css=css) as demo:
     gr.HTML(
         """
         <div style="text-align: center;">
     )
     with gr.Column(elem_id="col-container"):
         with gr.Row():
             with gr.Column():
+                upload_image = gr.Image(label="Upload your input image", type="filepath")
+                upscale_radio = gr.Radio(choices=["1x", "2x", "4x"], value="2x", show_label=False)
                 run_button = gr.Button("Chain-of-Zoom it")
+                preview_with_box = gr.Image(label="Preview (512×512 with centered boxes)", type="pil", interactive=False)
             with gr.Column():
+                output_gallery = gr.Gallery(label="Inference Results", show_label=True, columns=[2], rows=[2])
+                caption_text = gr.Textbox(label="Caption", lines=4, placeholder="Click on any image above to see its caption here.")
         upload_image.change(
+            fn=lambda img_path, scale_opt: make_preview_with_boxes(img_path, scale_opt) if img_path is not None else None,
             inputs=[upload_image, upscale_radio],
             outputs=[preview_with_box]
         )
         upscale_radio.change(
+            fn=lambda img_path, scale_opt: make_preview_with_boxes(img_path, scale_opt) if img_path is not None else None,
             inputs=[upload_image, upscale_radio],
             outputs=[preview_with_box]
         )
+        # NOTE(review): a bare gr.State() passes its stored value (None by default), not an auto-generated session id
         run_button.click(
             fn=run_with_upload,
+            inputs=[upload_image, upscale_radio, gr.State()],
             outputs=[output_gallery]
         )
         output_gallery.select(
             fn=get_caption,
             inputs=[output_gallery],
             outputs=[caption_text]
         )
 demo.launch(share=True)