Spaces:

liruiw
/

hma

Runtime error

+import gradio as gr
+import spaces
+import gradio as gr
+import numpy as np
+from PIL import Image
+import cv2
+from sim.simulator import GenieSimulator
+import os
+if not os.path.exists("data/mar_ckpt/langtable"):
+    # download from google drive
+    import gdown
+    gdown.download_folder("https://drive.google.com/drive/u/2/folders/1XU87cRqV-IMZA6RLiabIR_uZngynvUFN")
+    os.system("mkdir -p data/mar_ckpt/; mv langtable data/mar_ckpt/")
+RES = 512
+PROMPT_HORIZON = 3
+IMAGE_DIR = "sim/assets/langtable_prompt/"
+# Load available images
+available_images = sorted([img for img in os.listdir(IMAGE_DIR) if img.endswith(".png")])
+# Helper function to reset GenieSimulator with the selected image
+@spaces.GPU
+def initialize_simulator(image_name):
+    global genie
+    image_path = os.path.join(IMAGE_DIR, image_name)
+    image = Image.open(image_path)
+    prompt_image = np.tile(np.array(image), (genie.prompt_horizon, 1, 1, 1)).astype(np.uint8)
+    prompt_action = np.zeros((genie.prompt_horizon - 1, genie.action_stride, 2)).astype(np.float32)
+    genie.set_initial_state((prompt_image, prompt_action))
+    reset_image = genie.reset()
+    reset_image = cv2.resize(reset_image, (RES, RES))
+    return Image.fromarray(reset_image)
+# Example model: takes a direction and returns a random image
+@spaces.GPU
+def model(direction: str):
+    global genie
+    if direction == 'right':
+        action = np.array([0, 0.05])
+    elif direction == 'left':
+        action = np.array([0, -0.05])
+    elif direction == 'down':
+        action = np.array([0.05, 0])
+    elif direction == 'up':
+        action = np.array([-0.05, 0])
+    else:
+        raise ValueError(f"Invalid direction: {direction}")
+    next_image = genie.step(action)['pred_next_frame']
+    next_image = cv2.resize(next_image, (RES, RES))
+    return Image.fromarray(next_image)
+# Gradio function to handle user input
+@spaces.GPU
+def handle_input(direction):
+    print(f"User clicked: {direction}")
+    new_image = model(direction)  # Get a new image from the model
+    return new_image
+# Gradio function to handle image selection
+@spaces.GPU
+def handle_image_selection(image_name):
+    print(f"User selected image: {image_name}")
+    return initialize_simulator(image_name)
+if __name__ == '__main__':
+    # Build the simulator that the callbacks above reach through the
+    # module-level name `genie`.
+    genie = GenieSimulator(
+        image_encoder_type='temporalvae',
+        image_encoder_ckpt='stabilityai/stable-video-diffusion-img2vid',
+        quantize=False,
+        backbone_type='stmar',
+        backbone_ckpt='data/mar_ckpt/langtable',
+        prompt_horizon=PROMPT_HORIZON,
+        action_stride=1,
+        domain='language_table',
+    )
+    # Seed the simulator with a fixed default frame, tiled once per prompt step.
+    image = Image.open("sim/assets/langtable_prompt/frame_06.png")
+    prompt_image = np.tile(
+        np.array(image), (genie.prompt_horizon, 1, 1, 1)
+    ).astype(np.uint8)
+    # NOTE(review): this allocates prompt_horizon action rows, whereas
+    # initialize_simulator() allocates prompt_horizon - 1 -- confirm which
+    # length set_initial_state() expects.
+    prompt_action = np.zeros(
+        (genie.prompt_horizon, genie.action_stride, 2)
+    ).astype(np.float32)
+    genie.set_initial_state((prompt_image, prompt_action))
+    # NOTE(review): the frame returned by reset() is not referenced again
+    # below (it is not passed to image_display).
+    image = genie.reset()
+    # UI layout: image picker row, output image row, then the direction buttons.
+    with gr.Blocks() as demo:
+        with gr.Row():
+            image_selector = gr.Dropdown(
+                choices=available_images, value=available_images[0], label="Select an Image"
+            )
+            select_button = gr.Button("Load Image")
+        with gr.Row():
+            image_display = gr.Image(type="pil", label="Generated Image")
+        with gr.Row():
+            up = gr.Button("↑ Up")
+        with gr.Row():
+            left = gr.Button("← Left")
+            down = gr.Button("↓ Down")
+            right = gr.Button("→ Right")
+        # Define interactions
+        # "Load Image" resets the simulator with the file chosen in the dropdown.
+        select_button.click(
+            fn=handle_image_selection, inputs=image_selector, outputs=image_display
+        )
+        # Each zero-argument lambda binds a fixed direction string, so the
+        # direction buttons declare no inputs; the result replaces image_display.
+        up.click(fn=lambda: handle_input("up"), outputs=image_display, show_progress='hidden')
+        down.click(fn=lambda: handle_input("down"), outputs=image_display, show_progress='hidden')
+        left.click(fn=lambda: handle_input("left"), outputs=image_display, show_progress='hidden')
+        right.click(fn=lambda: handle_input("right"), outputs=image_display, show_progress='hidden')
+    # Start the Gradio app once the Blocks layout has been defined.
+    demo.launch()

app.py CHANGED Viewed

@@ -23,23 +23,18 @@ IMAGE_DIR = "sim/assets/langtable_prompt/"
 available_images = sorted([img for img in os.listdir(IMAGE_DIR) if img.endswith(".png")])
-# Helper function to reset GenieSimulator with the selected image
-@spaces.GPU
-def initialize_simulator(image_name):
-    global genie
     image_path = os.path.join(IMAGE_DIR, image_name)
     image = Image.open(image_path)
-    prompt_image = np.tile(np.array(image), (genie.prompt_horizon, 1, 1, 1)).astype(np.uint8)
-    prompt_action = np.zeros((genie.prompt_horizon - 1, genie.action_stride, 2)).astype(np.float32)
-    genie.set_initial_state((prompt_image, prompt_action))
-    reset_image = genie.reset()
     reset_image = cv2.resize(reset_image, (RES, RES))
     return Image.fromarray(reset_image)
-# Example model: takes a direction and returns a random image
-@spaces.GPU
-def model(direction: str):
-    global genie
     if direction == 'right':
         action = np.array([0, 0.05])
     elif direction == 'left':
@@ -50,46 +45,34 @@ def model(direction: str):
         action = np.array([-0.05, 0])
     else:
         raise ValueError(f"Invalid direction: {direction}")
-    next_image = genie.step(action)['pred_next_frame']
     next_image = cv2.resize(next_image, (RES, RES))
     return Image.fromarray(next_image)
-# Gradio function to handle user input
-@spaces.GPU
-def handle_input(direction):
     print(f"User clicked: {direction}")
-    new_image = model(direction)  # Get a new image from the model
     return new_image
-# Gradio function to handle image selection
-@spaces.GPU
-def handle_image_selection(image_name):
     print(f"User selected image: {image_name}")
-    return initialize_simulator(image_name)
 if __name__ == '__main__':
-    genie = GenieSimulator(
-        image_encoder_type='temporalvae',
-        image_encoder_ckpt='stabilityai/stable-video-diffusion-img2vid',
-        quantize=False,
-        backbone_type='stmar',
-        backbone_ckpt='data/mar_ckpt/langtable',
-        prompt_horizon=PROMPT_HORIZON,
-        action_stride=1,
-        domain='language_table',
-    )
-    image = Image.open("sim/assets/langtable_prompt/frame_06.png")
-    prompt_image = np.tile(
-        np.array(image), (genie.prompt_horizon, 1, 1, 1)
-    ).astype(np.uint8)
-    prompt_action = np.zeros(
-        (genie.prompt_horizon, genie.action_stride, 2)
-    ).astype(np.float32)
-    genie.set_initial_state((prompt_image, prompt_action))
-    image = genie.reset()
     with gr.Blocks() as demo:
         with gr.Row():
             image_selector = gr.Dropdown(
                 choices=available_images, value=available_images[0], label="Select an Image"
@@ -106,15 +89,12 @@ if __name__ == '__main__':
             down = gr.Button("↓ Down")
             right = gr.Button("→ Right")
-        # Define interactions
         select_button.click(
-            fn=handle_image_selection, inputs=image_selector, outputs=image_display
         )
-        up.click(fn=lambda: handle_input("up"), outputs=image_display, show_progress='hidden')
-        down.click(fn=lambda: handle_input("down"), outputs=image_display, show_progress='hidden')
-        left.click(fn=lambda: handle_input("left"), outputs=image_display, show_progress='hidden')
-        right.click(fn=lambda: handle_input("right"), outputs=image_display, show_progress='hidden')
-    demo.launch()

 available_images = sorted([img for img in os.listdir(IMAGE_DIR) if img.endswith(".png")])
+def initialize_simulator(image_name, state):
     image_path = os.path.join(IMAGE_DIR, image_name)
     image = Image.open(image_path)
+    prompt_image = np.tile(np.array(image), (state['genie'].prompt_horizon, 1, 1, 1)).astype(np.uint8)
+    prompt_action = np.zeros((state['genie'].prompt_horizon - 1, state['genie'].action_stride, 2)).astype(np.float32)
+    state['genie'].set_initial_state((prompt_image, prompt_action))
+    reset_image = state['genie'].reset()
     reset_image = cv2.resize(reset_image, (RES, RES))
     return Image.fromarray(reset_image)
+def model(direction, state):
     if direction == 'right':
         action = np.array([0, 0.05])
     elif direction == 'left':
         action = np.array([-0.05, 0])
     else:
         raise ValueError(f"Invalid direction: {direction}")
+    next_image = state['genie'].step(action)['pred_next_frame']
     next_image = cv2.resize(next_image, (RES, RES))
     return Image.fromarray(next_image)
+def handle_input(direction, state):
     print(f"User clicked: {direction}")
+    new_image = model(direction, state)
     return new_image
+def handle_image_selection(image_name, state):
     print(f"User selected image: {image_name}")
+    return initialize_simulator(image_name, state)
 if __name__ == '__main__':
     with gr.Blocks() as demo:
+        genie_instance = gr.State({
+            'genie': GenieSimulator(
+                image_encoder_type='temporalvae',
+                image_encoder_ckpt='stabilityai/stable-video-diffusion-img2vid',
+                quantize=False,
+                backbone_type='stmar',
+                backbone_ckpt='data/mar_ckpt/langtable',
+                prompt_horizon=PROMPT_HORIZON,
+                action_stride=1,
+                domain='language_table',
+            )
+        })
         with gr.Row():
             image_selector = gr.Dropdown(
                 choices=available_images, value=available_images[0], label="Select an Image"
             down = gr.Button("↓ Down")
             right = gr.Button("→ Right")
         select_button.click(
+            fn=handle_image_selection, inputs=[image_selector, genie_instance], outputs=image_display, show_progress='hidden'
         )
+        up.click(fn=lambda state: handle_input("up", state), inputs=[genie_instance], outputs=image_display, show_progress='hidden')
+        down.click(fn=lambda state: handle_input("down", state), inputs=[genie_instance], outputs=image_display, show_progress='hidden')
+        left.click(fn=lambda state: handle_input("left", state), inputs=[genie_instance], outputs=image_display, show_progress='hidden')
+        right.click(fn=lambda state: handle_input("right", state), inputs=[genie_instance], outputs=image_display, show_progress='hidden')
+    demo.launch()

common/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (137 Bytes). View file

common/__pycache__/eval_utils.cpython-310.pyc ADDED Viewed

Binary file (4.6 kB). View file

data/mar_ckpt/langtable/random_states_0.pkl CHANGED Viewed

Binary files a/data/mar_ckpt/langtable/random_states_0.pkl and b/data/mar_ckpt/langtable/random_states_0.pkl differ

datasets/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (139 Bytes). View file

datasets/__pycache__/encode_openx_dataset.cpython-310.pyc ADDED Viewed

Binary file (13.1 kB). View file

datasets/__pycache__/utils.cpython-310.pyc ADDED Viewed

Binary file (5.36 kB). View file

genie/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (136 Bytes). View file

genie/__pycache__/attention.cpython-310.pyc ADDED Viewed

Binary file (4.4 kB). View file

genie/__pycache__/config.cpython-310.pyc ADDED Viewed

Binary file (4.37 kB). View file

genie/__pycache__/diffloss.cpython-310.pyc ADDED Viewed

Binary file (8.17 kB). View file

genie/__pycache__/factorization_utils.cpython-310.pyc ADDED Viewed

Binary file (4.05 kB). View file

genie/__pycache__/st_mar.cpython-310.pyc ADDED Viewed

Binary file (13.8 kB). View file

genie/__pycache__/st_mask_git.cpython-310.pyc ADDED Viewed

Binary file (20.6 kB). View file

genie/__pycache__/st_transformer.cpython-310.pyc ADDED Viewed

Binary file (5.18 kB). View file

genie/diffusion/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (1.01 kB). View file

genie/diffusion/__pycache__/diffusion_utils.cpython-310.pyc ADDED Viewed

Binary file (2.26 kB). View file

genie/diffusion/__pycache__/gaussian_diffusion.cpython-310.pyc ADDED Viewed

Binary file (24.3 kB). View file

genie/diffusion/__pycache__/respace.cpython-310.pyc ADDED Viewed

Binary file (4.97 kB). View file

magvit2/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (138 Bytes). View file

magvit2/__pycache__/config.cpython-310.pyc ADDED Viewed

Binary file (2.07 kB). View file

magvit2/__pycache__/util.cpython-310.pyc ADDED Viewed

Binary file (1.61 kB). View file

magvit2/models/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (145 Bytes). View file

magvit2/models/__pycache__/lfqgan.cpython-310.pyc ADDED Viewed

Binary file (8.87 kB). View file

magvit2/modules/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (146 Bytes). View file

magvit2/modules/__pycache__/ema.cpython-310.pyc ADDED Viewed

Binary file (3.3 kB). View file

magvit2/modules/__pycache__/util.cpython-310.pyc ADDED Viewed

Binary file (4.45 kB). View file

magvit2/modules/diffusionmodules/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (163 Bytes). View file

magvit2/modules/diffusionmodules/__pycache__/improved_model.cpython-310.pyc ADDED Viewed

Binary file (5.57 kB). View file

magvit2/modules/discriminator/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (160 Bytes). View file

magvit2/modules/discriminator/__pycache__/model.cpython-310.pyc ADDED Viewed

Binary file (2.34 kB). View file

magvit2/modules/losses/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (222 Bytes). View file

magvit2/modules/losses/__pycache__/lpips.cpython-310.pyc ADDED Viewed

Binary file (5.37 kB). View file

magvit2/modules/losses/__pycache__/vqperceptual.cpython-310.pyc ADDED Viewed

Binary file (7.42 kB). View file

magvit2/modules/scheduler/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (156 Bytes). View file

magvit2/modules/scheduler/__pycache__/lr_scheduler.cpython-310.pyc ADDED Viewed

Binary file (989 Bytes). View file

magvit2/modules/vqvae/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (152 Bytes). View file

magvit2/modules/vqvae/__pycache__/lookup_free_quantize.cpython-310.pyc ADDED Viewed

Binary file (8.29 kB). View file

sim/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (134 Bytes). View file

sim/__pycache__/simulator.cpython-310.pyc ADDED Viewed

Binary file (13.4 kB). View file