Spaces:

liruiw
/

hma

Runtime error

App Files Files Community

liruiw committed on Dec 20, 2024

Commit

240cb6a

1 Parent(s): ca9c8aa

fix

Browse files

Files changed (2) hide show

app copy 2.py +122 -0
app.py +24 -37

app copy 2.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import gradio as gr
+import spaces
+import gradio as gr
+import numpy as np
+from PIL import Image
+import cv2
+from sim.simulator import GenieSimulator
+import os
+# Fetch the model checkpoint only when the local checkpoint directory is missing.
+if not os.path.exists("data/mar_ckpt/langtable"):
+    # download from google drive
+    # gdown is imported lazily so it is only needed on the first run.
+    import gdown
+    gdown.download_folder("https://drive.google.com/drive/u/2/folders/1XU87cRqV-IMZA6RLiabIR_uZngynvUFN")
+    # Presumably the download creates a local `langtable` folder in the working
+    # directory - TODO confirm. The shell exit status is not checked, so a failed
+    # `mv` goes unnoticed here.
+    os.system("mkdir -p data/mar_ckpt/; mv langtable data/mar_ckpt/")
+# Side length (pixels) that frames are resized to before being shown.
+RES = 512
+# Passed to GenieSimulator as `prompt_horizon`.
+PROMPT_HORIZON = 3
+# Directory that holds the selectable prompt images.
+IMAGE_DIR = "sim/assets/langtable_prompt/"
+# Load available images
+# Sorted file names of every .png in IMAGE_DIR; used as the dropdown choices.
+available_images = sorted([img for img in os.listdir(IMAGE_DIR) if img.endswith(".png")])
+# Helper function to reset GenieSimulator with the selected image
+@spaces.GPU
+def initialize_simulator(image_name, genie):
+    image_path = os.path.join(IMAGE_DIR, image_name)
+    image = Image.open(image_path)
+    prompt_image = np.tile(np.array(image), (genie.prompt_horizon, 1, 1, 1)).astype(np.uint8)
+    prompt_action = np.zeros((genie.prompt_horizon - 1, genie.action_stride, 2)).astype(np.float32)
+    genie.set_initial_state((prompt_image, prompt_action))
+    reset_image = genie.reset()
+    reset_image = cv2.resize(reset_image, (RES, RES))
+    return Image.fromarray(reset_image)
+@spaces.GPU
+def model(direction, genie):
+    if direction == 'right':
+        action = np.array([0, 0.05])
+    elif direction == 'left':
+        action = np.array([0, -0.05])
+    elif direction == 'down':
+        action = np.array([0.05, 0])
+    elif direction == 'up':
+        action = np.array([-0.05, 0])
+    else:
+        raise ValueError(f"Invalid direction: {direction}")
+    next_image = genie.step(action)['pred_next_frame']
+    next_image = cv2.resize(next_image, (RES, RES))
+    return Image.fromarray(next_image)
+@spaces.GPU
+def handle_input(direction):
+    print(f"User clicked: {direction}")
+    new_image = genie(direction)
+    return new_image
+@spaces.GPU
+def handle_image_selection(image_name, state):
+    print(f"User selected image: {image_name}")
+    return initialize_simulator(image_name, state)
+# Module-level simulator instance shared by the Gradio callbacks below.
+genie =  GenieSimulator(
+            image_encoder_type='temporalvae',
+            image_encoder_ckpt='stabilityai/stable-video-diffusion-img2vid',
+            quantize=False,
+            backbone_type='stmar',
+            backbone_ckpt='data/mar_ckpt/langtable',
+            prompt_horizon=PROMPT_HORIZON,
+            action_stride=1,
+            domain='language_table',
+            device="cuda"
+        )
+# Default prompt: a fixed frame from the prompt-image directory.
+image = Image.open("sim/assets/langtable_prompt/frame_06.png")
+# Repeat the frame `prompt_horizon` times along the leading axis of a 4-D array.
+prompt_image = np.tile(
+    np.array(image), (genie.prompt_horizon, 1, 1, 1)
+).astype(np.uint8)
+# Zero-filled action prompt.
+# NOTE(review): this uses `prompt_horizon` rows, while initialize_simulator()
+# uses `prompt_horizon - 1` - confirm which length set_initial_state expects.
+prompt_action = np.zeros(
+    (genie.prompt_horizon, genie.action_stride, 2)
+).astype(np.float32)
+genie.set_initial_state((prompt_image, prompt_action))
+# Re-assigns the device already passed to the constructor above.
+genie.device = "cuda"
+if __name__ == '__main__':
+    with gr.Blocks() as demo:
+        genie.device = "cuda"
+        with gr.Row():
+            gr.Textbox(label='HMA Demo: Select a prompt initial image from the gallery and Interact with arrow keys. \n'
+            'Note: the speed is limited due to free GPU in HF and the interface supports one user at a time.', lines=1)
+        with gr.Row():
+            image_selector = gr.Dropdown(
+                choices=available_images, value=available_images[0], label="Select an Image"
+            )
+            select_button = gr.Button("Load Image")
+        with gr.Row():
+            image_display = gr.Image(type="pil", label="Generated Image")
+        with gr.Row():
+            up = gr.Button("↑ Up")
+        with gr.Row():
+            left = gr.Button("← Left")
+            down = gr.Button("↓ Down")
+            right = gr.Button("→ Right")
+        # Define interactions
+        select_button.click(
+            fn=handle_image_selection, inputs=[image_selector, genie], outputs=image_display, show_progress='hidden'
+        )
+        up.click(fn=lambda: handle_input("up"), outputs=image_display, show_progress='hidden')
+        down.click(fn=lambda: handle_input("down"), outputs=image_display, show_progress='hidden')
+        left.click(fn=lambda: handle_input("left"), outputs=image_display, show_progress='hidden')
+        right.click(fn=lambda: handle_input("right"), outputs=image_display, show_progress='hidden')
+    demo.launch(share=True)

app.py CHANGED Viewed

@@ -1,13 +1,13 @@
 import gradio as gr
 import spaces
-import gradio as gr
 import numpy as np
 from PIL import Image
 import cv2
 from sim.simulator import GenieSimulator
 import os
 if not os.path.exists("data/mar_ckpt/langtable"):
     # download from google drive
@@ -23,12 +23,20 @@ IMAGE_DIR = "sim/assets/langtable_prompt/"
 available_images = sorted([img for img in os.listdir(IMAGE_DIR) if img.endswith(".png")])
 # Helper function to reset GenieSimulator with the selected image
 @spaces.GPU
-def initialize_simulator(image_name, genie):
     image_path = os.path.join(IMAGE_DIR, image_name)
     image = Image.open(image_path)
     prompt_image = np.tile(np.array(image), (genie.prompt_horizon, 1, 1, 1)).astype(np.uint8)
@@ -38,8 +46,9 @@ def initialize_simulator(image_name, genie):
     reset_image = cv2.resize(reset_image, (RES, RES))
     return Image.fromarray(reset_image)
 @spaces.GPU
-def model(direction, genie):
     if direction == 'right':
         action = np.array([0, 0.05])
     elif direction == 'left':
@@ -54,48 +63,25 @@ def model(direction, genie):
     next_image = cv2.resize(next_image, (RES, RES))
     return Image.fromarray(next_image)
 @spaces.GPU
 def handle_input(direction):
     print(f"User clicked: {direction}")
-    new_image = genie(direction)
     return new_image
 @spaces.GPU
-def handle_image_selection(image_name, state):
     print(f"User selected image: {image_name}")
-    return initialize_simulator(image_name, state)
-genie =  GenieSimulator(
-            image_encoder_type='temporalvae',
-            image_encoder_ckpt='stabilityai/stable-video-diffusion-img2vid',
-            quantize=False,
-            backbone_type='stmar',
-            backbone_ckpt='data/mar_ckpt/langtable',
-            prompt_horizon=PROMPT_HORIZON,
-            action_stride=1,
-            domain='language_table',
-            device="cuda"
-        )
-image = Image.open("sim/assets/langtable_prompt/frame_06.png")
-prompt_image = np.tile(
-    np.array(image), (genie.prompt_horizon, 1, 1, 1)
-).astype(np.uint8)
-prompt_action = np.zeros(
-    (genie.prompt_horizon, genie.action_stride, 2)
-).astype(np.float32)
-genie.set_initial_state((prompt_image, prompt_action))
-genie.device = "cuda"
 if __name__ == '__main__':
     with gr.Blocks() as demo:
-        genie_instance = gr.State({'genie': genie})
-        genie.device = "cuda"
         with gr.Row():
             gr.Textbox(label='HMA Demo: Select a prompt initial image from the gallery and Interact with arrow keys. \n'
-            'Note: the speed is limited due to free GPU in HF and the interface ', lines=1)
         with gr.Row():
             image_selector = gr.Dropdown(
                 choices=available_images, value=available_images[0], label="Select an Image"
@@ -114,10 +100,11 @@ if __name__ == '__main__':
         # Define interactions
         select_button.click(
-            fn=handle_image_selection, inputs=[image_selector, genie], outputs=image_display, show_progress='hidden'
         )
         up.click(fn=lambda: handle_input("up"), outputs=image_display, show_progress='hidden')
         down.click(fn=lambda: handle_input("down"), outputs=image_display, show_progress='hidden')
         left.click(fn=lambda: handle_input("left"), outputs=image_display, show_progress='hidden')
         right.click(fn=lambda: handle_input("right"), outputs=image_display, show_progress='hidden')
     demo.launch(share=True)

 import gradio as gr
 import spaces
 import numpy as np
 from PIL import Image
 import cv2
 from sim.simulator import GenieSimulator
 import os
+import spaces
 if not os.path.exists("data/mar_ckpt/langtable"):
     # download from google drive
 available_images = sorted([img for img in os.listdir(IMAGE_DIR) if img.endswith(".png")])
+genie = GenieSimulator(
+    image_encoder_type='temporalvae',
+    image_encoder_ckpt='stabilityai/stable-video-diffusion-img2vid',
+    quantize=False,
+    backbone_type='stmar',
+    backbone_ckpt='data/mar_ckpt_long2/langtable',
+    prompt_horizon=PROMPT_HORIZON,
+    action_stride=1,
+    domain='language_table',
+)
 # Helper function to reset GenieSimulator with the selected image
 @spaces.GPU
+def initialize_simulator(image_name):
     image_path = os.path.join(IMAGE_DIR, image_name)
     image = Image.open(image_path)
     prompt_image = np.tile(np.array(image), (genie.prompt_horizon, 1, 1, 1)).astype(np.uint8)
     reset_image = cv2.resize(reset_image, (RES, RES))
     return Image.fromarray(reset_image)
+# Model step: takes a direction and returns the resulting next-frame image
 @spaces.GPU
+def model(direction: str):
     if direction == 'right':
         action = np.array([0, 0.05])
     elif direction == 'left':
     next_image = cv2.resize(next_image, (RES, RES))
     return Image.fromarray(next_image)
+# Gradio function to handle user input
 @spaces.GPU
 def handle_input(direction):
     print(f"User clicked: {direction}")
+    new_image = model(direction)  # Get a new image from the model
     return new_image
+# Gradio function to handle image selection
 @spaces.GPU
+def handle_image_selection(image_name):
     print(f"User selected image: {image_name}")
+    return initialize_simulator(image_name)
 if __name__ == '__main__':
     with gr.Blocks() as demo:
         with gr.Row():
             gr.Textbox(label='HMA Demo: Select a prompt initial image from the gallery and Interact with arrow keys. \n'
+            'Note: the speed is limited due to free GPU in HF and the interface supports one user at a time.', lines=1)
         with gr.Row():
             image_selector = gr.Dropdown(
                 choices=available_images, value=available_images[0], label="Select an Image"
         # Define interactions
         select_button.click(
+            fn=handle_image_selection, inputs=image_selector, outputs=image_display
         )
         up.click(fn=lambda: handle_input("up"), outputs=image_display, show_progress='hidden')
         down.click(fn=lambda: handle_input("down"), outputs=image_display, show_progress='hidden')
         left.click(fn=lambda: handle_input("left"), outputs=image_display, show_progress='hidden')
         right.click(fn=lambda: handle_input("right"), outputs=image_display, show_progress='hidden')
     demo.launch(share=True)