Spaces:

liruiw
/

hma

Runtime error

liruiw committed on Dec 16, 2024

Commit

8d2f46e

1 Parent(s): e176061

fix

Files changed (2) hide show

app.py CHANGED Viewed

@@ -80,6 +80,17 @@ if __name__ == '__main__':
         )
     with gr.Blocks() as demo:
         with gr.Row():
             image_selector = gr.Dropdown(
                 choices=available_images, value=available_images[0], label="Select an Image"

         )
     with gr.Blocks() as demo:
+        image = Image.open("sim/assets/langtable_prompt/frame_06.png")
+        prompt_image = np.tile(
+            np.array(image), (genie.prompt_horizon, 1, 1, 1)
+        ).astype(np.uint8)
+        prompt_action = np.zeros(
+            (genie.prompt_horizon, genie.action_stride, 2)
+        ).astype(np.float32)
+        genie.set_initial_state((prompt_image, prompt_action))
+        image = genie.reset()
         with gr.Row():
             image_selector = gr.Dropdown(
                 choices=available_images, value=available_images[0], label="Select an Image"

sim/simulator.py CHANGED Viewed

@@ -5,6 +5,7 @@ import einops
 import skimage
 import time
 from genie.st_mask_git import STMaskGIT
 from genie.st_mar import STMAR
 from datasets.utils import get_image_encoder
@@ -229,7 +230,7 @@ class GenieSimulator(LearnedSimulator):
     def set_initial_state(self, state: Tuple[np.ndarray, np.ndarray]):
         self.init_prompt = state
     @torch.inference_mode()
     def step(self, action: np.ndarray) -> Dict:
         # action: (action_stride, A) OR (A,)
@@ -364,7 +365,7 @@ class GenieSimulator(LearnedSimulator):
         return step_result
     @torch.inference_mode()
     def _encode_image(self, image: np.ndarray) -> torch.Tensor:
         # (H, W, 3)
@@ -396,7 +397,7 @@ class GenieSimulator(LearnedSimulator):
             latent = latent.squeeze(0).to(torch.float32).to(self.device)
             return latent
     @torch.inference_mode()
     def _decode_image(self, latent: torch.Tensor) -> np.ndarray:
         # latent can be either quantized indices or raw latent
@@ -467,7 +468,7 @@ class GenieSimulator(LearnedSimulator):
         image = np.clip(image, 0, 255).astype(np.uint8)
         return image
     def reset(self) -> np.ndarray:
         # if ground truth physics simulator is provided,
         # return the the side-by-side concatenated image

 import skimage
 import time
+import spaces
 from genie.st_mask_git import STMaskGIT
 from genie.st_mar import STMAR
 from datasets.utils import get_image_encoder
     def set_initial_state(self, state: Tuple[np.ndarray, np.ndarray]):
         self.init_prompt = state
+    @spaces.GPU
     @torch.inference_mode()
     def step(self, action: np.ndarray) -> Dict:
         # action: (action_stride, A) OR (A,)
         return step_result
+    @spaces.GPU
     @torch.inference_mode()
     def _encode_image(self, image: np.ndarray) -> torch.Tensor:
         # (H, W, 3)
             latent = latent.squeeze(0).to(torch.float32).to(self.device)
             return latent
+    @spaces.GPU
     @torch.inference_mode()
     def _decode_image(self, latent: torch.Tensor) -> np.ndarray:
         # latent can be either quantized indices or raw latent
         image = np.clip(image, 0, 255).astype(np.uint8)
         return image
+    @spaces.GPU
     def reset(self) -> np.ndarray:
         # if ground truth physics simulator is provided,
         # return the the side-by-side concatenated image