Spaces:

liruiw
/

hma

Running on Zero

App Files Files Community

liruiw committed on Dec 8, 2024

Commit

4c4632b

1 Parent(s): 8eeb719

fix

Browse files

Files changed (1) hide show

sim/simulator.py +32 -32

sim/simulator.py CHANGED Viewed

@@ -27,13 +27,13 @@ class Simulator:
     @torch.inference_mode()
     def step(self, action):
         raise NotImplementedError
     def reset(self):
         raise NotImplementedError
     def close(self):
         raise NotImplementedError
     @property
     def dt(self):
         raise NotImplementedError
@@ -46,16 +46,16 @@ class PhysicsSimulator(Simulator):
     # physics engine should be able to update dt
     def set_dt(self, dt):
         raise NotImplementedError
     # physics engine should be able to get scene state
     # e.g., robot joint positions, object positions, etc.
     def get_raw_state(self, port: Optional[str] = None):
         raise NotImplementedError
     @property
     def action_dimension(self):
         raise NotImplementedError
 class LearnedSimulator(Simulator):
     def __init__(self):
@@ -65,9 +65,9 @@ class LearnedSimulator(Simulator):
 # replayed data respects physics, so we inherit from PhysicsSimulator
 # it can be considered as a special case of PhysicsSimulator
 class ReplaySimulator(PhysicsSimulator):
-    def __init__(self,
-        frames,
-        prompt_horizon: int = 0,
         dt: Optional[float] = None
     ):
         super().__init__()
@@ -76,10 +76,10 @@ class ReplaySimulator(PhysicsSimulator):
         assert self.frame_idx < len(self.frames)
         self._dt = dt
         self.prompt_horizon = prompt_horizon
     def __len__(self):
         return len(self.frames) - self.prompt_horizon
     def step(self, action):
         frame = self.frames[self.frame_idx]
         assert self.frame_idx < len(self.frames)
@@ -87,20 +87,20 @@ class ReplaySimulator(PhysicsSimulator):
         return {
             'pred_next_frame': frame
         }
     def reset(self):    # return current frame = last frame of prompt
         self.frame_idx = self.prompt_horizon
         return self.prompt()[-1]
     def prompt(self):
         return self.frames[:self.prompt_horizon]
     @property
     def dt(self):
         return self._dt
 class GenieSimulator(LearnedSimulator):
@@ -164,7 +164,7 @@ class GenieSimulator(LearnedSimulator):
             elif backbone_type == "stmar":
                 inference_iterations = 2
-        # misc
         self.device = torch.device(device)
         self.measure_step_time = measure_step_time
         self.compute_psnr = compute_psnr
@@ -200,11 +200,11 @@ class GenieSimulator(LearnedSimulator):
         else:
             self.backbone = STMAR.from_pretrained(backbone_ckpt)
         self.backbone = self.backbone.to(device=self.device).eval()
         self.post_processor = post_processor
         # load physics simulator if available
-        # the phys sim to get ground truth image,
         # assume the phys sim has aligned prompt frames
         self.gt_phys_sim = physics_simulator
         self.gt_teacher_force = physics_simulator_teacher_force
@@ -237,7 +237,7 @@ class GenieSimulator(LearnedSimulator):
         # return: (H, W, 3)
         assert self.cached_latent_frames is not None and self.cached_actions is not None, \
             "Model is not prompted yet. Please call `set_initial_state` first."
         if action.ndim == 1:
             action = np.tile(action, (self.action_stride, 1))
@@ -273,7 +273,7 @@ class GenieSimulator(LearnedSimulator):
             start_time = time.time()
         pred_next_latent_state = self.backbone.maskgit_generate(
             input_latent_states,
-            out_t=self.prompt_horizon,
             maskgit_steps=self.inference_iterations,
             temperature=self.sampling_temperature,
             action_ids=input_actions,
@@ -310,7 +310,7 @@ class GenieSimulator(LearnedSimulator):
             # compute PSNR against ground truth
             if self.compute_psnr:
                 psnr = skimage.metrics.peak_signal_noise_ratio(
-                    image_true=gt_next_frame / 255.,
                     image_test=pred_next_frame / 255.,
                     data_range=1.0
                 )
@@ -348,7 +348,7 @@ class GenieSimulator(LearnedSimulator):
             if self.gt_teacher_force is not None and self.step_count % self.gt_teacher_force == 0:
                 pred_next_latent_state = self._encode_image(gt_next_frame)
         # update history buffer
         self.cached_latent_frames = torch.cat([
             self.cached_latent_frames[1:], pred_next_latent_state.unsqueeze(0)
@@ -356,7 +356,7 @@ class GenieSimulator(LearnedSimulator):
         self.cached_actions = torch.cat([
             self.cached_actions[1:], action.unsqueeze(0)
         ])
         # post processing
         if self.post_processor is not None:
             pred_next_frame = self.post_processor(pred_next_frame, action)
@@ -364,7 +364,7 @@ class GenieSimulator(LearnedSimulator):
         self.step_count += 1
         return step_result
     @torch.inference_mode()
     def _encode_image(self, image: np.ndarray) -> torch.Tensor:
@@ -422,11 +422,11 @@ class GenieSimulator(LearnedSimulator):
         decoded_image = decoded_image.squeeze(0).to(torch.float32).detach().cpu().numpy()
         decoded_image = self._unnormalize_image(decoded_image).transpose(1, 2, 0)
         return decoded_image
     def _normalize_image(self, image: np.ndarray) -> np.ndarray:
         # (H, W, 3) normalized to [-1, 1]
-        # if `resize`, resize the shorter side to `resized_res`
         #   and then do a center crop
         image = np.asarray(image, dtype=np.float32)
@@ -435,7 +435,7 @@ class GenieSimulator(LearnedSimulator):
         # resize if asked
         if self.resize_image:
-            resized_res = self.resize_image_resolution
             if H < W:
                 Hnew, Wnew = resized_res, int(resized_res * W / H)
             else:
@@ -469,7 +469,7 @@ class GenieSimulator(LearnedSimulator):
     def reset(self) -> np.ndarray:
-        # if ground truth physics simulator is provided,
         # return the side-by-side concatenated image
         # get the initial prompt from the physics simulator if not yet set
@@ -480,7 +480,7 @@ class GenieSimulator(LearnedSimulator):
             action_prompt = np.zeros(
                 (self.prompt_horizon, self.action_stride, self.gt_phys_sim.action_dimension)
             ).astype(np.float32)
-        else:
             assert self.init_prompt is not None, "Initial state is not set."
             image_prompt, action_prompt = self.init_prompt
@@ -498,7 +498,7 @@ class GenieSimulator(LearnedSimulator):
         ], axis=0)
         if self.resize_image:
-            current_image = cv2.resize(current_image,
                 (self.resize_image_resolution, self.resize_image_resolution))
         if self.gt_phys_sim is not None:

     @torch.inference_mode()
     def step(self, action):
         raise NotImplementedError
     def reset(self):
         raise NotImplementedError
     def close(self):
         raise NotImplementedError
     @property
     def dt(self):
         raise NotImplementedError
     # physics engine should be able to update dt
     def set_dt(self, dt):
         raise NotImplementedError
     # physics engine should be able to get scene state
     # e.g., robot joint positions, object positions, etc.
     def get_raw_state(self, port: Optional[str] = None):
         raise NotImplementedError
     @property
     def action_dimension(self):
         raise NotImplementedError
 class LearnedSimulator(Simulator):
     def __init__(self):
 # replayed data respects physics, so we inherit from PhysicsSimulator
 # it can be considered as a special case of PhysicsSimulator
 class ReplaySimulator(PhysicsSimulator):
+    def __init__(self,
+        frames,
+        prompt_horizon: int = 0,
         dt: Optional[float] = None
     ):
         super().__init__()
         assert self.frame_idx < len(self.frames)
         self._dt = dt
         self.prompt_horizon = prompt_horizon
     def __len__(self):
         return len(self.frames) - self.prompt_horizon
     def step(self, action):
         frame = self.frames[self.frame_idx]
         assert self.frame_idx < len(self.frames)
         return {
             'pred_next_frame': frame
         }
     def reset(self):    # return current frame = last frame of prompt
         self.frame_idx = self.prompt_horizon
         return self.prompt()[-1]
     def prompt(self):
         return self.frames[:self.prompt_horizon]
     @property
     def dt(self):
         return self._dt
 class GenieSimulator(LearnedSimulator):
             elif backbone_type == "stmar":
                 inference_iterations = 2
+        # misc
         self.device = torch.device(device)
         self.measure_step_time = measure_step_time
         self.compute_psnr = compute_psnr
         else:
             self.backbone = STMAR.from_pretrained(backbone_ckpt)
         self.backbone = self.backbone.to(device=self.device).eval()
         self.post_processor = post_processor
         # load physics simulator if available
+        # the phys sim to get ground truth image,
         # assume the phys sim has aligned prompt frames
         self.gt_phys_sim = physics_simulator
         self.gt_teacher_force = physics_simulator_teacher_force
         # return: (H, W, 3)
         assert self.cached_latent_frames is not None and self.cached_actions is not None, \
             "Model is not prompted yet. Please call `set_initial_state` first."
         if action.ndim == 1:
             action = np.tile(action, (self.action_stride, 1))
             start_time = time.time()
         pred_next_latent_state = self.backbone.maskgit_generate(
             input_latent_states,
+            out_t=input_latent_states.shape[1] - 1,
             maskgit_steps=self.inference_iterations,
             temperature=self.sampling_temperature,
             action_ids=input_actions,
             # compute PSNR against ground truth
             if self.compute_psnr:
                 psnr = skimage.metrics.peak_signal_noise_ratio(
+                    image_true=gt_next_frame / 255.,
                     image_test=pred_next_frame / 255.,
                     data_range=1.0
                 )
             if self.gt_teacher_force is not None and self.step_count % self.gt_teacher_force == 0:
                 pred_next_latent_state = self._encode_image(gt_next_frame)
         # update history buffer
         self.cached_latent_frames = torch.cat([
             self.cached_latent_frames[1:], pred_next_latent_state.unsqueeze(0)
         self.cached_actions = torch.cat([
             self.cached_actions[1:], action.unsqueeze(0)
         ])
         # post processing
         if self.post_processor is not None:
             pred_next_frame = self.post_processor(pred_next_frame, action)
         self.step_count += 1
         return step_result
     @torch.inference_mode()
     def _encode_image(self, image: np.ndarray) -> torch.Tensor:
         decoded_image = decoded_image.squeeze(0).to(torch.float32).detach().cpu().numpy()
         decoded_image = self._unnormalize_image(decoded_image).transpose(1, 2, 0)
         return decoded_image
     def _normalize_image(self, image: np.ndarray) -> np.ndarray:
         # (H, W, 3) normalized to [-1, 1]
+        # if `resize`, resize the shorter side to `resized_res`
         #   and then do a center crop
         image = np.asarray(image, dtype=np.float32)
         # resize if asked
         if self.resize_image:
+            resized_res = self.resize_image_resolution
             if H < W:
                 Hnew, Wnew = resized_res, int(resized_res * W / H)
             else:
     def reset(self) -> np.ndarray:
+        # if ground truth physics simulator is provided,
         # return the side-by-side concatenated image
         # get the initial prompt from the physics simulator if not yet set
             action_prompt = np.zeros(
                 (self.prompt_horizon, self.action_stride, self.gt_phys_sim.action_dimension)
             ).astype(np.float32)
+        else:
             assert self.init_prompt is not None, "Initial state is not set."
             image_prompt, action_prompt = self.init_prompt
         ], axis=0)
         if self.resize_image:
+            current_image = cv2.resize(current_image,
                 (self.resize_image_resolution, self.resize_image_resolution))
         if self.gt_phys_sim is not None: