Spaces:

liruiw
/

hma

Runtime error

liruiw committed on Dec 8, 2024

Commit

8eeb719

1 Parent(s): 13dce27

improve pred

Files changed (2) hide show

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ genie = GenieSimulator(
     quantize=False,
     backbone_type='stmar',
     backbone_ckpt='data/mar_ckpt/langtable',
-    prompt_horizon=5,
     action_stride=1,
     domain='language_table',
 )

     quantize=False,
     backbone_type='stmar',
     backbone_ckpt='data/mar_ckpt/langtable',
+    prompt_horizon=2,
     action_stride=1,
     domain='language_table',
 )

sim/simulator.py CHANGED Viewed

@@ -248,22 +248,26 @@ class GenieSimulator(LearnedSimulator):
         # encoding
         input_latent_states = torch.cat([
             self.cached_latent_frames,
-            torch.zeros_like(self.cached_latent_frames[-1:]),
         ]).unsqueeze(0).to(torch.float32)
         # dtype conversion and mask token
         if self.backbone_type == "stmaskgit":
             input_latent_states = input_latent_states.long()
-            input_latent_states[:, self.prompt_horizon] = self.backbone.mask_token_id
         elif self.backbone_type == "stmar":
-            input_latent_states[:, self.prompt_horizon] = self.backbone.mask_token
         # dynamics rollout
         action = torch.from_numpy(action).to(device=self.device)
         input_actions = torch.cat([     # (1, prompt_horizon + 1, action_stride * A)
             self.cached_actions,
-            action.unsqueeze(0)
-        ]).view(1, self.prompt_horizon + 1, -1).to(torch.float32)
         if self.measure_step_time:
             start_time = time.time()

         # encoding
         input_latent_states = torch.cat([
             self.cached_latent_frames,
+            torch.zeros_like(self.cached_latent_frames[[0]]),
         ]).unsqueeze(0).to(torch.float32)
+        input_latent_states = input_latent_states[:, :self.prompt_horizon + 1]
         # dtype conversion and mask token
         if self.backbone_type == "stmaskgit":
             input_latent_states = input_latent_states.long()
+            input_latent_states[:, -1] = self.backbone.mask_token_id
         elif self.backbone_type == "stmar":
+            input_latent_states[:, -1] = self.backbone.mask_token
         # dynamics rollout
         action = torch.from_numpy(action).to(device=self.device)
         input_actions = torch.cat([     # (1, prompt_horizon + 1, action_stride * A)
             self.cached_actions,
+            action.unsqueeze(0),
+            action.unsqueeze(0) # the last action is not used, but we need a_{t-1}, s_{t-1} to predict s_t
+        ]).view(1, -1, action.shape[-1]).to(torch.float32) #  + 1
+        input_actions = input_actions[:, :self.prompt_horizon + 1]
         if self.measure_step_time:
             start_time = time.time()