Sombit committed on
Commit
ecec1c0
·
verified ·
1 Parent(s): 3b501b7

Upload TrajectoryVLA

Browse files
Files changed (2) hide show
  1. config.json +32 -59
  2. prismatic_model.py +12 -3
config.json CHANGED
@@ -1,64 +1,37 @@
1
  {
 
 
 
 
2
  "auto_map": {
3
- "AutoConfig": "prismatic_config.TrajectoryVLAConfig"
4
  },
5
- "cheat": false,
6
- "model_type": "trajectoryvla",
7
- "num_timesteps": 6,
8
- "prismatic_config": {
9
- "architectures": [
10
- "TrajectoryVLA"
11
- ],
12
- "auto_map": {
13
- "AutoModelForVision2Seq": "prismatic_model.TrajectoryVLA"
14
- },
15
- "model_type": "prismatic",
16
- "return_dict": false,
17
- "torch_dtype": "bfloat16"
 
 
18
  },
19
- "rotation_components": 9,
20
- "seperate_control_proj": true,
21
- "timestep_proj_config": {
22
- "num_tokens": 3,
23
- "pos_embed_scale": 8,
24
- "proj_layers": [
25
- 128,
26
- 512,
27
- 1024
28
- ],
29
- "time_delta_sec": 0.1
30
- },
31
- "token_proj_config": {
32
- "control_tokens_layers": [
33
- 4096,
34
- 2048,
35
- 1024
36
- ],
37
- "image_tokens_mode": "vit",
38
- "llm_image_tokens_layers": [],
39
- "vit_tokens_layers": [
40
- 2176,
41
- 1024
42
- ]
43
- },
44
- "token_size": 1024,
45
- "transformer_config": {
46
- "decoder_block_config": {
47
- "dropout": 0.0,
48
- "feature_size": 1024,
49
- "head_dim": 64,
50
- "num_heads": 16
51
- },
52
- "encoder_block_config": {
53
- "feature_size": 1024,
54
- "head_dim": 64,
55
- "num_heads": 16
56
- },
57
- "num_blocks": 2,
58
- "pos_embed_config": {
59
- "embedding_dim": 1024,
60
- "num_embeddings": 300
61
- }
62
- },
63
- "transformers_version": "4.44.2"
64
  }
 
1
  {
2
+ "arch_specifier": "no-align+gelu-mlp",
3
+ "architectures": [
4
+ "TrajectoryVLA"
5
+ ],
6
  "auto_map": {
7
+ "AutoModelForVision2Seq": "prismatic_model.TrajectoryVLA"
8
  },
9
+ "hf_llm_id": "meta-llama/Llama-2-7b-hf",
10
+ "image_resize_strategy": "letterbox",
11
+ "image_sizes": [
12
+ 224,
13
+ 224
14
+ ],
15
+ "llm_backbone_id": "llama2-7b-pure",
16
+ "llm_max_length": 2048,
17
+ "model_type": "prismatic",
18
+ "output_projector_states": false,
19
+ "pad_to_multiple_of": 64,
20
+ "pad_token_id": 32000,
21
+ "return_dict": false,
22
+ "text_config": {
23
+ "model_type": "llama"
24
  },
25
+ "timm_model_ids": [
26
+ "vit_large_patch14_reg4_dinov2.lvd142m",
27
+ "vit_so400m_patch14_siglip_224"
28
+ ],
29
+ "timm_override_act_layers": [
30
+ null,
31
+ null
32
+ ],
33
+ "torch_dtype": "bfloat16",
34
+ "transformers_version": "4.44.2",
35
+ "use_fused_vision_backbone": true,
36
+ "vision_backbone_id": "dinosiglip-vit-so-224px"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  }
prismatic_model.py CHANGED
@@ -26,7 +26,7 @@ import torch
26
  import torch.nn as nn
27
  import transformers
28
  from timm.models.vision_transformer import LayerScale
29
- from transformers import AutoModelForCausalLM, PretrainedConfig, PreTrainedModel
30
  from transformers.modeling_outputs import ModelOutput
31
  import collections
32
  import math
@@ -40,6 +40,8 @@ from PIL import Image
40
  from pathlib import Path
41
  from torch.amp.autocast_mode import autocast # Corrected import for latest PyTorch
42
  from scipy.spatial.transform import Rotation as R
 
 
43
  ht_token_path = Path(".hf_token")
44
  HF_TOKEN = ht_token_path.read_text().strip() if isinstance(ht_token_path, Path) else hf_token_path
45
 
@@ -256,7 +258,10 @@ class LLMBackbone(nn.Module):
256
 
257
  return tokenizer
258
 
 
259
  class PrismaticForConditionalGeneration(PrismaticPreTrainedModel):
 
 
260
  def __init__(self, config: PrismaticConfig) -> None:
261
  super().__init__(config)
262
  # [Validation] Lightweight Validate on `config` Fields + Dependency Versions
@@ -773,7 +778,7 @@ class TimestepProjModule(nn.Module):
773
 
774
 
775
  # class Waypointer(nn.Module):
776
-
777
  class TrajectoryVLA(PrismaticForConditionalGeneration):
778
 
779
 
@@ -781,6 +786,7 @@ class TrajectoryVLA(PrismaticForConditionalGeneration):
781
 
782
  def __init__(self, config: TrajectoryVLAConfig) -> None:
783
  super().__init__(config.prismatic_config)
 
784
  self.control_tokenizer = WaypointTokenizer(self.llm_backbone.tokenizer)
785
  self.timestep_proj = TimestepProjModule(
786
  config.timestep_proj_config,
@@ -1008,7 +1014,10 @@ def read_pt(pt_path):
1008
  # control_target = read_pt('/work/nikolay_nikolov/debug/inference/control_target.pt')
1009
 
1010
  if __name__ == "__main__":
1011
-
 
 
 
1012
  prismatic_config_dict = {
1013
  "vision_backbone_id":"dinosiglip-vit-so-224px",
1014
  "llm_backbone_id":"llama2-7b-pure",
 
26
  import torch.nn as nn
27
  import transformers
28
  from timm.models.vision_transformer import LayerScale
29
+ from transformers import AutoModelForCausalLM, PretrainedConfig, PreTrainedModel, AutoModel, AutoConfig
30
  from transformers.modeling_outputs import ModelOutput
31
  import collections
32
  import math
 
40
  from pathlib import Path
41
  from torch.amp.autocast_mode import autocast # Corrected import for latest PyTorch
42
  from scipy.spatial.transform import Rotation as R
43
+ # import automodel
44
+
45
  ht_token_path = Path(".hf_token")
46
  HF_TOKEN = ht_token_path.read_text().strip() if isinstance(ht_token_path, Path) else hf_token_path
47
 
 
258
 
259
  return tokenizer
260
 
261
+ # @AutoModel.register(PrismaticConfig)
262
  class PrismaticForConditionalGeneration(PrismaticPreTrainedModel):
263
+ # model_type: ClassVar[str] = "prismatic"
264
+ config_class: PretrainedConfig = PrismaticConfig
265
  def __init__(self, config: PrismaticConfig) -> None:
266
  super().__init__(config)
267
  # [Validation] Lightweight Validate on `config` Fields + Dependency Versions
 
778
 
779
 
780
  # class Waypointer(nn.Module):
781
+ # @AutoModel.register(TrajectoryVLAConfig)
782
  class TrajectoryVLA(PrismaticForConditionalGeneration):
783
 
784
 
 
786
 
787
  def __init__(self, config: TrajectoryVLAConfig) -> None:
788
  super().__init__(config.prismatic_config)
789
+
790
  self.control_tokenizer = WaypointTokenizer(self.llm_backbone.tokenizer)
791
  self.timestep_proj = TimestepProjModule(
792
  config.timestep_proj_config,
 
1014
  # control_target = read_pt('/work/nikolay_nikolov/debug/inference/control_target.pt')
1015
 
1016
  if __name__ == "__main__":
1017
+ AutoConfig.register("prismatic",PrismaticConfig)
1018
+ AutoConfig.register("trajectoryvla",TrajectoryVLAConfig)
1019
+ AutoModel.register('prismatic',PrismaticForConditionalGeneration)
1020
+ AutoModel.register('trajectoryvla',TrajectoryVLA)
1021
  prismatic_config_dict = {
1022
  "vision_backbone_id":"dinosiglip-vit-so-224px",
1023
  "llm_backbone_id":"llama2-7b-pure",