Sombit committed on
Commit
303fe03
·
verified ·
1 Parent(s): c7f4f9e

Upload TrajectoryVLA

Browse files
Files changed (1) hide show
  1. config.json +32 -59
config.json CHANGED
@@ -1,64 +1,37 @@
1
  {
 
 
 
 
2
  "auto_map": {
3
- "AutoConfig": "prismatic_config.TrajectoryVLAConfig"
4
  },
5
- "cheat": false,
6
- "model_type": "trajectoryvla",
7
- "num_timesteps": 6,
8
- "prismatic_config": {
9
- "architectures": [
10
- "TrajectoryVLA"
11
- ],
12
- "auto_map": {
13
- "AutoModelForVision2Seq": "prismatic_model.TrajectoryVLA"
14
- },
15
- "model_type": "prismatic",
16
- "return_dict": false,
17
- "torch_dtype": "bfloat16"
 
 
18
  },
19
- "rotation_components": 9,
20
- "seperate_control_proj": true,
21
- "timestep_proj_config": {
22
- "num_tokens": 3,
23
- "pos_embed_scale": 8,
24
- "proj_layers": [
25
- 128,
26
- 512,
27
- 1024
28
- ],
29
- "time_delta_sec": 0.1
30
- },
31
- "token_proj_config": {
32
- "control_tokens_layers": [
33
- 4096,
34
- 2048,
35
- 1024
36
- ],
37
- "image_tokens_mode": "vit",
38
- "llm_image_tokens_layers": [],
39
- "vit_tokens_layers": [
40
- 2176,
41
- 1024
42
- ]
43
- },
44
- "token_size": 1024,
45
- "transformer_config": {
46
- "decoder_block_config": {
47
- "dropout": 0.0,
48
- "feature_size": 1024,
49
- "head_dim": 64,
50
- "num_heads": 16
51
- },
52
- "encoder_block_config": {
53
- "feature_size": 1024,
54
- "head_dim": 64,
55
- "num_heads": 16
56
- },
57
- "num_blocks": 2,
58
- "pos_embed_config": {
59
- "embedding_dim": 1024,
60
- "num_embeddings": 300
61
- }
62
- },
63
- "transformers_version": "4.44.2"
64
  }
 
1
  {
2
+ "arch_specifier": "no-align+gelu-mlp",
3
+ "architectures": [
4
+ "TrajectoryVLA"
5
+ ],
6
  "auto_map": {
7
+ "AutoModelForVision2Seq": "prismatic_model.TrajectoryVLA"
8
  },
9
+ "hf_llm_id": "meta-llama/Llama-2-7b-hf",
10
+ "image_resize_strategy": "letterbox",
11
+ "image_sizes": [
12
+ 224,
13
+ 224
14
+ ],
15
+ "llm_backbone_id": "llama2-7b-pure",
16
+ "llm_max_length": 2048,
17
+ "model_type": "prismatic",
18
+ "output_projector_states": false,
19
+ "pad_to_multiple_of": 64,
20
+ "pad_token_id": 32000,
21
+ "return_dict": false,
22
+ "text_config": {
23
+ "model_type": "llama"
24
  },
25
+ "timm_model_ids": [
26
+ "vit_large_patch14_reg4_dinov2.lvd142m",
27
+ "vit_so400m_patch14_siglip_224"
28
+ ],
29
+ "timm_override_act_layers": [
30
+ null,
31
+ null
32
+ ],
33
+ "torch_dtype": "bfloat16",
34
+ "transformers_version": "4.44.2",
35
+ "use_fused_vision_backbone": true,
36
+ "vision_backbone_id": "dinosiglip-vit-so-224px"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  }