# Autoregressive transformer over discrete shape tokens, conditioned on text embeddings.
gpt_model:
  n_layer: 23
  n_single_layer: 1
  rope_theta: 10000                 # RoPE base frequency
  n_head: 12
  n_embd: 1536
  bias: true
  eps: 1.e-6
  shape_model_vocab_size: 16384     # must equal shape_model.num_codes
  text_model_embed_dim: 768         # text-embedding width of CLIP ViT-L/14
  use_pooled_text_embed: false
  shape_model_embed_dim: 32         # must equal shape_model.embed_dim
  encoder_with_cls_token: true

# Shape autoencoder that tokenizes geometry into a discrete codebook
# (num_codes suggests vector quantization).
shape_model:
  encoder_with_cls_token: true
  num_encoder_latents: 512
  num_decoder_latents: 0
  embed_dim: 32
  width: 768
  num_heads: 12
  out_dim: 1
  eps: 1.e-6
  num_freqs: 128                    # Fourier-feature frequencies for point coordinates
  point_feats: 3                    # extra per-point features (e.g. normals)
  embed_point_feats: false
  num_encoder_layers: 13
  encoder_cross_attention_levels: [0, 2, 4, 8]
  num_decoder_layers: 24
  num_codes: 16384                  # codebook size

# Pretrained text encoder used for conditioning.
text_model_pretrained_model_name_or_path: "openai/clip-vit-large-patch14"
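
# A minimal sketch of how this config might be consumed, kept in comments so
# the file stays valid YAML. Assumptions: the file is saved as config.yaml and
# parsed with PyYAML; the loader shown is illustrative, not the project's API.
# It also checks the cross-section invariants noted above.
#
#   import yaml
#
#   with open("config.yaml") as f:
#       cfg = yaml.safe_load(f)
#
#   gpt, shape = cfg["gpt_model"], cfg["shape_model"]
#
#   # The GPT vocabulary must cover the shape tokenizer's codebook,
#   # and both sides must agree on the token embedding width.
#   assert gpt["shape_model_vocab_size"] == shape["num_codes"]
#   assert gpt["shape_model_embed_dim"] == shape["embed_dim"]
#
#   # 768 matches the text width of openai/clip-vit-large-patch14.
#   assert gpt["text_model_embed_dim"] == 768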