---
# Model hyperparameter configuration.
# One key per line: the previous single-line form placed several
# "key: value" pairs inside one plain scalar, which is not valid YAML.
# NOTE(review): the per-key meanings below are inferred from the key names
# only; confirm each against the code that consumes this file.

# Architecture selector (string).
type: transformer

# Presumably the model/embedding width - TODO confirm.
dim: 64

# Presumably the number of stacked layers - TODO confirm.
n_layers: 4

# Presumably the attention head count per layer - TODO confirm.
n_heads: 4

# Presumably a multiplier for the feed-forward hidden size relative to
# `dim` - TODO confirm.
fc_scale: 2

# Presumably the maximum sequence length, in tokens - TODO confirm.
context_length: 2048

# Presumably the number of distinct token ids - TODO confirm.
vocab_size: 178