{
  "activation_function": "gelu",
  "architectures": [
    "DeepShallowModel"
  ],
  "attention_heads": 8,
  "decoder_layers": 1,
  "dropout": 0.1,
  "emb_size": 512,
  "encoder_layers": 12,
  "ffn_hid_dim": 2048,
  "max_position_embeddings": 64,
  "model_type": "transformer",
  "src_vocab_size": 10000,
  "tgt_vocab_size": 10000,
  "torch_dtype": "float32",
  "transformers_version": "4.17.0"
}
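
For reference, a minimal sketch of how a model matching these hyperparameters could be instantiated. The `DeepShallowModel` class below is an assumption built on `torch.nn.Transformer` (not the original implementation), and the `config.json` path is hypothetical; note the deep-encoder/shallow-decoder split (12 encoder layers vs. 1 decoder layer) taken directly from the config.

```python
import json

import torch
import torch.nn as nn

# Load the configuration shown above (path is hypothetical).
with open("config.json") as f:
    cfg = json.load(f)

class DeepShallowModel(nn.Module):
    """Sketch of a seq2seq transformer wired from the config fields.

    The class name mirrors the "architectures" entry in the config;
    the internals here are an illustrative assumption.
    """

    def __init__(self, cfg):
        super().__init__()
        self.src_embed = nn.Embedding(cfg["src_vocab_size"], cfg["emb_size"])
        self.tgt_embed = nn.Embedding(cfg["tgt_vocab_size"], cfg["emb_size"])
        self.pos_embed = nn.Embedding(cfg["max_position_embeddings"], cfg["emb_size"])
        self.transformer = nn.Transformer(
            d_model=cfg["emb_size"],
            nhead=cfg["attention_heads"],
            num_encoder_layers=cfg["encoder_layers"],  # 12: deep encoder
            num_decoder_layers=cfg["decoder_layers"],  # 1: shallow decoder
            dim_feedforward=cfg["ffn_hid_dim"],
            dropout=cfg["dropout"],
            activation=cfg["activation_function"],     # "gelu"
            batch_first=True,
        )
        self.generator = nn.Linear(cfg["emb_size"], cfg["tgt_vocab_size"])

    def forward(self, src, tgt):
        # Add learned positional embeddings; positions broadcast over the batch.
        pos_src = torch.arange(src.size(1), device=src.device)
        pos_tgt = torch.arange(tgt.size(1), device=tgt.device)
        src_emb = self.src_embed(src) + self.pos_embed(pos_src)
        tgt_emb = self.tgt_embed(tgt) + self.pos_embed(pos_tgt)
        out = self.transformer(src_emb, tgt_emb)
        return self.generator(out)  # logits over the target vocabulary

model = DeepShallowModel(cfg)
```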