{"batch_size": 16, "buffer_size": 40000, "shuffle_seed": 32, "vocab_file": "wiki_en_vocab", "min_seq_len": false, "ckpt_interval": 2000, "val_steps": 1000, "train_size": 95, "vocab_size": 50257, "seq_len": 512, "learning_rate": 0.001, "beta_1": 0.9, "beta_2": 0.95, "decay_lr": false, "decay_steps": 400000, "alpha": 0.1, "emb_dim": 512, "heads": 8, "mlp_dim": 512, "depth": 10, "dropout": 0.0, "initializer": "glorot_uniform", "embedding_initializer": "glorot_uniform", "eps": 1e-06, "mlp_activation": "gelu"}