{
"d_model": 8,
"d_head": 8,
"n_layers": 2,
"n_ctx": 32,
"n_heads": 1,
"d_vocab": 10,
"normalization_type": "LN",
"attn_only": true,
"positional_embedding_type": "shortformer",
"seed": null
}
{
"d_model": 8,
"d_head": 8,
"n_layers": 2,
"n_ctx": 32,
"n_heads": 1,
"d_vocab": 10,
"normalization_type": "LN",
"attn_only": true,
"positional_embedding_type": "shortformer",
"seed": null
}