dm16 / model.yaml
jqhoogland's picture
Upload final model (step 75000) and all checkpoints at 2024-10-18T07:05:54.447826
b3912d8 verified
# Serialized model arguments for the "dm16" model (d_model = 16).
#
# NOTE(security): the `!!python/object:` tag below asks the YAML loader to
# build an instance of a Python class. PyYAML's safe loader rejects such
# tags, so this file needs an unsafe/full loader; only load it from a
# trusted source.
!!python/object:aether.model.model.TransformerLensModelArguments
# Backend selector and model label.
implementation: transformer_lens
model_name: default
# Architecture size.
n_layers: 2
# Presumably the RNG seed used for weight initialisation; verify against the
# consuming class.
model_seed: 0
d_model: 16
# Context length, presumably in tokens; TODO confirm.
n_ctx: 1024
# NOTE(review): n_heads * d_head = 8 * 32 = 256, which differs from
# d_model (16). Confirm this is intentional before reusing these values; do
# not "fix" it here without retraining, since saved checkpoints depend on
# these shapes.
d_head: 32
n_heads: 8
# NOTE(review): with attn_only set to true further down, the activation
# function is presumably unused (no MLP blocks); confirm against the consumer.
act_fn: gelu
# Vocabulary size; presumably matches the "5k" tokenizer named below.
d_vocab: 5000
# Local attention is disabled; window_size and attn_types are left null,
# presumably because they only apply when use_local_attn is true.
use_local_attn: false
tokenizer_name: georgeyw/TinyStories-tokenizer-5k
window_size: null
attn_types: null
# Attention-only model.
attn_only: true
positional_embedding_type: shortformer