TinyNews-28M / config.json
GregSamek's picture
Upload config.json
a3e057c verified
raw
history blame contribute delete
375 Bytes
{
"model": {
"vocab_size": 8192,
"context_length": 128,
"d_embedding": 384,
"d_intermediate": 1536,
"n_heads": 12,
"n_layers": 12,
"qkv_bias": false
},
"train": {
"peak_lr": 0.001,
"warmup_ratio": 0.01,
"n_epochs": 2,
"batch_size": 8,
"weight_decay": 0.1
}
}