{
  "model": {
    "vocab_size": 8192,
    "context_length": 128,
    "d_embedding": 384,
    "d_intermediate": 1536,
    "n_heads": 12,
    "n_layers": 12,
    "qkv_bias": false
  },
  "train": {
    "peak_lr": 0.001,
    "warmup_ratio": 0.01,
    "n_epochs": 2,
    "batch_size": 8,
    "weight_decay": 0.1
  }
}