import torch


# Model and optimisation hyperparameters, looked up by upper-case string key.
# The code that consumes these values is not visible in this file, so the
# per-key notes below are reviewer assumptions based on the key names only.
MODEL_CONFIG = {
    # --- Architecture (presumably a transformer; confirm against model code) ---
    'VOCAB_SIZE': 50000,
    'D_MODEL': 1024,
    # NOTE(review): D_MODEL / N_HEADS == 32; keep D_MODEL divisible by N_HEADS
    # if these feed multi-head attention -- confirm.
    'N_HEADS': 32,
    'D_FF': 4096,
    'N_LAYERS': 32,
    'MAX_SEQ_LEN': 512,

    # --- Training loop ---
    'BATCH_SIZE': 32,
    'LEARNING_RATE': 1e-4,
    'NUM_EPOCHS': 20,
    # Resolved once, when this module is imported: 'cuda' if torch reports an
    # available CUDA device, otherwise 'cpu'.
    'DEVICE': 'cuda' if torch.cuda.is_available() else 'cpu',

    # --- Optimiser / regularisation (names suggest Adam-style settings) ---
    'WARMUP_STEPS': 4000,
    'ADAM_EPSILON': 1e-8,
    'WEIGHT_DECAY': 0.01,
    'GRADIENT_ACCUMULATION_STEPS': 2,
    'MAX_GRAD_NORM': 1.0,
    'DROPOUT': 0.1,
}


# Step-interval and retention settings, looked up by upper-case string key.
# How each value is consumed is not visible in this file; the per-key notes
# are reviewer assumptions based on the key names -- confirm with the caller.
TRAINING_CONFIG = {
    'CHECKPOINT_SAVE_STEPS': 5000,  # presumably: save a checkpoint every N steps
    'LOGGING_STEPS': 100,           # presumably: emit a log record every N steps
    'EVAL_STEPS': 1000,             # presumably: run evaluation every N steps
    'SAVE_TOTAL_LIMIT': 5,          # presumably: max checkpoints kept on disk
}