---
# Small-scale training/evaluation run configuration.
# NOTE(review): per-key meanings below are inferred from the key names only;
# confirm against the code that consumes this file.

# Presumably the number of examples per optimization step.
batch_size: 2

# Controller settings (dropout probability, layer count, learning rate).
controller_dropout: 0.1
controller_layers: 2
controller_lr: 0.0001

dataset_name: gsm8k
epochs: 1

# Evaluation settings. eval_interval/eval_samples units are not visible
# here (steps vs. epochs, examples vs. batches) - TODO confirm.
eval_baseline: true
eval_interval: 1
eval_samples: 5

# Presumably must match the hidden dimension of model_name - verify.
hidden_size: 2560

# Weighting coefficients; likely loss-term weights for accuracy and FLOPs.
lambda_accuracy: 1.0
lambda_flops: 0.005

log_interval: 10
max_ponder_steps: 3
model_name: microsoft/phi-2
output_dir: outputs/test_training
save_interval: 1
threshold: 0.3
train_samples: 10
warmup_steps: 100