# test / test_training / config.yaml
# tanglumy
# initial commit of model weights
# 1a6c70c
# Run configuration: a flat mapping of scalar settings.
# Keys are grouped by apparent purpose; the grouping and the notes below are
# inferred from key names only -- confirm against the code that loads this file.

# --- Base model ---
model_name: 'microsoft/phi-2'
# presumably has to match the hidden dimension of model_name -- verify
hidden_size: 2560

# --- Controller ---
controller_layers: 2
controller_dropout: 0.1
controller_lr: 0.0001

# --- Pondering ---
max_ponder_steps: 3
# NOTE(review): looks like a cut-off compared against a controller output;
# which side of 0.3 triggers what is not visible here -- confirm.
threshold: 0.3

# --- Objective weights ---
lambda_accuracy: 1.0
lambda_flops: 0.005

# --- Data ---
dataset_name: 'gsm8k'
train_samples: 10
eval_samples: 5

# --- Schedule ---
epochs: 1
batch_size: 2
# NOTE(review): train_samples (10) / batch_size (2) x epochs (1) is at most
# 5 batches. If warmup_steps and log_interval are counted in batches, both
# exceed the length of this run -- confirm that is intended.
warmup_steps: 100

# --- Evaluation, logging, checkpointing ---
eval_baseline: true
# units of the *_interval keys (steps vs. epochs) are not visible here -- TODO confirm
eval_interval: 1
log_interval: 10
save_interval: 1

# --- Output ---
output_dir: 'outputs/test_training'