File size: 884 Bytes
4159782 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# Debug run configuration: dry run, 2-layer model, 100 training steps.
# NOTE(review): section nesting was reconstructed from the key grouping
# (each run of keys belongs to the section header that precedes it) —
# confirm against the schema of the code that loads this file.

# Run identity, evaluation toggles, and checkpoint-resume settings.
experiment:
  seed: 42
  name: debug
  group: debug
  dry_run: true
  offline_run: false
  evaluate_segmentation: false
  evaluate_babyslm: false
  blimp_tasks: null
  resume_checkpoint_path: null
  resume_run_id: null

# Dataset selection and filtering.
dataset:
  name: phonemetransformers/CHILDES
  subconfig: English
  text_column: phonemized_utterance
  is_phonemes: true
  max_age: null
  remove_child_utterances: true
  valid_size: 10000

tokenizer:
  name: phonemetransformers/CHILDES-English-phoneme-tokenizer

data_preprocessing:
  max_input_length: 64
  join_utts: dynamic
  remove_word_boundaries: true
  subsample: null
  subsample_type: examples

model:
  name: gpt2_lm
  # NOTE(review): the n_* keys are presumably forwarded to the model's
  # constructor/config as keyword arguments — verify against the loader.
  model_kwargs:
    n_layer: 2
    n_head: 4
    n_embd: 128
    n_positions: 256
    n_inner: 512

trainer:
  batch_size: 32
  lr: 0.001
  num_warmup_steps: 10
  max_training_steps: 100
  logging_steps: 1
  save_steps: 50
  eval_steps: 50
|