experiment:
  seed: 42
  name: debug
  group: debug
  dry_run: true
  offline_run: false
  evaluate_segmentation: false
  evaluate_babyslm: false
  blimp_tasks: null
  resume_checkpoint_path: null
  resume_run_id: null
dataset:
  name: phonemetransformers/CHILDES
  subconfig: English
  text_column: phonemized_utterance
  is_phonemes: true
  max_age: null
  remove_child_utterances: true
  valid_size: 10000
tokenizer:
  name: phonemetransformers/CHILDES-English-phoneme-tokenizer
data_preprocessing:
  max_input_length: 64
  join_utts: dynamic
  remove_word_boundaries: true
  subsample: null
  subsample_type: examples
model:
  name: gpt2_lm
  model_kwargs:
    n_layer: 2
    n_head: 4
    n_embd: 128
    n_positions: 256
    n_inner: 512
trainer:
  batch_size: 32
  lr: 0.001
  num_warmup_steps: 10
  max_training_steps: 100
  logging_steps: 1
  save_steps: 50
  eval_steps: 50