import time # only needed if the timestamped run name below is enabled
#out_dir = 'out-owt-gpt2mini'
out_dir = 'out-funcom_raw_scratch'
eval_interval = 1000 # evaluate (and checkpoint) every 1000 iters
eval_iters = 40 # average the val loss estimate over 40 batches
wandb_log = True # set to False to disable logging
wandb_project = 'fundats_srcml'
wandb_run_name = 'ft-gpt2-srcml-1' #+ str(time.time()) # uncomment to make run names unique
dataset = 'fundats_srcml'
init_from = 'scratch'
#init_from = 'gpt2-large'
# save a checkpoint after every eval, even if the validation loss does not improve
always_save_checkpoint = True
#n_layer = 6
#n_head = 6
#n_embd = 384
#dropout = 0.2
block_size = 1024 # context length in tokens
# gpt2-large
#n_layer = 36
#n_head = 20
#n_embd = 1280
#dropout = 0.2
# gpt2-medium
n_layer = 24
n_head = 16
n_embd = 1024
dropout = 0.2
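# (n_layer=24, n_head=16, n_embd=1024 matches gpt2-medium, roughly 350M parameters)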
# the number of tokens per iter:
# 4 batch_size * 4 grad_accum * 1024 tokens = 16,384 tokens/iter
# shakespeare has 301,966 tokens, so 1 epoch ~= 18.4 iters
# stackoverflow has 10,495,518,108 tokens
# openwebtext has 9,035,582,489 tokens
# funcom_raw has 8,752,695,577 tokens
# fundats_srcml has 48,774,749,459 tokens
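# at 16,384 tokens/iter, 1 epoch of fundats_srcml ~= 48,774,749,459 / 16,384 ~= 2.98M iters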
batch_size = 4
gradient_accumulation_steps = 4
max_iters = 372122 * 10
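# 372,122 * 10 = 3,721,220 iters * 16,384 tokens/iter ~= 61.0B tokens (~1.25 epochs of fundats_srcml);
# 372,122 itself ~= one epoch at 131,072 tokens/iter, i.e. grad_accum = 32 at this batch size
# and block size, so this target may assume a larger grad_accum than the value set above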
# constant LR, no decay (kept from the finetuning setup, even though this run starts from scratch)
learning_rate = 3e-5
decay_lr = False
#weight_decay = 1e-1