codebyzeb committed on
Commit f71783d · verified · 1 Parent(s): d3aacf0

Delete llm/fw57M-tied/hparams.yaml

Files changed (1)
  1. llm/fw57M-tied/hparams.yaml +0 -84
llm/fw57M-tied/hparams.yaml DELETED
@@ -1,84 +0,0 @@
- loggers:
-   tensorboard:
-     _target_: src.trainer.TensorBoardLogger
-     save_dir: ./
-     name: ''
-     version: null
- callbacks:
-   lr_monitor:
-     _target_: src.callbacks.lr_monitor.SimpleLearningRateMonitor
-   grad_norm:
-     _target_: src.callbacks.grad_norm.GradNorm
-     norm_type: 2
-     group_separator: /
-     histogram_freq: null
-     check_clipping: false
-     log_weight_distribution: false
-     only_total: true
-   speed_monitor:
-     _target_: src.callbacks.speed_monitor.SpeedMonitor
-   grad_accum:
-     _target_: src.callbacks.gradient_accumulation.GradientAccumulationScheduler
-     scheduling:
-       0: 4
-   model_checkpoint:
-     _target_: src.callbacks.model_checkpoint.ModelCheckpoint
-     dirpath: .checkpoints
-     filename: '{step}'
-     enable_version_counter: false
-     every_n_train_steps: 2000
-     save_top_k: -1
-     save_last: link
-     verbose: true
-     save_initial_checkpoint: true
- out_parent_folder: model_train
- tok_name: bytelevel
- run_folder: .
- dataset: finewebedu-20B
- pwd: /home/zg258/projects/infotokenization
- train_data_path: /home/zg258/projects/infotokenization/data/finewebedu-20B/bytelevel-subset/train
- val_data_path: /home/zg258/projects/infotokenization/data/finewebedu-20B/bytelevel-subset/validation
- model: fw57M-tied
- resume_from_checkpoint: .checkpoints/last.ckpt
- save_initial_checkpoint: true
- seed: 42
- torch_compile: true
- data:
-   batch_size: 32
-   eval_batch_size: 128
-   shuffle: true
-   drop_last: false
-   num_workers: 12
-   pin_memory: true
-   persistent_workers: false
-   prefetch_factor: 2
-   multiprocessing_context: null
- optim:
-   optim_name: adamw
-   lr: 0.0006
-   weight_decay: 0.01
-   optim_kwargs:
-     fused: true
-     eps: 1.0e-08
-     betas:
-     - 0.9
-     - 0.95
-   scheduler_name: warmup_stable_decay
-   num_warmup_steps: 2000
-   scheduler_kwargs:
-     num_stable_steps: 44000
-     num_decay_steps: 4000
-     min_lr_ratio: 0.01
- trainer:
-   accelerator: gpu
-   devices: 1
-   precision: bf16-true
-   deterministic: false
-   log_every_n_steps: 1
-   enable_progress_bar: true
-   fast_dev_run: false
-   gradient_clip_val: 1.0
-   gradient_clip_algorithm: norm
-   val_check_interval: 2000
-   max_steps: 50000
-   limit_val_batches: 500
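For context, the `_target_` keys in the deleted config follow the Hydra instantiation convention. A minimal sketch of how such a file could be loaded and its logger and callbacks built, assuming Hydra/OmegaConf are used here (the file name and variable names below are illustrative, not the repo's actual code):

```python
# Minimal sketch, assuming `_target_` paths are resolved via Hydra.
from omegaconf import OmegaConf
from hydra.utils import instantiate

cfg = OmegaConf.load("hparams.yaml")  # hypothetical local copy of the deleted file

# Each callback entry becomes an instance of the class named in its `_target_` field,
# e.g. src.callbacks.model_checkpoint.ModelCheckpoint(dirpath=".checkpoints", ...).
callbacks = [instantiate(cb) for cb in cfg.callbacks.values()]
logger = instantiate(cfg.loggers.tensorboard)
```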
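The `optim` section also specifies a warmup-stable-decay schedule whose three phases sum to `max_steps` (2000 + 44000 + 4000 = 50000). A rough sketch of the learning-rate curve those numbers imply, as an assumption about the schedule's shape rather than the repo's implementation:

```python
def lr_at_step(step: int,
               peak_lr: float = 6e-4,       # optim.lr
               warmup: int = 2_000,          # num_warmup_steps
               stable: int = 44_000,         # num_stable_steps
               decay: int = 4_000,           # num_decay_steps
               min_lr_ratio: float = 0.01) -> float:
    """Warmup -> stable -> decay; the phases cover the full 50k training steps."""
    if step < warmup:                        # linear warmup from 0 to peak_lr
        return peak_lr * step / warmup
    if step < warmup + stable:               # hold at peak_lr
        return peak_lr
    # linear decay from peak_lr down to min_lr_ratio * peak_lr, clamped after max_steps
    progress = min(1.0, (step - warmup - stable) / decay)
    return peak_lr * (1.0 - progress * (1.0 - min_lr_ratio))
```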