liho00 committed on
Commit
dece740
1 Parent(s): 7ffe306

Upload training_config.yml with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_config.yml +9 -10
training_config.yml CHANGED
@@ -11,10 +11,10 @@ model:
11
  use_clip: false
12
  tokenizer:
13
  _component_: models.a2a_tokenizer
14
- path: checkpoints/Meta-Llama-3-8B-Instruct/original/tokenizer.model
15
  checkpointer:
16
  _component_: torchtune.utils.FullModelMetaCheckpointer
17
- checkpoint_dir: checkpoints/Meta-Llama-3-8B-Instruct/original
18
  checkpoint_files:
19
  - consolidated.00.pth
20
  adapter_checkpoint: null
@@ -25,7 +25,7 @@ resume_from_checkpoint: false
25
  interim_checkpoint_steps: 5000
26
  interim_gen_steps: null
27
  max_new_tokens: 100
28
- temperature: 0.6
29
  top_k: 300
30
  dataset:
31
  _component_: ds.EvenBatcher
@@ -45,21 +45,21 @@ dataset:
45
  train_on_input: false
46
  seed: null
47
  shuffle: true
48
- batch_size: 4
49
  optimizer:
50
  _component_: torch.optim.AdamW
51
  weight_decay: 0.01
52
- lr: 0.0003
53
  lr_scheduler:
54
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
55
  num_warmup_steps: 100
56
  loss:
57
  _component_: torch.nn.CrossEntropyLoss
58
- epochs: 10
59
  max_steps_per_epoch: null
60
- gradient_accumulation_steps: 64
61
  compile: false
62
- output_dir: /tmp/lora_finetune_output
63
  metric_logger:
64
  _component_: torchtune.utils.metric_logging.DiskLogger
65
  log_dir: ${output_dir}
@@ -77,7 +77,6 @@ inference:
77
 
78
  Caption the previous video.'
79
  max_new_tokens: 300
80
- temperature: 0.6
81
  top_k: 300
82
  quantizer: null
83
- gradient-accumulation-steps: 32
 
11
  use_clip: false
12
  tokenizer:
13
  _component_: models.a2a_tokenizer
14
+ path: checkpoints/Meta-Llama-3-8B-Instruct/tokenizer.model
15
  checkpointer:
16
  _component_: torchtune.utils.FullModelMetaCheckpointer
17
+ checkpoint_dir: checkpoints/Meta-Llama-3-8B-Instruct/
18
  checkpoint_files:
19
  - consolidated.00.pth
20
  adapter_checkpoint: null
 
25
  interim_checkpoint_steps: 5000
26
  interim_gen_steps: null
27
  max_new_tokens: 100
28
+ temperature: 0.8
29
  top_k: 300
30
  dataset:
31
  _component_: ds.EvenBatcher
 
45
  train_on_input: false
46
  seed: null
47
  shuffle: true
48
+ batch_size: 2
49
  optimizer:
50
  _component_: torch.optim.AdamW
51
  weight_decay: 0.01
52
+ lr: 3.0e-06
53
  lr_scheduler:
54
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
55
  num_warmup_steps: 100
56
  loss:
57
  _component_: torch.nn.CrossEntropyLoss
58
+ epochs: 1
59
  max_steps_per_epoch: null
60
+ gradient_accumulation_steps: 32
61
  compile: false
62
+ output_dir: /home/user/omegalabs-anytoany-bittensor/modelss
63
  metric_logger:
64
  _component_: torchtune.utils.metric_logging.DiskLogger
65
  log_dir: ${output_dir}
 
77
 
78
  Caption the previous video.'
79
  max_new_tokens: 300
80
+ temperature: 0.8
81
  top_k: 300
82
  quantizer: null