HaileyStorm committed
Commit 294f54e · verified · Parent: 3221815

Upload 2 files

Files changed (2):
  1. scripts/full.yaml +11 -11
  2. scripts/test_generation.py +1 -1
scripts/full.yaml CHANGED
@@ -1,7 +1,7 @@
 # Tokenizer
 tokenizer:
   _component_: torchtune.models.llama3.llama3_tokenizer
-  path: ../../slice_with_mergekit/merged/original/tokenizer.model
+  path: ../original/tokenizer.model
 
 # Dataset and Sampler
 dataset:
@@ -11,7 +11,7 @@ dataset:
   # _component_: torchtune.datasets.stack_exchanged_paired_dataset
 seed: 42
 shuffle: True
-batch_size: 1
+batch_size: 2
 
 # Model Arguments
 model:
@@ -29,25 +29,25 @@ model:
 
 checkpointer:
   _component_: torchtune.utils.FullModelHFCheckpointer
-  checkpoint_dir: ../../slice_with_mergekit/merged/
+  checkpoint_dir: ../merged/
   checkpoint_files: [
     model-00001-of-00003.safetensors,
     model-00002-of-00003.safetensors,
     model-00003-of-00003.safetensors
   ]
   recipe_checkpoint: null
-  output_dir: /media/hailey/More/AI/PruneMe/train/torchtune/llama3-5b/
+  output_dir: ./llama3-5b/
   model_type: LLAMA3
 resume_from_checkpoint: False
 
 # Fine-tuning arguments
-epochs: 1
+epochs: 5
 optimizer:
   _component_: torch.optim.AdamW #bitsandbytes.optim.PagedAdamW8bit
-  lr: 5e-6
+  lr: 3e-6
 lr_scheduler:
   _component_: torchtune.modules.get_cosine_schedule_with_warmup
-  num_warmup_steps: 1000
+  num_warmup_steps: 1500
 #loss:
 #  _component_: torchtune.modules.loss.DPOLoss
 #  beta: 0.1
@@ -57,8 +57,8 @@ loss:
   _component_: torch.nn.CrossEntropyLoss
 
 max_steps_per_epoch: null
-gradient_accumulation_steps: 1
-optimizer_in_bwd: True # False if grad accum > 1
+gradient_accumulation_steps: 2
+optimizer_in_bwd: False # False if grad accum > 1
 compile: False
 
 # Training environment
@@ -68,7 +68,7 @@ device: cuda
 enable_activation_checkpointing: True
 
 # Reduced precision
-dtype: fp32
+dtype: bf16 #fp32
 
 # Logging
 # enable logging to the built-in WandBLogger
@@ -76,6 +76,6 @@ metric_logger:
   _component_: torchtune.utils.metric_logging.WandBLogger
   # the W&B project to log to
   project: llama3-5b
-  output_dir: /media/hailey/More/AI/PruneMe/train/torchtune/llama3-5b-dpo/
+  output_dir: ./logs/
 log_every_n_steps: 1
 log_peak_memory_stats: False
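Recipe notes: batch_size 2 with gradient_accumulation_steps 2 gives an effective batch size of 4, and optimizer_in_bwd is switched off because fusing the optimizer step into the backward pass cannot be combined with gradient accumulation (as the config's inline comment notes). Assuming this config pairs with torchtune's full-finetune recipe, a run would be launched with something like: tune run full_finetune_single_device --config scripts/full.yaml (the recipe name is an assumption, not part of the commit). A minimal pre-flight sketch, assuming only PyYAML, that checks the accumulation constraint:

# Hypothetical pre-flight check for scripts/full.yaml (not part of the commit).
import yaml

with open("scripts/full.yaml") as f:
    cfg = yaml.safe_load(f)

# batch_size and gradient_accumulation_steps are top-level keys in this recipe.
effective = cfg["batch_size"] * cfg["gradient_accumulation_steps"]
print(f"batch_size={cfg['batch_size']} "
      f"x grad_accum={cfg['gradient_accumulation_steps']} "
      f"-> effective batch size {effective}")

# optimizer_in_bwd fuses the optimizer step into the backward pass, so it
# cannot be combined with gradient accumulation; hence it is now False.
assert not (cfg["optimizer_in_bwd"] and cfg["gradient_accumulation_steps"] > 1), \
    "optimizer_in_bwd must be False when gradient_accumulation_steps > 1"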
scripts/test_generation.py CHANGED
@@ -2,7 +2,7 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 
 # Define your model path
-model_path = "./merged" # or the path/model_name you have
+model_path = "./llama3-5b/hf" # or the path/model_name you have
 
 # Your custom quantization configuration
 quantization_config = None
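Only the model_path line changes here, pointing the generation smoke test at ./llama3-5b/hf (presumably the HF-format export of the fine-tuned model). For context, a self-contained version of such a script might look like the sketch below; only the model_path value comes from the commit, and the prompt and generation settings are illustrative assumptions.

# Hypothetical end-to-end version of the generation smoke test; only the
# model_path value comes from this commit, the rest is illustrative.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "./llama3-5b/hf"  # or the path/model_name you have

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,  # matches the recipe's bf16 training dtype
    device_map="auto",           # requires the accelerate package
)

prompt = "The capital of France is"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=50, do_sample=False)
print(tokenizer.decode(out[0], skip_special_tokens=True))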