HaileyStorm committed (verified)
Commit 10e98ea · 1 Parent(s): 18df521

Upload 3 files

Files changed (3):
  1. full.yaml +81 -0
  2. lora.yaml +93 -0
  3. test_generation.py +63 -0
full.yaml ADDED
@@ -0,0 +1,81 @@
+ # Tokenizer
+ tokenizer:
+   _component_: torchtune.models.llama3.llama3_tokenizer
+   path: ../../slice_with_mergekit/merged/original/tokenizer.model
+
+ # Dataset and Sampler
+ dataset:
+   _component_: custom_datasets.orpo_dpo_mix_40k_dataset
+   max_seq_len: 8196
+ #dataset:
+ #  _component_: torchtune.datasets.stack_exchanged_paired_dataset
+ seed: 42
+ shuffle: True
+ batch_size: 1
+
+ # Model Arguments
+ model:
+   _component_: torchtune.models.llama3.llama3
+   vocab_size: 128256
+   num_layers: 20
+   num_heads: 32
+   num_kv_heads: 8
+   embed_dim: 4096
+   max_seq_len: 8196
+   intermediate_dim: 14336
+   attn_dropout: 0.0
+   norm_eps: 1e-5
+   rope_base: 500000.0
+
+ checkpointer:
+   _component_: torchtune.utils.FullModelHFCheckpointer
+   checkpoint_dir: ../../slice_with_mergekit/merged/
+   checkpoint_files: [
+     model-00001-of-00003.safetensors,
+     model-00002-of-00003.safetensors,
+     model-00003-of-00003.safetensors
+   ]
+   recipe_checkpoint: null
+   output_dir: /media/hailey/More/AI/PruneMe/train/torchtune/llama3-5b/
+   model_type: LLAMA3
+ resume_from_checkpoint: False
+
+ # Fine-tuning arguments
+ epochs: 1
+ optimizer:
+   _component_: torch.optim.AdamW #bitsandbytes.optim.PagedAdamW8bit
+   lr: 5e-6
+ lr_scheduler:
+   _component_: torchtune.modules.get_cosine_schedule_with_warmup
+   num_warmup_steps: 1000
+ #loss:
+ #  _component_: torchtune.modules.loss.DPOLoss
+ #  beta: 0.1
+ #  label_smoothing: 0
+ #  loss_type: sigmoid
+ loss:
+   _component_: torch.nn.CrossEntropyLoss
+
+ max_steps_per_epoch: null
+ gradient_accumulation_steps: 1
+ optimizer_in_bwd: True # False if grad accum > 1
+ compile: False
+
+ # Training environment
+ device: cuda
+
+ # Memory management
+ enable_activation_checkpointing: True
+
+ # Reduced precision
+ dtype: fp32
+
+ # Logging
+ # enable logging to the built-in WandBLogger
+ metric_logger:
+   _component_: torchtune.utils.metric_logging.WandBLogger
+   # the W&B project to log to
+   project: llama3-5b
+ output_dir: /media/hailey/More/AI/PruneMe/train/torchtune/llama3-5b-dpo/
+ log_every_n_steps: 1
+ log_peak_memory_stats: False
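For context, full.yaml is written for torchtune's full-parameter fine-tuning flow. A minimal launch sketch, assuming torchtune is installed and the custom_datasets module providing orpo_dpo_mix_40k_dataset is importable (the single-device recipe name below is an assumption, not part of this commit):

tune run full_finetune_single_device --config full.yaml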
lora.yaml ADDED
@@ -0,0 +1,93 @@
+ # Tokenizer
+ tokenizer:
+   _component_: torchtune.models.llama3.llama3_tokenizer
+   path: ../../slice_with_mergekit/merged/original/tokenizer.model
+
+ # Dataset and Sampler
+ dataset:
+   _component_: custom_datasets.orpo_dpo_mix_40k_dataset
+   max_seq_len: 4096 #8192
+ #dataset:
+ #  _component_: torchtune.datasets.stack_exchanged_paired_dataset
+ seed: 42
+ shuffle: True
+ batch_size: 2
+
+ # Model Arguments
+ model:
+   _component_: torchtune.models.llama3.lora_llama3
+   vocab_size: 128256
+   num_layers: 20
+   num_heads: 32
+   num_kv_heads: 8
+   embed_dim: 4096
+   max_seq_len: 4096 #8192
+   intermediate_dim: 14336
+   attn_dropout: 0.0
+   norm_eps: 1e-5
+   rope_base: 500000.0
+   lora_attn_modules: ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] #['q_proj', 'v_proj', 'k_proj', 'output_proj']
+   apply_lora_to_mlp: True
+   apply_lora_to_output: True
+   lora_rank: 96
+   lora_alpha: 192
+   quantize_base: True # False
+
+
+ checkpointer:
+   _component_: torchtune.utils.FullModelHFCheckpointer
+   checkpoint_dir: ../../slice_with_mergekit/merged/
+   checkpoint_files: [
+     model-00001-of-00003.safetensors,
+     model-00002-of-00003.safetensors,
+     model-00003-of-00003.safetensors
+   ]
+   adapter_checkpoint: #None
+   recipe_checkpoint: #None
+   output_dir: /media/hailey/More/AI/PruneMe/train/torchtune/llama3-5b/
+   model_type: LLAMA3
+ resume_from_checkpoint: False
+
+ # Fine-tuning arguments
+ epochs: 3 #265
+ optimizer:
+   _component_: torch.optim.AdamW #bitsandbytes.optim.PagedAdamW8bit
+   lr: 3e-6
+ lr_scheduler:
+   _component_: torchtune.modules.get_cosine_schedule_with_warmup
+   num_warmup_steps: 1500
+ #loss:
+ #  _component_: torchtune.modules.loss.DPOLoss
+ #  beta: 0.1
+ #  label_smoothing: 0
+ #  loss_type: sigmoid
+ loss:
+   _component_: torch.nn.CrossEntropyLoss
+
+ max_steps_per_epoch: 500 #null
+ gradient_accumulation_steps: 2
+ optimizer_in_bwd: False # False if grad accum > 1
+ compile: False
+
+ # Training environment
+ device: cuda
+
+ # Memory management
+ enable_activation_checkpointing: True
+
+ # Reduced precision
+ dtype: fp32 #bf16
+
+ profiler:
+   _component_: torchtune.utils.profiler
+   enabled: False
+
+ # Logging
+ # enable logging to the built-in WandBLogger
+ metric_logger:
+   _component_: torchtune.utils.metric_logging.WandBLogger
+   # the W&B project to log to
+   project: llama3-5b
+ output_dir: /media/hailey/More/AI/PruneMe/train/torchtune/llama3-5b-dpo/
+ log_every_n_steps: 1
+ log_peak_memory_stats: False
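lora.yaml is the adapter-based variant of the same setup; with quantize_base: True it is effectively a QLoRA-style run, and lora_alpha is set to twice lora_rank (192 vs 96), the common 2x scaling convention. A minimal launch sketch, again assuming the standard single-device torchtune LoRA recipe (the recipe name is an assumption, not part of this commit):

tune run lora_finetune_single_device --config lora.yaml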
test_generation.py ADDED
@@ -0,0 +1,63 @@
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+
+ # Define your model path
+ model_path = "./merged"  # or the path/model_name you have
+
+ # Your custom quantization configuration
+ quantization_config = None
+
+ # Load the model and tokenizer
+ model = AutoModelForCausalLM.from_pretrained(model_path,
+                                              device_map="auto",
+                                              quantization_config=quantization_config,
+                                              output_hidden_states=True)
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+
+ # Initialize the messages list with a generic short system message
+ messages = [
+     {"role": "system", "content": "You are a helpful assistant."}
+ ]
+
+ # Function to generate a response
+ def generate_response(messages):
+     input_ids = tokenizer.apply_chat_template(
+         messages,
+         add_generation_prompt=True,
+         return_tensors="pt"
+     ).to(model.device)
+
+     terminators = [
+         tokenizer.eos_token_id,
+         tokenizer.convert_tokens_to_ids("<|eot_id|>")
+     ]
+
+     outputs = model.generate(
+         input_ids,
+         max_new_tokens=256,
+         eos_token_id=terminators,
+         do_sample=True,
+         temperature=0.6,
+         top_p=0.9,
+     )
+     response = outputs[0][input_ids.shape[-1]:]
+     return tokenizer.decode(response, skip_special_tokens=True)
+
+ # Interactive loop
+ while True:
+     # Get user input
+     user_input = input("User: ")
+
+     # Check if the user wants to quit
+     if user_input.lower() == 'q':
+         break
+
+     # Update the messages list with the user input
+     messages.append({"role": "user", "content": user_input})
+
+     # Generate a response based on the updated messages
+     response = generate_response(messages)
+     print("Assistant:", response)
+
+     # Update the messages list with the generated response
+     messages.append({"role": "assistant", "content": response})
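test_generation.py imports BitsAndBytesConfig but leaves quantization_config = None, so the merged model loads unquantized. For a lower-memory smoke test, a 4-bit configuration along these lines could be swapped in; this is a sketch only (not part of the committed script), assumes bitsandbytes is installed, and the values shown are illustrative:

# Hypothetical 4-bit load settings for the same script (illustrative values).
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize weights to 4-bit at load time
    bnb_4bit_quant_type="nf4",              # NormalFloat4 quantization
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for compute
)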