HaileyStorm
/

llama3-5.4b-instruct-unhealed

+# Tokenizer
+# Tokenizer model file shipped alongside the merged checkpoint.
+tokenizer:
+  _component_: torchtune.models.llama3.llama3_tokenizer
+  path: ../../slice_with_mergekit/merged/original/tokenizer.model
+# Dataset and Sampler
+dataset:
+  _component_: custom_datasets.orpo_dpo_mix_40k_dataset
+  # Must not exceed model.max_seq_len below; both are kept at 8192 (2**13).
+  # The previous value, 8196, was a typo for 8192.
+  max_seq_len: 8192
+#dataset:
+#  _component_: torchtune.datasets.stack_exchanged_paired_dataset
+seed: 42
+shuffle: True
+batch_size: 1
+# Model Arguments
+# Architecture of the checkpoint loaded by the checkpointer section
+# (../../slice_with_mergekit/merged/); these values have to describe those weights.
+model:
+  _component_: torchtune.models.llama3.llama3
+  vocab_size: 128256
+  num_layers: 20
+  num_heads: 32
+  num_kv_heads: 8
+  embed_dim: 4096
+  # Same limit as dataset.max_seq_len above (was 8196, typo for 8192).
+  max_seq_len: 8192
+  intermediate_dim: 14336
+  attn_dropout: 0.0
+  norm_eps: 1e-5
+  rope_base: 500000.0
+# Checkpointer: reads the sharded safetensors weights of the merged model and
+# writes results to output_dir.
+checkpointer:
+  _component_: torchtune.utils.FullModelHFCheckpointer
+  model_type: LLAMA3
+  checkpoint_dir: ../../slice_with_mergekit/merged/
+  checkpoint_files:
+    - model-00001-of-00003.safetensors
+    - model-00002-of-00003.safetensors
+    - model-00003-of-00003.safetensors
+  recipe_checkpoint: null
+  output_dir: /media/hailey/More/AI/PruneMe/train/torchtune/llama3-5b/
+# Start a fresh run (no recipe state is restored).
+resume_from_checkpoint: False
+# Fine-tuning arguments
+# Single pass over the data; max_steps_per_epoch below is null, i.e. no step cap is set here.
+epochs: 1
+# Optimizer: plain AdamW is active; the 8-bit paged variant is kept as a commented alternative.
+optimizer:
+  _component_: torch.optim.AdamW #bitsandbytes.optim.PagedAdamW8bit
+  lr: 5e-6
+# Learning-rate schedule: cosine schedule with 1000 warmup steps.
+lr_scheduler:
+  _component_: torchtune.modules.get_cosine_schedule_with_warmup
+  num_warmup_steps: 1000
+# Disabled alternative: DPO loss settings, kept for reference.
+#loss:
+#  _component_: torchtune.modules.loss.DPOLoss
+#  beta: 0.1
+#  label_smoothing: 0
+#  loss_type: sigmoid
+# Active loss: standard cross-entropy.
+# NOTE(review): the dataset component and the top-level output_dir are DPO-named while the
+# loss is cross-entropy — confirm this pairing is what the recipe expects.
+loss:
+  _component_: torch.nn.CrossEntropyLoss
+max_steps_per_epoch: null
+# These two settings are linked: per the inline note, optimizer_in_bwd must be
+# False whenever gradient_accumulation_steps is greater than 1.
+gradient_accumulation_steps: 1
+optimizer_in_bwd: True  # False if grad accum > 1
+compile: False
+# Training environment
+device: cuda
+# Memory management
+enable_activation_checkpointing: True
+# Reduced precision
+# fp32 is selected here, so no reduced-precision dtype is actually in use.
+dtype: fp32
+# Logging
+# enable logging to the built-in WandBLogger
+metric_logger:
+  _component_: torchtune.utils.metric_logging.WandBLogger
+  # the W&B project to log to
+  project: llama3-5b
+# Top-level output directory. Note it differs from checkpointer.output_dir
+# (.../llama3-5b/ there, .../llama3-5b-dpo/ here).
+output_dir: /media/hailey/More/AI/PruneMe/train/torchtune/llama3-5b-dpo/
+# Log on every step; peak-memory statistics are not logged.
+log_every_n_steps: 1
+log_peak_memory_stats: False

lora.yaml ADDED Viewed

	@@ -0,0 +1,93 @@

+# Tokenizer
+# Tokenizer model file shipped alongside the merged checkpoint.
+tokenizer:
+  _component_: torchtune.models.llama3.llama3_tokenizer
+  path: ../../slice_with_mergekit/merged/original/tokenizer.model
+# Dataset and Sampler
+dataset:
+  _component_: custom_datasets.orpo_dpo_mix_40k_dataset
+  # Kept equal to model.max_seq_len below.
+  max_seq_len: 4096  #8192
+#dataset:
+#  _component_: torchtune.datasets.stack_exchanged_paired_dataset
+seed: 42
+shuffle: True
+batch_size: 2
+# Model Arguments
+# Architecture of the base checkpoint (../../slice_with_mergekit/merged/) plus LoRA settings.
+model:
+  _component_: torchtune.models.llama3.lora_llama3
+  vocab_size: 128256
+  num_layers: 20
+  num_heads: 32
+  num_kv_heads: 8
+  embed_dim: 4096
+  max_seq_len: 4096  #8192
+  intermediate_dim: 14336
+  attn_dropout: 0.0
+  norm_eps: 1e-5
+  rope_base: 500000.0
+  # Attention projections that receive LoRA adapters. torchtune's lora_llama3 only
+  # recognises q_proj / k_proj / v_proj / output_proj in this list. The HF-style names
+  # used before (o_proj, gate_proj, up_proj, down_proj) match nothing, which left the
+  # attention output projection without an adapter. The MLP projections are enabled
+  # separately through apply_lora_to_mlp.
+  lora_attn_modules: ["q_proj", "k_proj", "v_proj", "output_proj"]
+  apply_lora_to_mlp: True
+  apply_lora_to_output: True
+  # alpha is twice the rank (192 / 96).
+  lora_rank: 96
+  lora_alpha: 192
+  # Quantize the frozen base weights (QLoRA-style); the previous setting was False.
+  quantize_base: True # False
+# Checkpointer: reads the sharded safetensors weights of the merged model and
+# writes results to output_dir. No adapter or recipe state is supplied.
+checkpointer:
+  _component_: torchtune.utils.FullModelHFCheckpointer
+  model_type: LLAMA3
+  checkpoint_dir: ../../slice_with_mergekit/merged/
+  checkpoint_files:
+    - model-00001-of-00003.safetensors
+    - model-00002-of-00003.safetensors
+    - model-00003-of-00003.safetensors
+  adapter_checkpoint: null
+  recipe_checkpoint: null
+  output_dir: /media/hailey/More/AI/PruneMe/train/torchtune/llama3-5b/
+# Start a fresh run (no recipe state is restored).
+resume_from_checkpoint: False
+# Fine-tuning arguments
+# Three epochs (265 was the earlier value), each capped by max_steps_per_epoch below.
+epochs: 3 #265
+# Optimizer: plain AdamW is active; the 8-bit paged variant is kept as a commented alternative.
+optimizer:
+  _component_: torch.optim.AdamW #bitsandbytes.optim.PagedAdamW8bit
+  lr: 3e-6
+# Learning-rate schedule: cosine schedule with 1500 warmup steps.
+# NOTE(review): epochs (3) * max_steps_per_epoch (500) = 1500, the same as
+# num_warmup_steps. If the recipe counts steps that way, the whole run is warmup and
+# the cosine decay never starts — confirm against the recipe's step accounting.
+lr_scheduler:
+  _component_: torchtune.modules.get_cosine_schedule_with_warmup
+  num_warmup_steps: 1500
+# Disabled alternative: DPO loss settings, kept for reference.
+#loss:
+#  _component_: torchtune.modules.loss.DPOLoss
+#  beta: 0.1
+#  label_smoothing: 0
+#  loss_type: sigmoid
+# Active loss: standard cross-entropy.
+# NOTE(review): the dataset component and the top-level output_dir are DPO-named while the
+# loss is cross-entropy — confirm this pairing is what the recipe expects.
+loss:
+  _component_: torch.nn.CrossEntropyLoss
+# Cap on steps per epoch (null, i.e. no cap, was the earlier value).
+max_steps_per_epoch: 500 #null
+# These two settings are linked: per the inline note, optimizer_in_bwd must be
+# False whenever gradient_accumulation_steps is greater than 1, as it is here.
+gradient_accumulation_steps: 2
+optimizer_in_bwd: False  # False if grad accum > 1
+compile: False
+# Training environment
+device: cuda
+# Memory management
+enable_activation_checkpointing: True
+# Reduced precision
+# fp32 is selected here, so no reduced-precision dtype is in use; bf16 is the noted alternative.
+dtype: fp32  #bf16
+# Profiler component, currently switched off.
+profiler:
+  _component_: torchtune.utils.profiler
+  enabled: False
+# Logging
+# enable logging to the built-in WandBLogger
+metric_logger:
+  _component_: torchtune.utils.metric_logging.WandBLogger
+  # the W&B project to log to
+  project: llama3-5b
+# Top-level output directory. Note it differs from checkpointer.output_dir
+# (.../llama3-5b/ there, .../llama3-5b-dpo/ here).
+output_dir: /media/hailey/More/AI/PruneMe/train/torchtune/llama3-5b-dpo/
+# Log on every step; peak-memory statistics are not logged.
+log_every_n_steps: 1
+log_peak_memory_stats: False

test_generation.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+# Define your model path
+model_path = "./merged"  # or the path/model_name you have
+# Your custom quantization configuration
+quantization_config = None
+# Load the model and tokenizer
+model = AutoModelForCausalLM.from_pretrained(model_path,
+                                             device_map="auto",
+                                             quantization_config=quantization_config,
+                                             output_hidden_states=True)
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+# Initialize the messages list with a generic short system message
+messages = [
+    {"role": "system", "content": "You are a helpful assistant."}
+]
+# Function to generate a response
+def generate_response(messages):
+    input_ids = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        return_tensors="pt"
+    ).to(model.device)
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
+    outputs = model.generate(
+        input_ids,
+        max_new_tokens=256,
+        eos_token_id=terminators,
+        do_sample=True,
+        temperature=0.6,
+        top_p=0.9,
+    )
+    response = outputs[0][input_ids.shape[-1]:]
+    return tokenizer.decode(response, skip_special_tokens=True)
+# Interactive loop
+while True:
+    # Get user input
+    user_input = input("User: ")
+    # Check if the user wants to quit
+    if user_input.lower() == 'q':
+        break
+    # Update the messages list with the user input
+    messages.append({"role": "user", "content": user_input})
+    # Generate a response based on the updated messages
+    response = generate_response(messages)
+    print("Assistant:", response)
+    # Update the messages list with the generated response
+    messages.append({"role": "assistant", "content": response})