# Meta-Llama-3-8B-Instruct-torchtune-fare-rule-philipines-26-aug-2024/original/torchtune_config.yaml
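# QLoRA single-device fine-tuning config for Llama 3 8B Instruct.
# Presumably run with the stock torchtune single-device recipe, e.g.:
#   tune run lora_finetune_single_device --config torchtune_config.yaml

# QLoRA applies rank-8 adapters (alpha 16) to all four attention
# projections and the MLP, but not to the final output projection.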
model:
  _component_: torchtune.models.llama3.qlora_llama3_8b
  lora_attn_modules:
    - q_proj
    - v_proj
    - k_proj
    - output_proj
  apply_lora_to_mlp: true
  apply_lora_to_output: false
  lora_rank: 8
  lora_alpha: 16
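
# Llama 3 tokenizer, loaded from a local copy of the base model.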
tokenizer:
  _component_: torchtune.models.llama3.llama3_tokenizer
  path: /tmp/Meta-Llama-3-8B-Instruct/original/tokenizer.model
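
# Meta-format checkpointer: loads the original consolidated.00.pth and
# saves fine-tuned weights in the same format under its output_dir.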
checkpointer:
  _component_: torchtune.utils.FullModelMetaCheckpointer
  checkpoint_dir: /tmp/Meta-Llama-3-8B-Instruct/original/
  checkpoint_files:
    - consolidated.00.pth
  recipe_checkpoint: null
  output_dir: /tmp/Meta-Llama-3-8B-Instruct/
  model_type: LLAMA3
resume_from_checkpoint: false
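
# Instruction-tuning dataset pulled from the Hugging Face Hub and rendered
# with the Alpaca prompt template; only the first 60% of the train split
# is used, and train_on_input: true also computes loss on prompt tokens.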
dataset:
  _component_: torchtune.datasets.instruct_dataset
  source: b-r-ve/alpaca_fare_rules_shorter_length_500_aed_2024_philipines_25_08_24
  template: torchtune.data.AlpacaInstructTemplate
  max_seq_len: 2610
  train_on_input: true
  split: train[:60%]
seed: null
shuffle: true
batch_size: 2
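
# AdamW at lr 3e-4 with a cosine schedule after 100 linear warmup steps.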
optimizer:
  _component_: torch.optim.AdamW
  weight_decay: 0.01
  lr: 0.0003
lr_scheduler:
  _component_: torchtune.modules.get_cosine_schedule_with_warmup
  num_warmup_steps: 100
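
# Standard cross-entropy loss. With batch_size 2 and 16 gradient
# accumulation steps, the effective batch size is 32 sequences.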
loss:
  _component_: torch.nn.CrossEntropyLoss
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 16
compile: false
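
# Fine-tuning outputs and W&B metrics both land under output_dir.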
output_dir: /tmp/qlora_finetune_output/
metric_logger:
  _component_: torchtune.utils.metric_logging.WandBLogger
  log_dir: ${output_dir}
  project: torchtune_llama3_8B_qlora_single_device
log_every_n_steps: 1
log_peak_memory_stats: false
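
# Single-GPU bf16 run; activation checkpointing trades extra compute for
# the memory headroom a QLoRA 8B fit on one device typically needs.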
device: cuda
dtype: bf16
enable_activation_checkpointing: true
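
# torch.profiler setup, currently disabled; when enabled it runs one
# cycle of 5 wait + 5 warmup + 2 active steps.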
profiler:
  _component_: torchtune.utils.setup_torch_profiler
  enabled: false
  output_dir: ${output_dir}/profiling_outputs
  cpu: true
  cuda: true
  profile_memory: false
  with_stack: false
  record_shapes: true
  with_flops: false
  wait_steps: 5
  warmup_steps: 5
  active_steps: 2
  num_cycles: 1