_config_type: haystacks.embeddings.train_batch.TrainConfig | |
accelerator: gpu | |
accumulate_grad_batches: 1 | |
adam_beta1: 0.9 | |
adam_beta2: 0.95 | |
base_save_dir: /home/sabri/code/haystacks/checkpoints | |
check_val_every_n_epoch: null | |
ckpt_path: null | |
devices: 8 | |
dtype: bfloat16 | |
embedding_module: | |
_config_type: haystacks.embeddings.modeling.TokenEmbeddingModuleConfig | |
batch_attention_layers: [] | |
embedding_dim: 8192 | |
kwargs: {} | |
target: | |
_is_type: true | |
name: haystacks.embeddings.modeling.TokenEmbeddingModule | |
enable_checkpointing: false | |
foreach: null | |
gradient_clip_val: null | |
launch_id: null | |
learnable_bias: true | |
learnable_temp: true | |
limit_train_batches: 1.0 | |
limit_val_batches: 1.0 | |
load_hub: true | |
log_every_n_steps: 4 | |
log_grad_norms: false | |
loss_comparison: matched | |
loss_token_idxs: | |
- 64 | |
- 128 | |
- 256 | |
- 512 | |
- 1024 | |
lr: 0.0001 | |
lr_scheduler: null | |
manual_save_epochs: null | |
manual_save_steps: 8192 | |
max_epochs: 512 | |
max_hidden_layers: null | |
max_problems: null | |
max_seq_len: 1024 | |
max_steps: -1 | |
model_name: meta-llama/Llama-3.2-1B-Instruct | |
name: no_batch-attention-lr0.0001-bs32-d8192-new | |
num_sanity_val_steps: null | |
num_workers: 0 | |
objective: cross_entropy | |
output_dir: null | |
overfit_batches: 0.0 | |
precision: bf16 | |
reload_dataloaders_every_n_epochs: 0 | |
run_dir: null | |
run_id: null | |
samples_per_batch: 32 | |
save_intermediates: true | |
script_id: null | |
seed: 42 | |
train_batch_size: 1 | |
train_data_path: ScalingIntelligence/math-train-l3.2-3Bi-meta-n128 | |
use_wandb: false | |
val_batch_size: 1 | |
val_check_interval: 64 | |
val_data_path: ScalingIntelligence/math-test-l3.2-3Bi-meta-n128 | |
val_rollout_data_path: null | |
val_samples_per_batch: 32 | |
validate_before_train: true | |
wandb: | |
_config_type: haystacks.embeddings.train_batch.WandbLoggerConfig | |
group: '' | |
id: null | |
job_type: train | |
kwargs: {} | |
log_model: false | |
mode: online | |
name: null | |
prefix: '' | |
project: haystacks | |
save_dir: . | |
tags: [] | |
target: | |
_is_type: true | |
name: pytorch_lightning.loggers.wandb.WandbLogger | |
weight_decay: 0.1 | |
weights_only: true | |