zakariarada committed
Commit
f3133e8
1 Parent(s): 64c7c3c

Update README.md

Files changed (1):
  1. README.md +20 -17
README.md CHANGED
@@ -71,25 +71,28 @@ tokenized_dataset = dataset.map(tokenize_function, batched=True)
 
 # Training Arguments
 training_args = TrainingArguments(
-    output_dir="./output/TCLM-beta/",
-    num_train_epochs=1,
-    per_device_train_batch_size=2,
-    gradient_accumulation_steps=1,
-    evaluation_strategy="epoch",
-    save_strategy="epoch",
-    learning_rate=1e-4,
-    weight_decay=0.0,
-    lr_scheduler_type="cosine",
-    warmup_ratio=0.0,
-    logging_dir="./logs",
-    logging_steps=10,
-    fp16=True,
-    save_total_limit=1,
-    load_best_model_at_end=True,
-    metric_for_best_model="loss",
-    greater_is_better=False
+    output_dir="./output/TCLM-beta/",   # Directory to save model checkpoints
+    num_train_epochs=3,                 # Increase epochs for better fine-tuning results
+    per_device_train_batch_size=4,      # Adjust based on GPU memory; increase if possible
+    gradient_accumulation_steps=4,      # Accumulate gradients to simulate a larger batch size
+    evaluation_strategy="steps",        # Evaluate more frequently for detailed tracking
+    eval_steps=500,                     # Evaluate every 500 steps to track progress without over-evaluating
+    save_strategy="steps",              # Save checkpoints during training
+    save_steps=500,                     # Save the model every 500 steps
+    save_total_limit=2,                 # Keep only the best and most recent checkpoints to save disk space
+    learning_rate=5e-5,                 # Lower learning rate for fine-tuning
+    weight_decay=0.01,                  # Slight weight decay to prevent overfitting
+    lr_scheduler_type="cosine",         # Cosine schedule for smoother learning rate decay
+    warmup_ratio=0.06,                  # Warmup to stabilize initial training
+    logging_dir="./logs",               # Directory to save training logs
+    logging_steps=50,                   # Log progress every 50 steps for better monitoring
+    fp16=True,                          # Enable mixed precision for faster training with less memory
+    load_best_model_at_end=True,        # Load the best model at the end, based on the evaluation metric
+    metric_for_best_model="eval_loss",  # Use evaluation loss to determine the best model
+    greater_is_better=False,            # Lower loss is better
 )
 
+
 # Trainer Setup
 trainer = Trainer(
     model=model,
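
For context, the updated arguments are meant to slot into the README's fine-tuning walkthrough. The sketch below shows one way the surrounding code could look; it is an assumption, not the README's exact code: the `gpt2` base checkpoint, the data files, and the body of `tokenize_function` are placeholders, while `tokenized_dataset`, `training_args`, and the `Trainer` setup follow the diff.

```python
# Minimal end-to-end sketch around the updated TrainingArguments.
# Placeholders (not from the README): the "gpt2" checkpoint, the data files,
# and the tokenize_function body. The rest mirrors the diff above.
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

tokenizer = AutoTokenizer.from_pretrained("gpt2")   # placeholder base model
tokenizer.pad_token = tokenizer.eos_token           # GPT-2 ships without a pad token
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Placeholder dataset; the README's actual data source may differ
dataset = load_dataset("text", data_files={"train": "train.txt", "validation": "val.txt"})

def tokenize_function(examples):
    # Fixed max length so examples batch cleanly; assumed, not from the README
    return tokenizer(examples["text"], truncation=True, max_length=512)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

training_args = TrainingArguments(    # values exactly as in the updated README
    output_dir="./output/TCLM-beta/",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    evaluation_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    save_total_limit=2,
    learning_rate=5e-5,
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    warmup_ratio=0.06,
    logging_dir="./logs",
    logging_steps=50,
    fp16=True,                        # requires a CUDA GPU
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    # mlm=False makes the collator build causal-LM labels from input_ids
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()
```

Note that with `per_device_train_batch_size=4` and `gradient_accumulation_steps=4` the effective batch size is 16 per device, and `load_best_model_at_end=True` requires the evaluation and save schedules to line up, which the matching `eval_steps=500` / `save_steps=500` values satisfy.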