zakariarada
committed on
Commit
•
f3133e8
1
Parent(s):
64c7c3c
Update README.md
Browse files
README.md
CHANGED
@@ -71,25 +71,28 @@ tokenized_dataset = dataset.map(tokenize_function, batched=True)
|
|
71 |
|
72 |
# Training Arguments
|
73 |
training_args = TrainingArguments(
|
74 |
-
output_dir="./output/TCLM-beta/",
|
75 |
-
num_train_epochs=
|
76 |
-
per_device_train_batch_size=
|
77 |
-
gradient_accumulation_steps=
|
78 |
-
evaluation_strategy="
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
|
|
91 |
)
|
92 |
|
|
|
93 |
# Trainer Setup
|
94 |
trainer = Trainer(
|
95 |
model=model,
|
|
|
71 |
|
72 |
# Training Arguments
|
73 |
training_args = TrainingArguments(
|
74 |
+
output_dir="./output/TCLM-beta/", # Directory to save model checkpoints
|
75 |
+
num_train_epochs=3, # Increase epochs for better fine-tuning results
|
76 |
+
per_device_train_batch_size=4, # Adjust based on GPU memory, increase if possible
|
77 |
+
gradient_accumulation_steps=4, # Accumulate gradients to simulate a larger batch size
|
78 |
+
evaluation_strategy="steps", # Evaluate more frequently for detailed tracking
|
79 |
+
eval_steps=500, # Evaluate every 500 steps to track progress without over-evaluating
|
80 |
+
save_strategy="steps", # Save checkpoints during training
|
81 |
+
save_steps=500, # Save model every 500 steps
|
82 |
+
save_total_limit=2, # Keep at most two checkpoints (most recent; best is retained via load_best_model_at_end) to save disk space
|
83 |
+
learning_rate=5e-5, # Lower learning rate for fine-tuning
|
84 |
+
weight_decay=0.01, # Slight weight decay to prevent overfitting
|
85 |
+
lr_scheduler_type="cosine", # Cosine schedule for smoother learning rate decay
|
86 |
+
warmup_ratio=0.06, # Warmup to stabilize initial training
|
87 |
+
logging_dir="./logs", # Directory to save training logs
|
88 |
+
logging_steps=50, # Log progress every 50 steps for better monitoring
|
89 |
+
fp16=True, # Enable mixed precision for faster training with less memory
|
90 |
+
load_best_model_at_end=True, # Load the best model at the end based on evaluation metric
|
91 |
+
metric_for_best_model="eval_loss", # Use evaluation loss to determine the best model
|
92 |
+
greater_is_better=False, # Lower loss is better
|
93 |
)
|
94 |
|
95 |
+
|
96 |
# Trainer Setup
|
97 |
trainer = Trainer(
|
98 |
model=model,
|