Add DataCollatorForLanguageModeling to trainer configuration in train.py for improved data handling during training.
train.py CHANGED
@@ -34,6 +34,7 @@ from datasets import (
 )
 from transformers import AutoTokenizer, Trainer, TrainingArguments
 from trl import SFTTrainer
+from trl.data.data_collator import DataCollatorForLanguageModeling
 
 # Configuration
 max_seq_length = 2048  # Auto supports RoPE Scaling internally
@@ -211,8 +212,8 @@ def create_trainer(
             logging_steps=1,
             save_strategy="steps",
             save_steps=30,
-            eval_strategy="steps",
-            eval_steps=30,
+            eval_strategy="steps",
+            eval_steps=30,
             save_total_limit=2,
             optim="adamw_8bit",
             weight_decay=0.01,
@@ -224,6 +225,11 @@ def create_trainer(
             metric_for_best_model="eval_loss",
             greater_is_better=False,
         ),
+        data_collator=DataCollatorForLanguageModeling(
+            tokenizer=tokenizer,
+            mlm=False,
+            pad_to_multiple_of=8,
+        ),
     )
     logger.info("Trainer created successfully")
     return trainer
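For context, here is a minimal sketch of what the added collator does to a causal-LM batch. The commit imports the class from trl.data.data_collator; the sketch below instead uses transformers.DataCollatorForLanguageModeling, which accepts the same tokenizer / mlm / pad_to_multiple_of arguments shown in the diff, and "gpt2" is only a placeholder tokenizer, not the model this repo trains.

    from transformers import AutoTokenizer, DataCollatorForLanguageModeling

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder tokenizer
    tokenizer.pad_token = tokenizer.eos_token          # GPT-2 has no pad token by default

    collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,             # causal LM: labels are a copy of input_ids
        pad_to_multiple_of=8,  # round the padded batch length up to a multiple of 8
    )

    batch = collator([tokenizer("short"), tokenizer("a somewhat longer example")])
    print(batch["input_ids"].shape)  # (2, L) with L a multiple of 8
    print(batch["labels"][0])        # padded positions are masked with -100

With mlm=False the collator copies input_ids into labels and sets padded positions to -100 so they are ignored by the loss, while pad_to_multiple_of=8 keeps padded lengths aligned with tensor-core-friendly shapes on fp16/bf16 hardware. The eval_strategy="steps" / eval_steps=30 settings in the same diff line up with save_steps=30, so evaluation and checkpointing stay in sync for best-model selection via metric_for_best_model="eval_loss".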