Commit 660777d
Mohammaderfan koupaei committed
1 Parent(s): 61d58d1

Commit message: second

Files changed:
- scripts/training/trainer.py (+21 -7)
scripts/training/trainer.py (CHANGED)
@@ -44,8 +44,11 @@ class NarrativeTrainer:
         self.global_step = 0
         self.best_val_f1 = 0.0
 
-        # Initialize mixed precision training
-
+        # Initialize mixed precision training (Fixed version)
+        if self.config.fp16:
+            self.scaler = torch.cuda.amp.GradScaler()
+        else:
+            self.scaler = None
 
         # Setup training components
         self.setup_training()
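The hunk above makes the scaler setup explicit: self.scaler is a torch.cuda.amp.GradScaler only when config.fp16 is set, and None otherwise, so the rest of the training loop can branch on the flag before touching the scaler. A minimal standalone sketch of the same pattern, assuming a hypothetical TrainConfig stand-in for self.config:

# Minimal sketch of the scaler initialization above, outside the
# NarrativeTrainer class. TrainConfig and its fp16 field mirror the
# trainer's config attribute and are assumptions, not repo code.
from dataclasses import dataclass

import torch


@dataclass
class TrainConfig:
    fp16: bool = True


config = TrainConfig(fp16=torch.cuda.is_available())

# Create a GradScaler only when fp16 is enabled; otherwise keep None
# so later code can branch on it, as the diff does.
scaler = torch.cuda.amp.GradScaler() if config.fp16 else None

Keeping the attribute as None (rather than leaving it undefined) means later code can check it without hasattr guards.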
@@ -204,7 +207,8 @@ class NarrativeTrainer:
         for step, batch in pbar:
             batch = {k: v.to(self.device, non_blocking=True) for k, v in batch.items()}
 
-
+            # Mixed precision training
+            with torch.cuda.amp.autocast(enabled=self.config.fp16):
                 outputs = self.model(
                     input_ids=batch['input_ids'],
                     attention_mask=batch['attention_mask'],
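This hunk runs the forward pass under torch.cuda.amp.autocast so that eligible ops execute in half precision when fp16 is enabled; with enabled=False the context manager is a no-op and the float32 path is unchanged. A small illustrative sketch, with a toy linear model and random inputs standing in for self.model and the batch (assumptions, not repo code):

# Sketch of an autocast-wrapped forward pass.
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
use_fp16 = device == "cuda"

model = torch.nn.Linear(16, 4).to(device)
inputs = torch.randn(8, 16, device=device)

# With enabled=False this context manager does nothing, so the same
# code path also works on CPU-only machines.
with torch.cuda.amp.autocast(enabled=use_fp16):
    outputs = model(inputs)

print(outputs.dtype)  # torch.float16 under CUDA autocast, else torch.float32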
@@ -213,17 +217,27 @@ class NarrativeTrainer:
                 loss = self.criterion(outputs, batch['labels'])
                 loss = loss / self.config.gradient_accumulation_steps
 
-
+            # Backward pass with scaler if fp16 is enabled
+            if self.config.fp16:
+                self.scaler.scale(loss).backward()
+            else:
+                loss.backward()
 
             if (step + 1) % self.config.gradient_accumulation_steps == 0:
-                self.
+                if self.config.fp16:
+                    self.scaler.unscale_(self.optimizer)
+
                 torch.nn.utils.clip_grad_norm_(
                     self.model.parameters(),
                     self.config.max_grad_norm
                 )
 
-                self.
-
+                if self.config.fp16:
+                    self.scaler.step(self.optimizer)
+                    self.scaler.update()
+                else:
+                    self.optimizer.step()
+
                 self.scheduler.step()
                 self.optimizer.zero_grad()
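The last hunk guards every scaler call behind the fp16 flag: the loss is scaled before backward, gradients are unscaled before clipping so clip_grad_norm_ sees true gradient magnitudes, and scaler.step plus scaler.update replace a plain optimizer.step. A sketch of that optimizer-step logic as a standalone helper, branching on whether a scaler exists (equivalent to the config flag given the initialization above); the function name and arguments are illustrative, not repo code:

# fp16-aware optimizer step, mirroring the structure of the hunk above.
import torch


def optimizer_step(model, optimizer, scheduler, scaler, max_grad_norm=1.0):
    if scaler is not None:
        # Gradients must be unscaled before clipping, otherwise the clip
        # threshold is compared against loss-scaled values.
        scaler.unscale_(optimizer)
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

    if scaler is not None:
        # scaler.step() skips the parameter update if inf/nan gradients
        # were found; scaler.update() then adjusts the loss scale.
        scaler.step(optimizer)
        scaler.update()
    else:
        optimizer.step()

    scheduler.step()
    optimizer.zero_grad()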
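Putting the three hunks together, the overall pattern in this commit is: create a GradScaler only when fp16 is on, run the forward pass under autocast, scale the loss for backward, and unscale, clip, step, and update once per gradient-accumulation window. A self-contained toy example of that pattern follows; the model, data, and hyperparameters are invented for illustration, and it falls back to plain float32 on machines without CUDA:

# Toy end-to-end training step using the same fp16 + accumulation structure.
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
fp16 = device == "cuda"
accum_steps = 2
max_grad_norm = 1.0

model = torch.nn.Linear(16, 4).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)
criterion = torch.nn.CrossEntropyLoss()
scaler = torch.cuda.amp.GradScaler() if fp16 else None

batches = [(torch.randn(8, 16), torch.randint(0, 4, (8,))) for _ in range(4)]

for step, (x, y) in enumerate(batches):
    x, y = x.to(device), y.to(device)

    # Forward pass under autocast; divide by accum_steps so accumulated
    # gradients average over the window.
    with torch.cuda.amp.autocast(enabled=fp16):
        logits = model(x)
        loss = criterion(logits, y) / accum_steps

    # Backward pass, scaled when fp16 is enabled.
    if fp16:
        scaler.scale(loss).backward()
    else:
        loss.backward()

    # Optimizer step once per accumulation window.
    if (step + 1) % accum_steps == 0:
        if fp16:
            scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        if fp16:
            scaler.step(optimizer)
            scaler.update()
        else:
            optimizer.step()

        scheduler.step()
        optimizer.zero_grad()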