Mohammaderfan koupaei committed on
Commit 660777d · 1 Parent(s): 61d58d1
Files changed (1)
  1. scripts/training/trainer.py +21 -7
scripts/training/trainer.py CHANGED
@@ -44,8 +44,11 @@ class NarrativeTrainer:
         self.global_step = 0
         self.best_val_f1 = 0.0
 
-        # Initialize mixed precision training
-        self.scaler = GradScaler('cuda', enabled=self.config.fp16)
+        # Initialize mixed precision training (Fixed version)
+        if self.config.fp16:
+            self.scaler = torch.cuda.amp.GradScaler()
+        else:
+            self.scaler = None
 
         # Setup training components
         self.setup_training()
@@ -204,7 +207,8 @@ class NarrativeTrainer:
         for step, batch in pbar:
             batch = {k: v.to(self.device, non_blocking=True) for k, v in batch.items()}
 
-            with autocast(enabled=self.config.fp16):
+            # Mixed precision training
+            with torch.cuda.amp.autocast(enabled=self.config.fp16):
                 outputs = self.model(
                     input_ids=batch['input_ids'],
                     attention_mask=batch['attention_mask'],
@@ -213,17 +217,27 @@ class NarrativeTrainer:
             loss = self.criterion(outputs, batch['labels'])
             loss = loss / self.config.gradient_accumulation_steps
 
-            self.scaler.scale(loss).backward()
+            # Backward pass with scaler if fp16 is enabled
+            if self.config.fp16:
+                self.scaler.scale(loss).backward()
+            else:
+                loss.backward()
 
             if (step + 1) % self.config.gradient_accumulation_steps == 0:
-                self.scaler.unscale_(self.optimizer)
+                if self.config.fp16:
+                    self.scaler.unscale_(self.optimizer)
+
                 torch.nn.utils.clip_grad_norm_(
                     self.model.parameters(),
                     self.config.max_grad_norm
                 )
 
-                self.scaler.step(self.optimizer)
-                self.scaler.update()
+                if self.config.fp16:
+                    self.scaler.step(self.optimizer)
+                    self.scaler.update()
+                else:
+                    self.optimizer.step()
+
                 self.scheduler.step()
                 self.optimizer.zero_grad()
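
For reference, the fp16-conditional pattern this commit introduces (create a GradScaler only when fp16 is enabled, scale the loss and unscale before clipping, and fall back to a plain backward/step otherwise) can be exercised on its own. The sketch below is not code from this repository: Cfg, the linear model, and the random tensors are hypothetical stand-ins, and only the scaler/autocast control flow follows the diff above.

# Standalone sketch of the fp16-conditional update loop, assuming a toy model and config.
import torch
import torch.nn as nn

class Cfg:
    fp16 = torch.cuda.is_available()          # GradScaler/autocast apply to CUDA only
    gradient_accumulation_steps = 2
    max_grad_norm = 1.0

cfg = Cfg()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = nn.Linear(16, 4).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)
criterion = nn.CrossEntropyLoss()
scaler = torch.cuda.amp.GradScaler() if cfg.fp16 else None

for step in range(8):
    inputs = torch.randn(8, 16, device=device)
    labels = torch.randint(0, 4, (8,), device=device)

    # Forward pass under autocast; a no-op when fp16 is disabled
    with torch.cuda.amp.autocast(enabled=cfg.fp16):
        loss = criterion(model(inputs), labels)
    loss = loss / cfg.gradient_accumulation_steps

    # Scale the loss only when the scaler exists
    if cfg.fp16:
        scaler.scale(loss).backward()
    else:
        loss.backward()

    if (step + 1) % cfg.gradient_accumulation_steps == 0:
        if cfg.fp16:
            scaler.unscale_(optimizer)        # unscale before clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), cfg.max_grad_norm)

        if cfg.fp16:
            scaler.step(optimizer)            # skips the step if grads contain inf/NaN
            scaler.update()
        else:
            optimizer.step()

        scheduler.step()
        optimizer.zero_grad()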