d-Matrix
committed on
Update modeling_opt.py
modeling_opt.py +0 -1
modeling_opt.py
CHANGED
@@ -227,7 +227,6 @@ class OPTAttention(nn.Module):
         attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
 
         # upcast to fp32 if the weights are in fp16. Please see https://github.com/huggingface/transformers/pull/17437
-        breakpoint()
         if attn_weights.dtype == torch.float16:
             attn_weights = self.softmax(attn_weights.float()).to(torch.float16)
         else:
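The comment retained in the hunk above refers to the usual practice of running softmax in fp32 when the attention weights arrive in fp16, then casting back, to avoid overflow and precision loss. Below is a minimal standalone sketch of that pattern, not part of this commit; the tensor shape and the plain nn.Softmax module are assumptions for illustration.

import torch
import torch.nn as nn

# Illustrative sketch (not part of this commit): softmax over fp16
# attention weights is computed in fp32 and cast back to fp16,
# mirroring the pattern kept in the hunk above.
softmax = nn.Softmax(dim=-1)
# assumed shape: (bsz * num_heads, tgt_len, src_len)
attn_weights = torch.randn(8, 16, 16).half()

if attn_weights.dtype == torch.float16:
    # upcast for numerical stability, then return to fp16
    attn_probs = softmax(attn_weights.float()).to(torch.float16)
else:
    attn_probs = softmax(attn_weights)

print(attn_probs.dtype)  # torch.float16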