Update flash_attention.py
flash_attention.py (+3 -3)
@@ -66,7 +66,7 @@ class FlashAttention(nn.Module):
                 max_s,
                 self.dropout_p if self.training else 0.0,
                 self.softmax_scale,
-                causal
+                causal
             )
             output = rearrange(output, "(b s) ... -> b s ...", b=batch_size)
         else:
@@ -82,7 +82,7 @@ class FlashAttention(nn.Module):
                 max_s,
                 self.dropout_p if self.training else 0.0,
                 self.softmax_scale,
-                causal
+                causal
             )
             output = rearrange(
                 pad_input(
@@ -102,7 +102,7 @@ class FlashAttention(nn.Module):
                 max_s,
                 self.dropout_p if self.training else 0.0,
                 self.softmax_scale,
-                causal
+                causal
             )

         return output, None
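For context: the argument threaded through all three call sites above is the `causal` flag, passed positionally after `softmax_scale` into the attention kernel call. Below is a minimal pure-PyTorch sketch of the math that flag controls; it is not the fused flash-attn kernel, and `attention_ref` is a hypothetical helper name that merely mirrors the packed-qkv layout and the argument order (`dropout_p`, `softmax_scale`, `causal`) visible in the diff.

import math
import torch

def attention_ref(qkv, causal=False, softmax_scale=None, dropout_p=0.0):
    # Reference (unfused) attention over packed qkv of shape
    # (batch, seqlen, 3, nheads, headdim) -- a sketch, not flash-attn's API.
    q, k, v = qkv.unbind(dim=2)
    scale = softmax_scale if softmax_scale is not None else 1.0 / math.sqrt(q.shape[-1])
    # scores: (batch, nheads, seqlen_q, seqlen_k)
    scores = torch.einsum("bthd,bshd->bhts", q, k) * scale
    if causal:
        # causal=True: query position i may only attend to key positions j <= i,
        # so mask out the strictly upper-triangular (future) entries.
        mask = torch.triu(
            torch.ones(scores.shape[-2:], dtype=torch.bool, device=scores.device),
            diagonal=1,
        )
        scores = scores.masked_fill(mask, float("-inf"))
    attn = torch.softmax(scores, dim=-1)
    attn = torch.dropout(attn, dropout_p, train=dropout_p > 0.0)
    return torch.einsum("bhts,bshd->bthd", attn, v)

qkv = torch.randn(2, 16, 3, 4, 32)
out_causal = attention_ref(qkv, causal=True)   # autoregressive attention
out_full = attention_ref(qkv, causal=False)    # bidirectional attention

The flag thus flips the same call between bidirectional and autoregressive attention, which is why it has to reach every one of the three branches (padded, unpadded, and the pad_input path) that the diff touches.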