Support for PT2C
modeling_mpt.py +1 -1
modeling_mpt.py
CHANGED
@@ -152,7 +152,7 @@ class MPTModel(MPTPreTrainedModel):
         if output_attentions:
             if self.attn_impl != 'torch':
                 raise NotImplementedError('output_attentions is not implemented for MPT when using attn_impl `flash` or `triton`.')
-        if attention_mask is not None and attention_mask[:, 0].sum() != attention_mask.shape[0]:
+        if self.training and attention_mask is not None and attention_mask[:, 0].sum() != attention_mask.shape[0]:
             raise NotImplementedError('MPT does not support training with left padding.')
         if self.prefix_lm and prefix_mask is None:
             raise ValueError('prefix_mask is a required argument when MPT is configured with prefix_lm=True.')
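For context, here is a minimal standalone sketch of what the changed guard does; the helper check_left_padding is hypothetical and not part of modeling_mpt.py, it only mirrors the condition shown in the diff. With self.training added, a left-padded batch trips the NotImplementedError only in training mode, while inference with left padding proceeds.

# Minimal sketch of the left-padding guard after this commit (assumes only torch).
import torch

def check_left_padding(attention_mask: torch.Tensor, training: bool) -> None:
    # attention_mask[:, 0].sum() != attention_mask.shape[0] is true whenever at
    # least one sequence in the batch has a 0 in its first position, i.e. is
    # left-padded. After this commit the error is raised only while training.
    if training and attention_mask is not None and attention_mask[:, 0].sum() != attention_mask.shape[0]:
        raise NotImplementedError('MPT does not support training with left padding.')

# Left-padded batch: the first row starts with padding tokens.
mask = torch.tensor([[0, 0, 1, 1], [1, 1, 1, 1]])

check_left_padding(mask, training=False)   # inference: passes after this change
try:
    check_left_padding(mask, training=True)  # training: still raises
except NotImplementedError as e:
    print(e)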