Fill-Mask · Transformers · PyTorch · Safetensors · English · nomic_bert · custom_code
zpn committed
Commit 0f627bd
1 Parent(s): b9c36f5

Update modeling_hf_nomic_bert.py

Files changed (1):
  1. modeling_hf_nomic_bert.py +7 -5
modeling_hf_nomic_bert.py CHANGED
@@ -724,7 +724,7 @@ class NomicBertAttention(nn.Module):
 
         self.rotary_emb_dim = self.head_dim * config.rotary_emb_fraction
         if self.rotary_emb_dim > 0:
-            if config.rotary_scaling_factor:
+            if getattr(config, "rotary_scaling_factor", None):
                 self.rotary_emb = NomicBertDynamicNTKRotaryEmbedding(
                     dim=self.rotary_emb_dim,
                     base=config.rotary_emb_base,
@@ -859,7 +859,6 @@ class NomicBertBlock(nn.Module):
         max_seq_len: Optional[int] = None,
     ):
         r"""Pass the input through the encoder layer.
-
         Args:
             hidden_states: the sequence to the encoder layer (required).
             residual: if postnorm, residual=None, If prenorm, hidden_states = Attn/MLP(LN(residual))
@@ -1055,10 +1054,11 @@ class NomicBertModel(NomicBertPreTrainedModel):
     def forward(
         self,
         input_ids,
-        attention_mask=None,
-        token_type_ids=None,
         position_ids=None,
+        token_type_ids=None,
+        attention_mask=None,
         return_dict=None,
+        matryoshka_dim=None,
     ):
         if token_type_ids is None:
             token_type_ids = torch.zeros_like(input_ids)
@@ -1071,6 +1071,9 @@ class NomicBertModel(NomicBertPreTrainedModel):
 
         pooled_output = self.pooler(sequence_output) if self.pooler is not None else None
 
+        if matryoshka_dim:
+            sequence_output = sequence_output[:, :matryoshka_dim]
+
         return BaseModelOutputWithPoolingAndCrossAttentions(
             last_hidden_state=sequence_output,
             pooler_output=pooled_output,
@@ -1113,7 +1116,6 @@ class NomicBertForPreTraining(NomicBertPreTrainedModel):
         Outputs a tuple comprising
         - the masked language modeling logits of shape [batch_size, sequence_length, vocab_size], and
         - the next sentence classification logits of shape [batch_size, 2].
-
         """
         outputs = self.bert(
             input_ids,
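
The first hunk replaces direct attribute access with getattr and a None default, so a config that never defines rotary_scaling_factor no longer raises AttributeError and simply skips the dynamic-NTK rotary branch. A minimal sketch of that difference, using throwaway stand-in config classes rather than the real NomicBertConfig:

    # Stand-in configs for illustration only; NomicBertConfig is not shown in this diff.
    class _CfgWithoutScaling:
        rotary_emb_base = 10000.0

    class _CfgWithScaling:
        rotary_emb_base = 10000.0
        rotary_scaling_factor = 2.0

    # Old check: plain attribute access fails when the field is absent.
    try:
        bool(_CfgWithoutScaling().rotary_scaling_factor)
    except AttributeError:
        print("old check raises on configs without rotary_scaling_factor")

    # New check: getattr falls back to None (falsy), so the branch is skipped
    # instead of crashing, and still triggers when the field is set.
    for cfg in (_CfgWithoutScaling(), _CfgWithScaling()):
        print(bool(getattr(cfg, "rotary_scaling_factor", None)))  # False, then True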
 
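The NomicBertModel.forward hunks move token_type_ids and attention_mask after position_ids, add a matryoshka_dim keyword, and slice sequence_output with sequence_output[:, :matryoshka_dim] before the outputs are assembled. A hedged usage sketch of the new keyword; the checkpoint id below is a placeholder (this commit does not name the repository), and trust_remote_code=True is assumed because the model ships custom code:

    import torch
    from transformers import AutoModel, AutoTokenizer

    MODEL_ID = "<nomic-bert-checkpoint>"  # placeholder, not taken from this commit

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModel.from_pretrained(MODEL_ID, trust_remote_code=True)
    model.eval()

    batch = tokenizer("hello world", return_tensors="pt")
    with torch.no_grad():
        out = model(
            batch["input_ids"],
            attention_mask=batch["attention_mask"],
            matryoshka_dim=256,  # new keyword added by this commit
        )

    # last_hidden_state is the sequence_output returned after the slice above.
    print(out.last_hidden_state.shape)

Passing the arguments by keyword, as above, also sidesteps the reordering of token_type_ids and attention_mask in this commit; positional callers would need to be updated.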