Commit aef2858 (verified) · Parent(s): ebd64e7
GinnM committed

Upload ProSSTXForMaskedLM

Files changed (3):
  1. config.json +4 -4
  2. configuration_prosst.py +3 -3
  3. modeling_prosst.py +46 -46
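
The rename is visible to downstream users mainly through the Auto classes: the updated auto_map and model_type in config.json below tell transformers to resolve this checkpoint to ProSSTXConfig and ProSSTXForMaskedLM when custom code is trusted. A minimal loading sketch follows; the repository id is a placeholder, not taken from this commit.

# Minimal loading sketch; "GinnM/ProSSTX-2048" is a placeholder repo id.
from transformers import AutoConfig, AutoModelForMaskedLM

config = AutoConfig.from_pretrained("GinnM/ProSSTX-2048", trust_remote_code=True)
print(config.model_type)        # "ProSSTX", as written in config.json below

model = AutoModelForMaskedLM.from_pretrained("GinnM/ProSSTX-2048", trust_remote_code=True)
print(type(model).__name__)     # ProSSTXForMaskedLM, resolved via the auto_map entry
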
config.json CHANGED
@@ -1,12 +1,12 @@
 {
   "_name_or_path": "/rgzn/limc/ProSST/oracle_checkpoint3/ss_2051_0_aa2pos_pos2aa_aa2ss_ss2aa_False/ProSSTX-2048",
   "architectures": [
-    "ProSSTForMaskedLM"
+    "ProSSTXForMaskedLM"
   ],
   "attention_probs_dropout_prob": 0.1,
   "auto_map": {
-    "AutoConfig": "configuration_prosst.ProSSTConfig",
-    "AutoModelForMaskedLM": "modeling_prosst.ProSSTForMaskedLM"
+    "AutoConfig": "configuration_prosst.ProSSTXConfig",
+    "AutoModelForMaskedLM": "modeling_prosst.ProSSTXForMaskedLM"
   },
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
@@ -18,7 +18,7 @@
   "max_position_embeddings": -1,
   "max_relative_positions": 1024,
   "mlm_probability": 0.15,
-  "model_type": "ProSST",
+  "model_type": "ProSSTX",
   "num_attention_heads": 16,
   "num_hidden_layers": 24,
   "pad_token_id": 0,
configuration_prosst.py CHANGED
@@ -1,7 +1,7 @@
 from transformers import PretrainedConfig
 
-class ProSSTConfig(PretrainedConfig):
-    model_type = "ProSST"
+class ProSSTXConfig(PretrainedConfig):
+    model_type = "ProSSTX"
 
     def __init__(
         self,
@@ -68,4 +68,4 @@ class ProSSTConfig(PretrainedConfig):
         self.pooler_dropout = pooler_dropout
         self.pooler_hidden_act = pooler_hidden_act
 
-ProSSTConfig.register_for_auto_class()
+ProSSTXConfig.register_for_auto_class()
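
For context, register_for_auto_class() is what writes the auto_map entry seen in config.json whenever this config is saved. The sketch below assumes the constructor's keyword arguments all have defaults, as is conventional for PretrainedConfig subclasses; the output directory name is illustrative.

# Sketch of the effect of register_for_auto_class(); directory name is illustrative.
from configuration_prosst import ProSSTXConfig

cfg = ProSSTXConfig()                     # assumes all keyword arguments have defaults
cfg.save_pretrained("./prosstx-export")   # resulting config.json carries
                                          #   "model_type": "ProSSTX" and
                                          #   "auto_map": {"AutoConfig": "configuration_prosst.ProSSTXConfig"}
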
modeling_prosst.py CHANGED
@@ -12,7 +12,7 @@ from transformers.modeling_outputs import (
     TokenClassifierOutput,
 )
 from transformers.modeling_utils import PreTrainedModel
-from .configuration_prosst import ProSSTConfig
+from .configuration_prosst import ProSSTXConfig
 import torch.nn.functional as F
 from functools import partial
 
@@ -262,7 +262,7 @@ class ContextPooler(nn.Module):
         return self.config.hidden_size
 
 
-class ProSSTLayerNorm(nn.Module):
+class ProSSTXLayerNorm(nn.Module):
     """LayerNorm module in the TF style (epsilon inside the square root)."""
 
     def __init__(self, size, eps=1e-12):
@@ -286,7 +286,7 @@ class ProSSTLayerNorm(nn.Module):
 
 class DisentangledSelfAttention(nn.Module):
 
-    def __init__(self, config: ProSSTConfig):
+    def __init__(self, config: ProSSTXConfig):
         super().__init__()
         self.config = config
         self.num_attention_heads = config.num_attention_heads
@@ -526,11 +526,11 @@ class DisentangledSelfAttention(nn.Module):
         return score, disentangled_attentions
 
 
-class ProSSTSelfOutput(nn.Module):
+class ProSSTXSelfOutput(nn.Module):
     def __init__(self, config):
         super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
-        self.LayerNorm = ProSSTLayerNorm(config.hidden_size, config.layer_norm_eps)
+        self.LayerNorm = ProSSTXLayerNorm(config.hidden_size, config.layer_norm_eps)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)
 
     def forward(self, hidden_states, input_tensor):
@@ -540,12 +540,12 @@ class ProSSTSelfOutput(nn.Module):
         return hidden_states
 
 
-class ProSSTAttention(nn.Module):
+class ProSSTXAttention(nn.Module):
     def __init__(self, config):
         super().__init__()
         self.config = config
         self.self = DisentangledSelfAttention(config)
-        self.output = ProSSTSelfOutput(config)
+        self.output = ProSSTXSelfOutput(config)
 
     def forward(
         self,
@@ -573,7 +573,7 @@ class ProSSTAttention(nn.Module):
         return attention_output
 
 
-class ProSSTIntermediate(nn.Module):
+class ProSSTXIntermediate(nn.Module):
     def __init__(self, config):
         super().__init__()
         self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
@@ -588,11 +588,11 @@ class ProSSTIntermediate(nn.Module):
         return hidden_states
 
 
-class ProSSTOutput(nn.Module):
+class ProSSTXOutput(nn.Module):
     def __init__(self, config):
         super().__init__()
         self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
-        self.LayerNorm = ProSSTLayerNorm(config.hidden_size, config.layer_norm_eps)
+        self.LayerNorm = ProSSTXLayerNorm(config.hidden_size, config.layer_norm_eps)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)
         self.config = config
 
@@ -603,13 +603,13 @@ class ProSSTOutput(nn.Module):
         return hidden_states
 
 
-class ProSSTLayer(nn.Module):
+class ProSSTXLayer(nn.Module):
     def __init__(self, config):
         super().__init__()
         self.config = config
-        self.attention = ProSSTAttention(config)
-        self.intermediate = ProSSTIntermediate(config)
-        self.output = ProSSTOutput(config)
+        self.attention = ProSSTXAttention(config)
+        self.intermediate = ProSSTXIntermediate(config)
+        self.output = ProSSTXOutput(config)
 
     def forward(
         self,
@@ -638,13 +638,13 @@ class ProSSTLayer(nn.Module):
         return layer_output
 
 
-class ProSSTEncoder(nn.Module):
+class ProSSTXEncoder(nn.Module):
     """Modified BertEncoder with relative position bias support"""
 
     def __init__(self, config):
         super().__init__()
         self.layer = nn.ModuleList(
-            [ProSSTLayer(config) for _ in range(config.num_hidden_layers)]
+            [ProSSTXLayer(config) for _ in range(config.num_hidden_layers)]
         )
         self.relative_attention = config.relative_attention
         if self.relative_attention:
@@ -709,7 +709,7 @@ class ProSSTEncoder(nn.Module):
         )
 
 
-class ProSSTEmbeddings(nn.Module):
+class ProSSTXEmbeddings(nn.Module):
     """Construct the embeddings from word, position and token_type embeddings."""
 
     def __init__(self, config):
@@ -720,7 +720,7 @@ class ProSSTEmbeddings(nn.Module):
         self.word_embeddings = nn.Embedding(
             config.vocab_size, self.embedding_size, padding_idx=self.pad_token_id
         )
-        self.LayerNorm = ProSSTLayerNorm(config.hidden_size, config.layer_norm_eps)
+        self.LayerNorm = ProSSTXLayerNorm(config.hidden_size, config.layer_norm_eps)
 
         # absolute position encoding
         self.position_biased_input = config.position_biased_input
@@ -742,7 +742,7 @@ class ProSSTEmbeddings(nn.Module):
         # SS embeddings
         if config.ss_vocab_size > 0:
             self.ss_embeddings = nn.Embedding(config.ss_vocab_size, self.embedding_size)
-            self.ss_layer_norm = ProSSTLayerNorm(
+            self.ss_layer_norm = ProSSTXLayerNorm(
                 config.hidden_size, config.layer_norm_eps
             )
         self.dropout = nn.Dropout(config.hidden_dropout_prob)
@@ -812,14 +812,14 @@ class ProSSTEmbeddings(nn.Module):
         return embeddings, None
 
 
-class ProSSTPreTrainedModel(PreTrainedModel):
+class ProSSTXPreTrainedModel(PreTrainedModel):
     """
     An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
     models.
     """
 
-    config_class = ProSSTConfig
-    base_model_prefix = "ProSST"
+    config_class = ProSSTXConfig
+    base_model_prefix = "ProSSTX"
     _keys_to_ignore_on_load_unexpected = ["position_embeddings"]
     supports_gradient_checkpointing = True
 
@@ -837,16 +837,16 @@ class ProSSTPreTrainedModel(PreTrainedModel):
             module.weight.data[module.padding_idx].zero_()
 
     def _set_gradient_checkpointing(self, module, value=False):
-        if isinstance(module, ProSSTEncoder):
+        if isinstance(module, ProSSTXEncoder):
             module.gradient_checkpointing = value
 
 
-class ProSSTModel(ProSSTPreTrainedModel):
+class ProSSTXModel(ProSSTXPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
         self.config = config
-        self.embeddings = ProSSTEmbeddings(config)
-        self.encoder = ProSSTEncoder(config)
+        self.embeddings = ProSSTXEmbeddings(config)
+        self.encoder = ProSSTXEncoder(config)
         self.post_init()
 
     def forward(
@@ -882,7 +882,7 @@ class ProSSTModel(ProSSTPreTrainedModel):
         )
 
 
-class ProSSTPredictionHeadTransform(nn.Module):
+class ProSSTXPredictionHeadTransform(nn.Module):
     def __init__(self, config):
         super().__init__()
         self.embedding_size = getattr(config, "embedding_size", config.hidden_size)
@@ -900,11 +900,11 @@ class ProSSTPredictionHeadTransform(nn.Module):
         return hidden_states
 
 
-class ProSSTLMPredictionHead(nn.Module):
+class ProSSTXLMPredictionHead(nn.Module):
    def __init__(self, config):
         super().__init__()
         self.config = config
-        self.transform = ProSSTPredictionHeadTransform(config)
+        self.transform = ProSSTXPredictionHeadTransform(config)
         self.embedding_size = config.hidden_size
         self.decoder = nn.Linear(self.embedding_size, config.vocab_size, bias=False)
 
@@ -914,24 +914,24 @@ class ProSSTLMPredictionHead(nn.Module):
         return hidden_states
 
 
-class ProSSTOnlyMLMHead(nn.Module):
+class ProSSTXOnlyMLMHead(nn.Module):
     def __init__(self, config):
         super().__init__()
-        self.predictions = ProSSTLMPredictionHead(config)
+        self.predictions = ProSSTXLMPredictionHead(config)
 
     def forward(self, sequence_output):
         prediction_scores = self.predictions(sequence_output)
         return prediction_scores
 
 
-class ProSSTPreTrainedModel(PreTrainedModel):
+class ProSSTXPreTrainedModel(PreTrainedModel):
     """
     An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
     models.
     """
 
-    config_class = ProSSTConfig
-    base_model_prefix = "ProSST"
+    config_class = ProSSTXConfig
+    base_model_prefix = "ProSSTX"
     _keys_to_ignore_on_load_unexpected = ["position_embeddings"]
     supports_gradient_checkpointing = True
 
@@ -949,11 +949,11 @@ class ProSSTPreTrainedModel(PreTrainedModel):
             module.weight.data[module.padding_idx].zero_()
 
     def _set_gradient_checkpointing(self, module, value=False):
-        if isinstance(module, ProSSTEncoder):
+        if isinstance(module, ProSSTXEncoder):
             module.gradient_checkpointing = value
 
 
-class ProSSTForMaskedLM(ProSSTPreTrainedModel):
+class ProSSTXForMaskedLM(ProSSTXPreTrainedModel):
     _tied_weights_keys = [
         "cls.predictions.decoder.weight",
         "cls.predictions.decoder.bias",
@@ -961,8 +961,8 @@ class ProSSTForMaskedLM(ProSSTPreTrainedModel):
 
     def __init__(self, config):
         super().__init__(config)
-        self.prosst = ProSSTModel(config)
-        self.cls = ProSSTOnlyMLMHead(config)
+        self.prosst = ProSSTXModel(config)
+        self.cls = ProSSTXOnlyMLMHead(config)
         self.post_init()
 
     def forward(
@@ -1005,14 +1005,14 @@ class ProSSTForMaskedLM(ProSSTPreTrainedModel):
         )
 
 
-class ProSSTForSequenceClassification(ProSSTPreTrainedModel):
+class ProSSTXForSequenceClassification(ProSSTXPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
         num_labels = getattr(config, "num_labels", 2)
         self.num_labels = num_labels
         self.scale_hidden = getattr(config, "scale_hidden", 1)
-        self.prosst = ProSSTModel(config)
+        self.prosst = ProSSTXModel(config)
         self.pooler = ContextPooler(config)
         output_dim = self.pooler.output_dim * self.scale_hidden
 
@@ -1125,12 +1125,12 @@ class ProSSTForSequenceClassification(ProSSTPreTrainedModel):
         )
 
 
-class ProSSTForTokenClassification(ProSSTPreTrainedModel):
+class ProSSTXForTokenClassification(ProSSTXPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
         self.num_labels = config.num_labels
 
-        self.prosst = ProSSTModel(config)
+        self.prosst = ProSSTXModel(config)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)
         self.classifier = nn.Linear(config.hidden_size, config.num_labels)
 
@@ -1190,9 +1190,9 @@ class ProSSTForTokenClassification(ProSSTPreTrainedModel):
         )
 
 
-ProSSTModel.register_for_auto_class("AutoModel")
-ProSSTForMaskedLM.register_for_auto_class("AutoModelForMaskedLM")
-ProSSTForSequenceClassification.register_for_auto_class(
+ProSSTXModel.register_for_auto_class("AutoModel")
+ProSSTXForMaskedLM.register_for_auto_class("AutoModelForMaskedLM")
+ProSSTXForSequenceClassification.register_for_auto_class(
     "AutoModelForSequenceClassification"
 )
-ProSSTForTokenClassification.register_for_auto_class("AutoModelForTokenClassification")
+ProSSTXForTokenClassification.register_for_auto_class("AutoModelForTokenClassification")
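
After loading through AutoModelForMaskedLM (as sketched at the top of this commit), the renamed modules are reachable under the attribute names kept by this diff; the repository id is again a placeholder.

# Continues the earlier loading sketch; attribute names are taken from the diff above.
from transformers import AutoModelForMaskedLM

mlm = AutoModelForMaskedLM.from_pretrained("GinnM/ProSSTX-2048", trust_remote_code=True)

backbone = mlm.prosst                 # ProSSTXModel (the submodule attribute stays "prosst")
embeddings = mlm.prosst.embeddings    # ProSSTXEmbeddings
encoder = mlm.prosst.encoder          # ProSSTXEncoder
mlm_head = mlm.cls                    # ProSSTXOnlyMLMHead wrapping ProSSTXLMPredictionHead
print(type(backbone).__name__)        # "ProSSTXModel"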