Upload model
- config.json +0 -1
- configuration_multiheadcrf.py +0 -2
- model.safetensors +2 -2
- modeling_multiheadcrf.py +19 -11
config.json
CHANGED
@@ -48,7 +48,6 @@
     "num_attention_heads": 12,
     "num_hidden_layers": 12,
     "number_of_layer_per_head": 3,
-    "p_augmentation": 0.5,
     "pad_token_id": 1,
     "percentage_tags": 0.5,
     "position_embedding_type": "absolute",
configuration_multiheadcrf.py
CHANGED
@@ -13,7 +13,6 @@ class MultiHeadCRFConfig(PretrainedConfig):
         augmentation = "random",
         context_size = 64,
         percentage_tags = 0.2,
-        p_augmentation = 0.5,
         aug_prob = 0.5,
         crf_reduction = "mean",
         freeze = False,
@@ -26,7 +25,6 @@ class MultiHeadCRFConfig(PretrainedConfig):
         self.augmentation = augmentation
         self.context_size = context_size
         self.percentage_tags = percentage_tags
-        self.p_augmentation = p_augmentation
         self.aug_prob = aug_prob,
         self.crf_reduction = crf_reduction
         self.freeze=freeze
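To make the configuration change concrete, here is a small usage sketch (not part of the repo): it assumes the config class is imported from this repository's configuration_multiheadcrf.py, and shows that the augmentation probability is now carried only by aug_prob, with p_augmentation dropped from both the class and the serialized config.json above.

```python
# Hedged usage sketch: construct the trimmed config with the defaults visible
# in the diff above and confirm that `p_augmentation` is no longer an attribute.
from configuration_multiheadcrf import MultiHeadCRFConfig  # local module in this repo

config = MultiHeadCRFConfig(
    augmentation="random",
    context_size=64,
    percentage_tags=0.2,
    aug_prob=0.5,          # replaces the removed p_augmentation field
    crf_reduction="mean",
    freeze=False,
)

assert not hasattr(config, "p_augmentation")  # removed by this commit
```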
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:ec92277ec8a66dfcda1e5ea05184122eb9a767bc37367848e0525247a777a04f
+size 531721800
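model.safetensors is tracked with Git LFS, so the diff above only rewrites the pointer file (hash and byte count), not the weights themselves. Below is a small, hedged sketch for verifying that a locally downloaded copy matches the recorded pointer; the local file path is an assumption.

```python
# Verify a local model.safetensors against the LFS pointer values in this commit.
# The path "model.safetensors" is an assumption about where the file was saved.
import hashlib
import os

path = "model.safetensors"
expected_sha256 = "ec92277ec8a66dfcda1e5ea05184122eb9a767bc37367848e0525247a777a04f"
expected_size = 531721800  # bytes, as recorded in the pointer file

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
        digest.update(chunk)

print("size matches:", os.path.getsize(path) == expected_size)
print("sha256 matches:", digest.hexdigest() == expected_sha256)
```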
modeling_multiheadcrf.py
CHANGED
@@ -12,9 +12,10 @@ NUM_PER_LAYER = 16
 
 class RobertaMultiHeadCRFModel(PreTrainedModel):
     config_class = MultiHeadCRFConfig
-
+    transformers_backbone_name = "roberta"
+    transformers_backbone_class = RobertaModel
     _keys_to_ignore_on_load_unexpected = [r"pooler"]
-
+
     def __init__(self, config):
         super().__init__(config)
         self.num_labels = config.num_labels
@@ -24,7 +25,10 @@ class RobertaMultiHeadCRFModel(PreTrainedModel):
         self.heads = config.classes #expected an array of classes we are predicting
 
         # this can be BERT ROBERTA and other BERT-variants
-
+        # THIS IS BC HF needs to have "roberta" for roberta models and "bert" for BERT models as var so tha I can load
+        # check https://github.com/huggingface/transformers/blob/b487096b02307cd6e0f132b676cdcc7255fe8e74/src/transformers/models/roberta/modeling_roberta.py#L1170C16-L1170C20
+        setattr(self, self.transformers_backbone_name, self.transformers_backbone_class(config, add_pooling_layer=False))
+        #self.roberta = self.transformer_backbone_class(config, add_pooling_layer=False)
         #AutoModel(config, add_pooling_layer=False)
         #AutoModel.from_pretrained(config._name_or_path, config=config, add_pooling_layer=False)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)
@@ -43,20 +47,23 @@ class RobertaMultiHeadCRFModel(PreTrainedModel):
         self.manage_freezing()
 
     def training_mode(self):
+
         # for some reason these layers are not being correctly init
         # probably related with the lifecycle of the hf .from_pretrained method
-        self.
-
-
-
+        for ent in self.heads:
+            for i in range(self.number_of_layer_per_head):
+                getattr(self, f"{ent}_dense_{i}").reset_parameters()
+            getattr(self, f"{ent}_classifier").reset_parameters()
+            getattr(self, f"{ent}_crf").reset_parameters()
+            getattr(self, f"{ent}_crf").mask_impossible_transitions()
 
     def manage_freezing(self):
-        for _, param in self.roberta.embeddings.named_parameters():
+        for _, param in getattr(self, self.transformers_backbone_name).embeddings.named_parameters():
             param.requires_grad = False
 
         num_encoders_to_freeze = self.config.num_frozen_encoder
         if num_encoders_to_freeze > 0:
-            for _, param in islice(self.roberta.encoder.named_parameters(), num_encoders_to_freeze*NUM_PER_LAYER):
+            for _, param in islice(getattr(self, self.transformers_backbone_name).encoder.named_parameters(), num_encoders_to_freeze*NUM_PER_LAYER):
                 param.requires_grad = False
 
 
@@ -75,7 +82,7 @@ class RobertaMultiHeadCRFModel(PreTrainedModel):
         # Default `model.config.use_return_dict´ is `True´
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
-        outputs = self.roberta(input_ids,
+        outputs = getattr(self, self.transformers_backbone_name)(input_ids,
                                attention_mask=attention_mask,
                                token_type_ids=token_type_ids,
                                position_ids=position_ids,
@@ -119,7 +126,8 @@ class RobertaMultiHeadCRFModel(PreTrainedModel):
 
 class BertMultiHeadCRFModel(RobertaMultiHeadCRFModel):
     config_class = MultiHeadCRFConfig
-
+    transformers_backbone_name = "bert"
+    transformers_backbone_class = BertModel
     _keys_to_ignore_on_load_unexpected = [r"pooler"]
 
 # Taken from https://github.com/kmkurn/pytorch-crf/blob/master/torchcrf/__init__.py and fixed got uint8 warning
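The core of this change is that the encoder is no longer hard-wired as self.roberta: the class now declares transformers_backbone_name and transformers_backbone_class, attaches the backbone with setattr, and reads it back with getattr, so BertMultiHeadCRFModel can reuse the whole forward/freezing logic while exposing the attribute name ("bert") that Hugging Face's weight loading expects. The following is a minimal, self-contained sketch of that pattern under assumed class names (TinyRobertaEncoder, TinyBertEncoder); it is illustrative, not the repo's code.

```python
# Sketch of the backbone-indirection pattern introduced by this commit:
# the encoder lives under a class-configurable attribute name so that each
# subclass matches the name HF weight loading looks for ("roberta" vs "bert").
import torch
from transformers import (BertConfig, BertModel, PreTrainedModel,
                          RobertaConfig, RobertaModel)


class TinyRobertaEncoder(PreTrainedModel):
    config_class = RobertaConfig
    transformers_backbone_name = "roberta"
    transformers_backbone_class = RobertaModel

    def __init__(self, config):
        super().__init__(config)
        # Attach the backbone under its expected attribute name, as in the diff.
        setattr(self, self.transformers_backbone_name,
                self.transformers_backbone_class(config, add_pooling_layer=False))

    def forward(self, input_ids, attention_mask=None):
        # Retrieve the backbone generically, whatever it is called.
        backbone = getattr(self, self.transformers_backbone_name)
        return backbone(input_ids, attention_mask=attention_mask).last_hidden_state


class TinyBertEncoder(TinyRobertaEncoder):
    # Only the name/class pair changes; the rest of the logic is inherited.
    config_class = BertConfig
    transformers_backbone_name = "bert"
    transformers_backbone_class = BertModel


# Smoke test with a deliberately tiny, randomly initialised config.
cfg = BertConfig(vocab_size=100, hidden_size=32, num_hidden_layers=2,
                 num_attention_heads=2, intermediate_size=64)
out = TinyBertEncoder(cfg)(torch.tensor([[1, 2, 3]]))
print(out.shape)  # torch.Size([1, 3, 32])
```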