Update spaCy pipeline
Browse files
- README.md +1 -1
- custom_functions.py +178 -1
- en_engagement_spl_RoBERTa_acad_max1_do02-any-py3-none-any.whl +2 -2
- meta.json +3 -3
README.md
CHANGED
@@ -59,7 +59,7 @@ model-index:
|
|
59 |
| Feature | Description |
|
60 |
| --- | --- |
|
61 |
| **Name** | `en_engagement_spl_RoBERTa_acad_max1_do02` |
|
62 |
-
| **Version** | `0.2.
|
63 |
| **spaCy** | `>=3.3.0,<3.4.0` |
|
64 |
| **Default Pipeline** | `transformer`, `tagger`, `parser`, `ner`, `trainable_transformer`, `span_finder`, `spancat` |
|
65 |
| **Components** | `transformer`, `tagger`, `parser`, `ner`, `trainable_transformer`, `span_finder`, `spancat` |
|
|
|
59 |
| Feature | Description |
|
60 |
| --- | --- |
|
61 |
| **Name** | `en_engagement_spl_RoBERTa_acad_max1_do02` |
|
62 |
+
| **Version** | `0.2.6.1130` |
|
63 |
| **spaCy** | `>=3.3.0,<3.4.0` |
|
64 |
| **Default Pipeline** | `transformer`, `tagger`, `parser`, `ner`, `trainable_transformer`, `span_finder`, `spancat` |
|
65 |
| **Components** | `transformer`, `tagger`, `parser`, `ner`, `trainable_transformer`, `span_finder`, `spancat` |
|
custom_functions.py
CHANGED
@@ -10,4 +10,181 @@ from spacy.tokens import DocBin, Doc
|
|
10 |
|
11 |
# make the config work
|
12 |
# from scripts.rel_model import create_relation_model, create_classification_layer, create_instances, create_tensors
|
13 |
-
from scripts.custom_comps.SpanCat_extention import build_mean_max_reducer1, build_mean_max_reducer2, build_mean_max_reducer3, build_mean_max_reducer4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
# make the config work
|
12 |
# from scripts.rel_model import create_relation_model, create_classification_layer, create_instances, create_tensors
|
13 |
+
# from scripts.custom_comps.SpanCat_extention import build_mean_max_reducer1, build_mean_max_reducer2, build_mean_max_reducer3, build_mean_max_reducer4
|
14 |
+
|
15 |
+
from typing import List, Tuple, cast
|
16 |
+
from thinc.api import Model, with_getitem, chain, list2ragged, Logistic
|
17 |
+
from thinc.api import Maxout, Linear, concatenate, glorot_uniform_init, PyTorchLSTM
|
18 |
+
from thinc.api import reduce_mean, reduce_max, reduce_first, reduce_last
|
19 |
+
from thinc.types import Ragged, Floats2d
|
20 |
+
|
21 |
+
from spacy.util import registry
|
22 |
+
from spacy.tokens import Doc
|
23 |
+
from spacy.ml.extract_spans import extract_spans
|
24 |
+
|
25 |
+
# @registry.layers("spacy.LinearLogistic.v1")
|
26 |
+
# def build_linear_logistic(nO=None, nI=None) -> Model[Floats2d, Floats2d]:
|
27 |
+
# """An output layer for multi-label classification. It uses a linear layer
|
28 |
+
# followed by a logistic activation.
|
29 |
+
# """
|
30 |
+
# return chain(Linear(nO=nO, nI=nI, init_W=glorot_uniform_init), Logistic())
|
31 |
+
|
32 |
+
|
33 |
+
@registry.layers("mean_max_reducer.v1.5")
def build_mean_max_reducer1(hidden_size: int,
                            dropout: float = 0.0) -> Model[Ragged, Floats2d]:
    """Build a span reducer that pools each sequence four ways and mixes
    the result with one hidden layer.

    Each sequence of the ragged input is reduced with last, first, mean and
    max pooling; the four pooled vectors are concatenated and fed through a
    single Maxout layer created with normalize=True.

    hidden_size (int): Output width (nO) of the Maxout layer.
    dropout (float): Dropout rate passed to the Maxout layer.
    RETURNS (Model[Ragged, Floats2d]): The assembled reducer model.
    """
    # The order of the poolers fixes the layout of the concatenated vector.
    pooling_layers = (
        cast(Model[Ragged, Floats2d], reduce_last()),
        cast(Model[Ragged, Floats2d], reduce_first()),
        reduce_mean(),
        reduce_max(),
    )
    pooled = concatenate(*pooling_layers)
    mixer = Maxout(nO=hidden_size, normalize=True, dropout=dropout)
    return chain(pooled, mixer)
|
48 |
+
|
49 |
+
|
50 |
+
@registry.layers("mean_max_reducer.v2")
def build_mean_max_reducer2(hidden_size: int,
                            dropout: float = 0.0) -> Model[Ragged, Floats2d]:
    """Build a span reducer that pools each sequence four ways and mixes
    the result with two stacked hidden layers.

    Each sequence of the ragged input is reduced with last, first, mean and
    max pooling; the four pooled vectors are concatenated and fed through
    two Maxout layers, both created with the same width, normalize=True and
    the same dropout rate.

    hidden_size (int): Output width (nO) of both Maxout layers.
    dropout (float): Dropout rate passed to both Maxout layers.
    RETURNS (Model[Ragged, Floats2d]): The assembled reducer model.
    """
    # The order of the poolers fixes the layout of the concatenated vector.
    pooled = concatenate(
        cast(Model[Ragged, Floats2d], reduce_last()),
        cast(Model[Ragged, Floats2d], reduce_first()),
        reduce_mean(),
        reduce_max(),
    )
    # Two identically configured hidden layers applied one after the other.
    hidden_stack = [
        Maxout(nO=hidden_size, normalize=True, dropout=dropout)
        for _ in range(2)
    ]
    return chain(pooled, *hidden_stack)
|
64 |
+
|
65 |
+
|
66 |
+
# @registry.layers("mean_max_reducer.v2")
|
67 |
+
# def build_mean_max_reducer2(hidden_size: int,
|
68 |
+
# depth: int) -> Model[Ragged, Floats2d]:
|
69 |
+
# """Reduce sequences by concatenating their mean and max pooled vectors,
|
70 |
+
# and then combine the concatenated vectors with a hidden layer.
|
71 |
+
# """
|
72 |
+
# return chain(
|
73 |
+
# concatenate(
|
74 |
+
# cast(Model[Ragged, Floats2d], reduce_last()),
|
75 |
+
# cast(Model[Ragged, Floats2d], reduce_first()),
|
76 |
+
# reduce_mean(),
|
77 |
+
# reduce_max(),
|
78 |
+
# ), Maxout(nO=hidden_size, normalize=True, dropout=0.0),
|
79 |
+
# PyTorchLSTM(nO=64, nI=hidden_size, bi=True, depth=depth, dropout=0.2))
|
80 |
+
|
81 |
+
|
82 |
+
@registry.layers("mean_max_reducer.v3")
def build_mean_max_reducer3(hidden_size: int,
                            maxout_pieces: int = 3,
                            dropout: float = 0.0) -> Model[Ragged, Floats2d]:
    """Build a span reducer that pools each sequence four ways and mixes
    the result with three stacked hidden layers.

    Each sequence of the ragged input is reduced with last, first, mean and
    max pooling; the four pooled vectors are concatenated and fed through
    three Maxout layers. The first layer has width hidden_size, the second
    and third both have width int(hidden_size / 2).

    hidden_size (int): Output width (nO) of the first Maxout layer.
    maxout_pieces (int): Number of pieces (nP) used by every Maxout layer.
    dropout (float): Dropout rate passed to every Maxout layer.
    RETURNS (Model[Ragged, Floats2d]): The assembled reducer model.
    """
    # The second and third hidden layers share this halved width.
    half_width = int(hidden_size / 2)
    # The order of the poolers fixes the layout of the concatenated vector.
    pooled = concatenate(
        cast(Model[Ragged, Floats2d], reduce_last()),
        cast(Model[Ragged, Floats2d], reduce_first()),
        reduce_mean(),
        reduce_max(),
    )
    hidden_stack = [
        Maxout(nO=width,
               nP=maxout_pieces,
               normalize=True,
               dropout=dropout)
        for width in (hidden_size, half_width, half_width)
    ]
    return chain(pooled, *hidden_stack)
|
110 |
+
|
111 |
+
|
112 |
+
@registry.layers("mean_max_reducer.v3.3")
def build_mean_max_reducer4(hidden_size: int,
                            depth: int) -> Model[Ragged, Floats2d]:
    """Build a span reducer that pools each sequence four ways and mixes
    the result with three stacked hidden layers using fixed settings.

    Each sequence of the ragged input is reduced with last, first, mean and
    max pooling; the four pooled vectors are concatenated and fed through
    three Maxout layers with nP=3, normalize=True and dropout=0.0. The first
    layer has width hidden_size, the second and third both have width
    int(hidden_size / 2).

    hidden_size (int): Output width (nO) of the first Maxout layer.
    depth (int): Accepted as part of the registered signature but not used
        anywhere in this function.
    RETURNS (Model[Ragged, Floats2d]): The assembled reducer model.
    """
    # The second and third hidden layers share this halved width.
    half_width = int(hidden_size / 2)
    # The order of the poolers fixes the layout of the concatenated vector.
    pooled = concatenate(
        cast(Model[Ragged, Floats2d], reduce_last()),
        cast(Model[Ragged, Floats2d], reduce_first()),
        reduce_mean(),
        reduce_max(),
    )
    hidden_stack = [
        Maxout(nO=width, nP=3, normalize=True, dropout=0.0)
        for width in (hidden_size, half_width, half_width)
    ]
    return chain(pooled, *hidden_stack)
|
129 |
+
|
130 |
+
|
131 |
+
@registry.architectures("CustomSpanCategorizer.v2")
def build_spancat_model(
    tok2vec: Model[List[Doc], List[Floats2d]],
    reducer: Model[Ragged, Floats2d],
    scorer: Model[Floats2d, Floats2d],
) -> Model[Tuple[List[Doc], Ragged], Floats2d]:
    """Assemble a span categorizer from three sub-models.

    The model takes a tuple whose first item is a list of docs. That item is
    run through tok2vec and packed into a Ragged array, the span vectors are
    pulled out with extract_spans, each span is collapsed by the reducer,
    and the scorer is applied to the reduced vectors.

    tok2vec (Model[List[Doc], List[Floats2d]]): The token-to-vector model.
    reducer (Model[Ragged, Floats2d]): Maps the vectors of each span to a
        single vector.
    scorer (Model[Floats2d, Floats2d]): Maps the span vectors to scores.
    RETURNS (Model[Tuple[List[Doc], Ragged], Floats2d]): The combined model,
        with "tok2vec", "reducer" and "scorer" registered as refs.
    """
    # Token vectors come back as one array per doc; pack them into a Ragged.
    to_ragged = cast(Model[List[Floats2d], Ragged], list2ragged())
    embed = chain(tok2vec, to_ragged)
    # Only item 0 of the input tuple (the docs) goes through the embedder.
    embed_docs = cast(
        Model[Tuple[List[Doc], Ragged], Tuple[Ragged, Ragged]],
        with_getitem(0, embed),
    )
    model = chain(embed_docs, extract_spans(), reducer, scorer)
    # Expose the sub-models under stable names for later lookup.
    named_layers = (
        ("tok2vec", tok2vec),
        ("reducer", reducer),
        ("scorer", scorer),
    )
    for ref_name, layer in named_layers:
        model.set_ref(ref_name, layer)
    return model
|
160 |
+
|
161 |
+
|
162 |
+
# @registry.architectures("spacy.SpanCategorizer.v1")
|
163 |
+
# def build_spancat_model(
|
164 |
+
# tok2vec: Model[List[Doc], List[Floats2d]],
|
165 |
+
# reducer: Model[Ragged, Floats2d],
|
166 |
+
# scorer: Model[Floats2d, Floats2d],
|
167 |
+
# ) -> Model[Tuple[List[Doc], Ragged], Floats2d]:
|
168 |
+
# """Build a span categorizer model, given a token-to-vector model, a
|
169 |
+
# reducer model to map the sequence of vectors for each span down to a single
|
170 |
+
# vector, and a scorer model to map the vectors to probabilities.
|
171 |
+
# tok2vec (Model[List[Doc], List[Floats2d]]): The tok2vec model.
|
172 |
+
# reducer (Model[Ragged, Floats2d]): The reducer model.
|
173 |
+
# scorer (Model[Floats2d, Floats2d]): The scorer model.
|
174 |
+
# """
|
175 |
+
# model = chain(
|
176 |
+
# cast(
|
177 |
+
# Model[Tuple[List[Doc], Ragged], Tuple[Ragged, Ragged]],
|
178 |
+
# with_getitem(
|
179 |
+
# 0,
|
180 |
+
# chain(tok2vec,
|
181 |
+
# cast(Model[List[Floats2d], Ragged], list2ragged()))),
|
182 |
+
# ),
|
183 |
+
# extract_spans(),
|
184 |
+
# reducer,
|
185 |
+
# scorer,
|
186 |
+
# )
|
187 |
+
# model.set_ref("tok2vec", tok2vec)
|
188 |
+
# model.set_ref("reducer", reducer)
|
189 |
+
# model.set_ref("scorer", scorer)
|
190 |
+
# return model
|
en_engagement_spl_RoBERTa_acad_max1_do02-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4970ef292b571df7416f968d47a309e218448a3f35d4cd3eb9852542bd98da5
|
3 |
+
size 928614883
|
meta.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"lang":"en",
|
3 |
"name":"engagement_spl_RoBERTa_acad_max1_do02",
|
4 |
-
"version":"0.2.
|
5 |
"description":"",
|
6 |
"author":"",
|
7 |
"email":"",
|
@@ -330,7 +330,7 @@
|
|
330 |
"spancat_loss":359.1590412855
|
331 |
},
|
332 |
"requirements":[
|
333 |
-
"spacy-
|
334 |
-
"spacy-
|
335 |
]
|
336 |
}
|
|
|
1 |
{
|
2 |
"lang":"en",
|
3 |
"name":"engagement_spl_RoBERTa_acad_max1_do02",
|
4 |
+
"version":"0.2.6.1130",
|
5 |
"description":"",
|
6 |
"author":"",
|
7 |
"email":"",
|
|
|
330 |
"spancat_loss":359.1590412855
|
331 |
},
|
332 |
"requirements":[
|
333 |
+
"spacy-transformers>=1.1.7,<1.2.0",
|
334 |
+
"spacy-experimental>=0.6.1,<0.7.0"
|
335 |
]
|
336 |
}
|