Update spaCy pipeline
Browse files
- config.cfg +83 -4
- en_tako_query_analyzer-any-py3-none-any.whl +2 -2
- lemmatizer/lookups/lookups.bin +3 -0
- meta.json +5 -0
config.cfg
CHANGED
@@ -15,7 +15,7 @@ seed = 0
|
|
15 |
|
16 |
[nlp]
|
17 |
lang = "en"
|
18 |
-
pipeline = ["tok2vec","ner","textcat_classify"]
|
19 |
batch_size = 1000
|
20 |
disabled = []
|
21 |
before_creation = null
|
@@ -26,6 +26,18 @@ vectors = {"@vectors":"spacy.Vectors.v1"}
|
|
26 |
|
27 |
[components]
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
[components.ner]
|
30 |
factory = "ner"
|
31 |
incorrect_spans_key = null
|
@@ -47,6 +59,45 @@ nO = null
|
|
47 |
width = 256
|
48 |
upstream = "*"
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
[components.textcat_classify]
|
51 |
factory = "weighted_textcat"
|
52 |
class_weights = [0.67,0.33]
|
@@ -102,6 +153,26 @@ window_size = 1
|
|
102 |
maxout_pieces = 3
|
103 |
depth = 8
|
104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
[corpora]
|
106 |
|
107 |
[corpora.dev]
|
@@ -189,14 +260,22 @@ eps = 0.00000001
|
|
189 |
learn_rate = 0.001
|
190 |
|
191 |
[training.score_weights]
|
192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
ents_p = 0.0
|
194 |
ents_r = 0.0
|
195 |
ents_per_type = null
|
196 |
-
cats_score = 0.
|
197 |
cats_score_desc = null
|
198 |
cats_micro_p = null
|
199 |
-
cats_micro_r = 0.
|
200 |
cats_micro_f = null
|
201 |
cats_macro_p = null
|
202 |
cats_macro_r = null
|
|
|
15 |
|
16 |
[nlp]
|
17 |
lang = "en"
|
18 |
+
pipeline = ["tok2vec_small","tagger","parser","attribute_ruler","lemmatizer","tok2vec","ner","textcat_classify"]
|
19 |
batch_size = 1000
|
20 |
disabled = []
|
21 |
before_creation = null
|
|
|
26 |
|
27 |
[components]
|
28 |
|
29 |
+
[components.attribute_ruler]
|
30 |
+
factory = "attribute_ruler"
|
31 |
+
scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
|
32 |
+
validate = false
|
33 |
+
|
34 |
+
[components.lemmatizer]
|
35 |
+
factory = "lemmatizer"
|
36 |
+
mode = "rule"
|
37 |
+
model = null
|
38 |
+
overwrite = false
|
39 |
+
scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
|
40 |
+
|
41 |
[components.ner]
|
42 |
factory = "ner"
|
43 |
incorrect_spans_key = null
|
|
|
59 |
width = 256
|
60 |
upstream = "*"
|
61 |
|
62 |
+
[components.parser]
|
63 |
+
factory = "parser"
|
64 |
+
learn_tokens = false
|
65 |
+
min_action_freq = 30
|
66 |
+
moves = null
|
67 |
+
scorer = {"@scorers":"spacy.parser_scorer.v1"}
|
68 |
+
update_with_oracle_cut_size = 100
|
69 |
+
|
70 |
+
[components.parser.model]
|
71 |
+
@architectures = "spacy.TransitionBasedParser.v2"
|
72 |
+
state_type = "parser"
|
73 |
+
extra_state_tokens = false
|
74 |
+
hidden_width = 64
|
75 |
+
maxout_pieces = 2
|
76 |
+
use_upper = true
|
77 |
+
nO = null
|
78 |
+
|
79 |
+
[components.parser.model.tok2vec]
|
80 |
+
@architectures = "spacy.Tok2VecListener.v1"
|
81 |
+
width = 96
|
82 |
+
upstream = "tok2vec"
|
83 |
+
|
84 |
+
[components.tagger]
|
85 |
+
factory = "tagger"
|
86 |
+
label_smoothing = 0.0
|
87 |
+
neg_prefix = "!"
|
88 |
+
overwrite = false
|
89 |
+
scorer = {"@scorers":"spacy.tagger_scorer.v1"}
|
90 |
+
|
91 |
+
[components.tagger.model]
|
92 |
+
@architectures = "spacy.Tagger.v2"
|
93 |
+
nO = null
|
94 |
+
normalize = false
|
95 |
+
|
96 |
+
[components.tagger.model.tok2vec]
|
97 |
+
@architectures = "spacy.Tok2VecListener.v1"
|
98 |
+
width = 96
|
99 |
+
upstream = "tok2vec"
|
100 |
+
|
101 |
[components.textcat_classify]
|
102 |
factory = "weighted_textcat"
|
103 |
class_weights = [0.67,0.33]
|
|
|
153 |
maxout_pieces = 3
|
154 |
depth = 8
|
155 |
|
156 |
+
[components.tok2vec_small]
|
157 |
+
factory = "tok2vec"
|
158 |
+
|
159 |
+
[components.tok2vec_small.model]
|
160 |
+
@architectures = "spacy.Tok2Vec.v2"
|
161 |
+
|
162 |
+
[components.tok2vec_small.model.embed]
|
163 |
+
@architectures = "spacy.MultiHashEmbed.v2"
|
164 |
+
width = 96
|
165 |
+
attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY","IS_SPACE"]
|
166 |
+
rows = [5000,1000,2500,2500,50,50]
|
167 |
+
include_static_vectors = false
|
168 |
+
|
169 |
+
[components.tok2vec_small.model.encode]
|
170 |
+
@architectures = "spacy.MaxoutWindowEncoder.v2"
|
171 |
+
width = 96
|
172 |
+
depth = 4
|
173 |
+
window_size = 1
|
174 |
+
maxout_pieces = 3
|
175 |
+
|
176 |
[corpora]
|
177 |
|
178 |
[corpora.dev]
|
|
|
260 |
learn_rate = 0.001
|
261 |
|
262 |
[training.score_weights]
|
263 |
+
tag_acc = 0.25
|
264 |
+
dep_uas = 0.12
|
265 |
+
dep_las = 0.12
|
266 |
+
dep_las_per_type = null
|
267 |
+
sents_p = null
|
268 |
+
sents_r = null
|
269 |
+
sents_f = 0.0
|
270 |
+
lemma_acc = 0.25
|
271 |
+
ents_f = 0.12
|
272 |
ents_p = 0.0
|
273 |
ents_r = 0.0
|
274 |
ents_per_type = null
|
275 |
+
cats_score = 0.06
|
276 |
cats_score_desc = null
|
277 |
cats_micro_p = null
|
278 |
+
cats_micro_r = 0.06
|
279 |
cats_micro_f = null
|
280 |
cats_macro_p = null
|
281 |
cats_macro_r = null
|
en_tako_query_analyzer-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d276080166d535d192f52fef9138c4678a32445a14b0c27f9a43c974be5c3aca
|
3 |
+
size 619963181
|
lemmatizer/lookups/lookups.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb64f40c0f8396d1762730c0ddf4dad2a52d138f5a389f71a1a1d088173b7737
|
3 |
+
size 972893
|
meta.json
CHANGED
@@ -120,6 +120,9 @@
|
|
120 |
],
|
121 |
"attribute_ruler":[
|
122 |
|
|
|
|
|
|
|
123 |
],
|
124 |
"tok2vec":[
|
125 |
|
@@ -155,6 +158,7 @@
|
|
155 |
"tagger",
|
156 |
"parser",
|
157 |
"attribute_ruler",
|
|
|
158 |
"tok2vec",
|
159 |
"ner",
|
160 |
"textcat_classify"
|
@@ -164,6 +168,7 @@
|
|
164 |
"tagger",
|
165 |
"parser",
|
166 |
"attribute_ruler",
|
|
|
167 |
"tok2vec",
|
168 |
"ner",
|
169 |
"textcat_classify"
|
|
|
120 |
],
|
121 |
"attribute_ruler":[
|
122 |
|
123 |
+
],
|
124 |
+
"lemmatizer":[
|
125 |
+
|
126 |
],
|
127 |
"tok2vec":[
|
128 |
|
|
|
158 |
"tagger",
|
159 |
"parser",
|
160 |
"attribute_ruler",
|
161 |
+
"lemmatizer",
|
162 |
"tok2vec",
|
163 |
"ner",
|
164 |
"textcat_classify"
|
|
|
168 |
"tagger",
|
169 |
"parser",
|
170 |
"attribute_ruler",
|
171 |
+
"lemmatizer",
|
172 |
"tok2vec",
|
173 |
"ner",
|
174 |
"textcat_classify"
|