noahjax committed on
Commit
fb5cdfe
·
verified ·
1 Parent(s): 16849dc

Update spaCy pipeline

Browse files
config.cfg CHANGED
@@ -15,7 +15,7 @@ seed = 0
15
 
16
  [nlp]
17
  lang = "en"
18
- pipeline = ["tok2vec","ner","textcat_classify"]
19
  batch_size = 1000
20
  disabled = []
21
  before_creation = null
@@ -26,6 +26,18 @@ vectors = {"@vectors":"spacy.Vectors.v1"}
26
 
27
  [components]
28
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  [components.ner]
30
  factory = "ner"
31
  incorrect_spans_key = null
@@ -47,6 +59,45 @@ nO = null
47
  width = 256
48
  upstream = "*"
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  [components.textcat_classify]
51
  factory = "weighted_textcat"
52
  class_weights = [0.67,0.33]
@@ -102,6 +153,26 @@ window_size = 1
102
  maxout_pieces = 3
103
  depth = 8
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  [corpora]
106
 
107
  [corpora.dev]
@@ -189,14 +260,22 @@ eps = 0.00000001
189
  learn_rate = 0.001
190
 
191
  [training.score_weights]
192
- ents_f = 0.5
 
 
 
 
 
 
 
 
193
  ents_p = 0.0
194
  ents_r = 0.0
195
  ents_per_type = null
196
- cats_score = 0.25
197
  cats_score_desc = null
198
  cats_micro_p = null
199
- cats_micro_r = 0.25
200
  cats_micro_f = null
201
  cats_macro_p = null
202
  cats_macro_r = null
 
15
 
16
  [nlp]
17
  lang = "en"
18
+ pipeline = ["tok2vec_small","tagger","parser","attribute_ruler","lemmatizer","tok2vec","ner","textcat_classify"]
19
  batch_size = 1000
20
  disabled = []
21
  before_creation = null
 
26
 
27
  [components]
28
 
29
+ [components.attribute_ruler]
30
+ factory = "attribute_ruler"
31
+ scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
32
+ validate = false
33
+
34
+ [components.lemmatizer]
35
+ factory = "lemmatizer"
36
+ mode = "rule"
37
+ model = null
38
+ overwrite = false
39
+ scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
40
+
41
  [components.ner]
42
  factory = "ner"
43
  incorrect_spans_key = null
 
59
  width = 256
60
  upstream = "*"
61
 
62
+ [components.parser]
63
+ factory = "parser"
64
+ learn_tokens = false
65
+ min_action_freq = 30
66
+ moves = null
67
+ scorer = {"@scorers":"spacy.parser_scorer.v1"}
68
+ update_with_oracle_cut_size = 100
69
+
70
+ [components.parser.model]
71
+ @architectures = "spacy.TransitionBasedParser.v2"
72
+ state_type = "parser"
73
+ extra_state_tokens = false
74
+ hidden_width = 64
75
+ maxout_pieces = 2
76
+ use_upper = true
77
+ nO = null
78
+
79
+ [components.parser.model.tok2vec]
80
+ @architectures = "spacy.Tok2VecListener.v1"
81
+ width = 96
82
+ upstream = "tok2vec"
83
+
84
+ [components.tagger]
85
+ factory = "tagger"
86
+ label_smoothing = 0.0
87
+ neg_prefix = "!"
88
+ overwrite = false
89
+ scorer = {"@scorers":"spacy.tagger_scorer.v1"}
90
+
91
+ [components.tagger.model]
92
+ @architectures = "spacy.Tagger.v2"
93
+ nO = null
94
+ normalize = false
95
+
96
+ [components.tagger.model.tok2vec]
97
+ @architectures = "spacy.Tok2VecListener.v1"
98
+ width = 96
99
+ upstream = "tok2vec"
100
+
101
  [components.textcat_classify]
102
  factory = "weighted_textcat"
103
  class_weights = [0.67,0.33]
 
153
  maxout_pieces = 3
154
  depth = 8
155
 
156
+ [components.tok2vec_small]
157
+ factory = "tok2vec"
158
+
159
+ [components.tok2vec_small.model]
160
+ @architectures = "spacy.Tok2Vec.v2"
161
+
162
+ [components.tok2vec_small.model.embed]
163
+ @architectures = "spacy.MultiHashEmbed.v2"
164
+ width = 96
165
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY","IS_SPACE"]
166
+ rows = [5000,1000,2500,2500,50,50]
167
+ include_static_vectors = false
168
+
169
+ [components.tok2vec_small.model.encode]
170
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
171
+ width = 96
172
+ depth = 4
173
+ window_size = 1
174
+ maxout_pieces = 3
175
+
176
  [corpora]
177
 
178
  [corpora.dev]
 
260
  learn_rate = 0.001
261
 
262
  [training.score_weights]
263
+ tag_acc = 0.25
264
+ dep_uas = 0.12
265
+ dep_las = 0.12
266
+ dep_las_per_type = null
267
+ sents_p = null
268
+ sents_r = null
269
+ sents_f = 0.0
270
+ lemma_acc = 0.25
271
+ ents_f = 0.12
272
  ents_p = 0.0
273
  ents_r = 0.0
274
  ents_per_type = null
275
+ cats_score = 0.06
276
  cats_score_desc = null
277
  cats_micro_p = null
278
+ cats_micro_r = 0.06
279
  cats_micro_f = null
280
  cats_macro_p = null
281
  cats_macro_r = null
en_tako_query_analyzer-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:758f2f483a1f44bf0ff426f5c5e2abf5867e859261672a51f8759e97ca667a31
3
- size 619535137
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d276080166d535d192f52fef9138c4678a32445a14b0c27f9a43c974be5c3aca
3
+ size 619963181
lemmatizer/lookups/lookups.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb64f40c0f8396d1762730c0ddf4dad2a52d138f5a389f71a1a1d088173b7737
3
+ size 972893
meta.json CHANGED
@@ -120,6 +120,9 @@
120
  ],
121
  "attribute_ruler":[
122
 
 
 
 
123
  ],
124
  "tok2vec":[
125
 
@@ -155,6 +158,7 @@
155
  "tagger",
156
  "parser",
157
  "attribute_ruler",
 
158
  "tok2vec",
159
  "ner",
160
  "textcat_classify"
@@ -164,6 +168,7 @@
164
  "tagger",
165
  "parser",
166
  "attribute_ruler",
 
167
  "tok2vec",
168
  "ner",
169
  "textcat_classify"
 
120
  ],
121
  "attribute_ruler":[
122
 
123
+ ],
124
+ "lemmatizer":[
125
+
126
  ],
127
  "tok2vec":[
128
 
 
158
  "tagger",
159
  "parser",
160
  "attribute_ruler",
161
+ "lemmatizer",
162
  "tok2vec",
163
  "ner",
164
  "textcat_classify"
 
168
  "tagger",
169
  "parser",
170
  "attribute_ruler",
171
+ "lemmatizer",
172
  "tok2vec",
173
  "ner",
174
  "textcat_classify"