Tanor committed on
Commit
75e0538
·
verified ·
1 Parent(s): d3b3708

deleted label_smoothing

Browse files
Files changed (1) hide show
  1. config.cfg +206 -207
config.cfg CHANGED
@@ -1,208 +1,207 @@
1
- [paths]
2
- train = "02_train"
3
- dev = "03_valid"
4
- vectors = null
5
- init_tok2vec = null
6
-
7
- [system]
8
- gpu_allocator = null
9
- seed = 0
10
-
11
- [nlp]
12
- lang = "sr"
13
- pipeline = ["tok2vec","tagger","ner","sentencizer","entity_linker"]
14
- batch_size = 1000
15
- disabled = []
16
- before_creation = null
17
- after_creation = null
18
- after_pipeline_creation = null
19
- tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
-
21
- [components]
22
-
23
- [components.entity_linker]
24
- factory = "entity_linker"
25
- candidates_batch_size = 1
26
- entity_vector_length = 64
27
- generate_empty_kb = {"@misc":"spacy.EmptyKB.v2"}
28
- get_candidates = {"@misc":"spacy.CandidateGenerator.v1"}
29
- get_candidates_batch = {"@misc":"spacy.CandidateBatchGenerator.v1"}
30
- incl_context = true
31
- incl_prior = false
32
- labels_discard = []
33
- n_sents = 0
34
- overwrite = true
35
- scorer = {"@scorers":"spacy.entity_linker_scorer.v1"}
36
- threshold = null
37
- use_gold_ents = true
38
-
39
- [components.entity_linker.model]
40
- @architectures = "spacy.EntityLinker.v2"
41
- nO = null
42
-
43
- [components.entity_linker.model.tok2vec]
44
- @architectures = "spacy.HashEmbedCNN.v2"
45
- pretrained_vectors = null
46
- width = 96
47
- depth = 2
48
- embed_size = 2000
49
- window_size = 1
50
- maxout_pieces = 3
51
- subword_features = true
52
-
53
- [components.ner]
54
- factory = "ner"
55
- incorrect_spans_key = null
56
- moves = null
57
- scorer = {"@scorers":"spacy.ner_scorer.v1"}
58
- update_with_oracle_cut_size = 100
59
-
60
- [components.ner.model]
61
- @architectures = "spacy.TransitionBasedParser.v2"
62
- state_type = "ner"
63
- extra_state_tokens = true
64
- hidden_width = 300
65
- maxout_pieces = 2
66
- use_upper = true
67
- nO = null
68
-
69
- [components.ner.model.tok2vec]
70
- @architectures = "spacy.HashEmbedCNN.v2"
71
- pretrained_vectors = null
72
- width = 300
73
- depth = 8
74
- embed_size = 10000
75
- window_size = 1
76
- maxout_pieces = 3
77
- subword_features = true
78
-
79
- [components.sentencizer]
80
- factory = "sentencizer"
81
- overwrite = false
82
- punct_chars = null
83
- scorer = {"@scorers":"spacy.senter_scorer.v1"}
84
-
85
- [components.tagger]
86
- factory = "tagger"
87
- label_smoothing = 0.0
88
- neg_prefix = "!"
89
- overwrite = false
90
- scorer = {"@scorers":"spacy.tagger_scorer.v1"}
91
-
92
- [components.tagger.model]
93
- @architectures = "spacy.Tagger.v1"
94
- nO = null
95
-
96
- [components.tagger.model.tok2vec]
97
- @architectures = "spacy.Tok2VecListener.v1"
98
- width = 300
99
- upstream = "*"
100
-
101
- [components.tok2vec]
102
- factory = "tok2vec"
103
-
104
- [components.tok2vec.model]
105
- @architectures = "spacy.Tok2Vec.v2"
106
-
107
- [components.tok2vec.model.embed]
108
- @architectures = "spacy.MultiHashEmbed.v2"
109
- width = 300
110
- attrs = ["ORTH","SHAPE"]
111
- rows = [5000,2500]
112
- include_static_vectors = true
113
-
114
- [components.tok2vec.model.encode]
115
- @architectures = "spacy.MaxoutWindowEncoder.v2"
116
- width = 300
117
- depth = 4
118
- window_size = 1
119
- maxout_pieces = 3
120
-
121
- [corpora]
122
-
123
- [corpora.dev]
124
- @readers = "spacy.Corpus.v1"
125
- path = ${paths.dev}
126
- max_length = 0
127
- gold_preproc = false
128
- limit = 0
129
- augmenter = null
130
-
131
- [corpora.train]
132
- @readers = "spacy.Corpus.v1"
133
- path = ${paths.train}
134
- max_length = 2000
135
- gold_preproc = false
136
- limit = 0
137
- augmenter = null
138
-
139
- [training]
140
- dev_corpus = "corpora.dev"
141
- train_corpus = "corpora.train"
142
- seed = ${system.seed}
143
- gpu_allocator = ${system.gpu_allocator}
144
- dropout = 0.1
145
- accumulate_gradient = 1
146
- patience = 1600
147
- max_epochs = 0
148
- max_steps = 20000
149
- eval_frequency = 200
150
- frozen_components = []
151
- annotating_components = []
152
- before_to_disk = null
153
- before_update = null
154
-
155
- [training.batcher]
156
- @batchers = "spacy.batch_by_words.v1"
157
- discard_oversize = false
158
- tolerance = 0.2
159
- get_length = null
160
-
161
- [training.batcher.size]
162
- @schedules = "compounding.v1"
163
- start = 100
164
- stop = 1000
165
- compound = 1.001
166
- t = 0.0
167
-
168
- [training.logger]
169
- @loggers = "spacy.ConsoleLogger.v1"
170
- progress_bar = false
171
-
172
- [training.optimizer]
173
- @optimizers = "Adam.v1"
174
- beta1 = 0.9
175
- beta2 = 0.999
176
- L2_is_weight_decay = true
177
- L2 = 0.01
178
- grad_clip = 1.0
179
- use_averages = false
180
- eps = 0.00000001
181
- learn_rate = 0.001
182
-
183
- [training.score_weights]
184
- tag_acc = 0.17
185
- ents_f = 0.17
186
- ents_p = 0.0
187
- ents_r = 0.0
188
- ents_per_type = null
189
- sents_f = 0.33
190
- sents_p = 0.0
191
- sents_r = 0.0
192
- nel_micro_f = 0.33
193
- nel_micro_r = null
194
- nel_micro_p = null
195
-
196
- [pretraining]
197
-
198
- [initialize]
199
- vectors = null
200
- init_tok2vec = ${paths.init_tok2vec}
201
- vocab_data = null
202
- lookups = null
203
- before_init = null
204
- after_init = null
205
-
206
- [initialize.components]
207
-
208
  [initialize.tokenizer]
 
1
+ [paths]
2
+ train = "02_train"
3
+ dev = "03_valid"
4
+ vectors = null
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ gpu_allocator = null
9
+ seed = 0
10
+
11
+ [nlp]
12
+ lang = "sr"
13
+ pipeline = ["tok2vec","tagger","ner","sentencizer","entity_linker"]
14
+ batch_size = 1000
15
+ disabled = []
16
+ before_creation = null
17
+ after_creation = null
18
+ after_pipeline_creation = null
19
+ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+
21
+ [components]
22
+
23
+ [components.entity_linker]
24
+ factory = "entity_linker"
25
+ candidates_batch_size = 1
26
+ entity_vector_length = 64
27
+ generate_empty_kb = {"@misc":"spacy.EmptyKB.v2"}
28
+ get_candidates = {"@misc":"spacy.CandidateGenerator.v1"}
29
+ get_candidates_batch = {"@misc":"spacy.CandidateBatchGenerator.v1"}
30
+ incl_context = true
31
+ incl_prior = false
32
+ labels_discard = []
33
+ n_sents = 0
34
+ overwrite = true
35
+ scorer = {"@scorers":"spacy.entity_linker_scorer.v1"}
36
+ threshold = null
37
+ use_gold_ents = true
38
+
39
+ [components.entity_linker.model]
40
+ @architectures = "spacy.EntityLinker.v2"
41
+ nO = null
42
+
43
+ [components.entity_linker.model.tok2vec]
44
+ @architectures = "spacy.HashEmbedCNN.v2"
45
+ pretrained_vectors = null
46
+ width = 96
47
+ depth = 2
48
+ embed_size = 2000
49
+ window_size = 1
50
+ maxout_pieces = 3
51
+ subword_features = true
52
+
53
+ [components.ner]
54
+ factory = "ner"
55
+ incorrect_spans_key = null
56
+ moves = null
57
+ scorer = {"@scorers":"spacy.ner_scorer.v1"}
58
+ update_with_oracle_cut_size = 100
59
+
60
+ [components.ner.model]
61
+ @architectures = "spacy.TransitionBasedParser.v2"
62
+ state_type = "ner"
63
+ extra_state_tokens = true
64
+ hidden_width = 300
65
+ maxout_pieces = 2
66
+ use_upper = true
67
+ nO = null
68
+
69
+ [components.ner.model.tok2vec]
70
+ @architectures = "spacy.HashEmbedCNN.v2"
71
+ pretrained_vectors = null
72
+ width = 300
73
+ depth = 8
74
+ embed_size = 10000
75
+ window_size = 1
76
+ maxout_pieces = 3
77
+ subword_features = true
78
+
79
+ [components.sentencizer]
80
+ factory = "sentencizer"
81
+ overwrite = false
82
+ punct_chars = null
83
+ scorer = {"@scorers":"spacy.senter_scorer.v1"}
84
+
85
+ [components.tagger]
86
+ factory = "tagger"
87
+ neg_prefix = "!"
88
+ overwrite = false
89
+ scorer = {"@scorers":"spacy.tagger_scorer.v1"}
90
+
91
+ [components.tagger.model]
92
+ @architectures = "spacy.Tagger.v1"
93
+ nO = null
94
+
95
+ [components.tagger.model.tok2vec]
96
+ @architectures = "spacy.Tok2VecListener.v1"
97
+ width = 300
98
+ upstream = "*"
99
+
100
+ [components.tok2vec]
101
+ factory = "tok2vec"
102
+
103
+ [components.tok2vec.model]
104
+ @architectures = "spacy.Tok2Vec.v2"
105
+
106
+ [components.tok2vec.model.embed]
107
+ @architectures = "spacy.MultiHashEmbed.v2"
108
+ width = 300
109
+ attrs = ["ORTH","SHAPE"]
110
+ rows = [5000,2500]
111
+ include_static_vectors = true
112
+
113
+ [components.tok2vec.model.encode]
114
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
115
+ width = 300
116
+ depth = 4
117
+ window_size = 1
118
+ maxout_pieces = 3
119
+
120
+ [corpora]
121
+
122
+ [corpora.dev]
123
+ @readers = "spacy.Corpus.v1"
124
+ path = ${paths.dev}
125
+ max_length = 0
126
+ gold_preproc = false
127
+ limit = 0
128
+ augmenter = null
129
+
130
+ [corpora.train]
131
+ @readers = "spacy.Corpus.v1"
132
+ path = ${paths.train}
133
+ max_length = 2000
134
+ gold_preproc = false
135
+ limit = 0
136
+ augmenter = null
137
+
138
+ [training]
139
+ dev_corpus = "corpora.dev"
140
+ train_corpus = "corpora.train"
141
+ seed = ${system.seed}
142
+ gpu_allocator = ${system.gpu_allocator}
143
+ dropout = 0.1
144
+ accumulate_gradient = 1
145
+ patience = 1600
146
+ max_epochs = 0
147
+ max_steps = 20000
148
+ eval_frequency = 200
149
+ frozen_components = []
150
+ annotating_components = []
151
+ before_to_disk = null
152
+ before_update = null
153
+
154
+ [training.batcher]
155
+ @batchers = "spacy.batch_by_words.v1"
156
+ discard_oversize = false
157
+ tolerance = 0.2
158
+ get_length = null
159
+
160
+ [training.batcher.size]
161
+ @schedules = "compounding.v1"
162
+ start = 100
163
+ stop = 1000
164
+ compound = 1.001
165
+ t = 0.0
166
+
167
+ [training.logger]
168
+ @loggers = "spacy.ConsoleLogger.v1"
169
+ progress_bar = false
170
+
171
+ [training.optimizer]
172
+ @optimizers = "Adam.v1"
173
+ beta1 = 0.9
174
+ beta2 = 0.999
175
+ L2_is_weight_decay = true
176
+ L2 = 0.01
177
+ grad_clip = 1.0
178
+ use_averages = false
179
+ eps = 0.00000001
180
+ learn_rate = 0.001
181
+
182
+ [training.score_weights]
183
+ tag_acc = 0.17
184
+ ents_f = 0.17
185
+ ents_p = 0.0
186
+ ents_r = 0.0
187
+ ents_per_type = null
188
+ sents_f = 0.33
189
+ sents_p = 0.0
190
+ sents_r = 0.0
191
+ nel_micro_f = 0.33
192
+ nel_micro_r = null
193
+ nel_micro_p = null
194
+
195
+ [pretraining]
196
+
197
+ [initialize]
198
+ vectors = null
199
+ init_tok2vec = ${paths.init_tok2vec}
200
+ vocab_data = null
201
+ lookups = null
202
+ before_init = null
203
+ after_init = null
204
+
205
+ [initialize.components]
206
+
 
207
  [initialize.tokenizer]