Update spaCy pipeline
Browse files
- README.md +12 -10
- config.cfg +15 -13
- meta.json +12 -10
- relationFactory.py +27 -12
- relation_extractor/model +0 -0
- ru_patents_rel_tiny-any-py3-none-any.whl +2 -2
- transformer/cfg +1 -1
- transformer/model +2 -2
README.md
CHANGED
@@ -35,15 +35,17 @@ model-index:
|
|
35 |
|
36 |
| Type | Score |
|
37 |
| --- | --- |
|
38 |
-
| `REL_MICRO_P` |
|
39 |
-
| `REL_MICRO_R` |
|
40 |
-
| `REL_MICRO_F` |
|
41 |
-
| `REL_MACRO_F` |
|
42 |
-
| `REL_WEIGHTED_F` |
|
43 |
| `F1_PART-OF` | 37.96 |
|
44 |
-
| `F1_LOCATED-AT` |
|
45 |
-
| `F1_CONNECTED-WITH` |
|
46 |
| `F1_IN-MANNER-OF` | 0.00 |
|
47 |
-
| `F1_ATTRIBUTE-FOR` |
|
48 |
-
| `
|
49 |
-
| `
|
|
|
|
|
|
35 |
|
36 |
| Type | Score |
|
37 |
| --- | --- |
|
38 |
+
| `REL_MICRO_P` | 46.91 |
|
39 |
+
| `REL_MICRO_R` | 15.40 |
|
40 |
+
| `REL_MICRO_F` | 23.18 |
|
41 |
+
| `REL_MACRO_F` | 12.91 |
|
42 |
+
| `REL_WEIGHTED_F` | 21.04 |
|
43 |
| `F1_PART-OF` | 37.96 |
|
44 |
+
| `F1_LOCATED-AT` | 12.87 |
|
45 |
+
| `F1_CONNECTED-WITH` | 5.75 |
|
46 |
| `F1_IN-MANNER-OF` | 0.00 |
|
47 |
+
| `F1_ATTRIBUTE-FOR` | 7.94 |
|
48 |
+
| `F1_MACRO` | 0.00 |
|
49 |
+
| `F1_WEIGHTED` | 0.00 |
|
50 |
+
| `TRANSFORMER_LOSS` | 2.90 |
|
51 |
+
| `RELATION_EXTRACTOR_LOSS` | 132.27 |
|
config.cfg
CHANGED
@@ -17,7 +17,7 @@ before_creation = null
|
|
17 |
after_creation = null
|
18 |
after_pipeline_creation = null
|
19 |
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
|
20 |
-
batch_size =
|
21 |
vectors = {"@vectors":"spacy.Vectors.v1"}
|
22 |
|
23 |
[components]
|
@@ -41,7 +41,7 @@ pooling = {"@layers":"reduce_mean.v1"}
|
|
41 |
|
42 |
[components.relation_extractor.model.create_instance_tensor.get_instances]
|
43 |
@misc = "rel_instance_generator.v1"
|
44 |
-
max_length =
|
45 |
|
46 |
[components.relation_extractor.model.create_instance_tensor.tok2vec]
|
47 |
@architectures = "spacy-transformers.TransformerListener.v1"
|
@@ -51,7 +51,7 @@ upstream = "*"
|
|
51 |
|
52 |
[components.transformer]
|
53 |
factory = "transformer"
|
54 |
-
max_batch_items =
|
55 |
set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
|
56 |
|
57 |
[components.transformer.model]
|
@@ -88,8 +88,8 @@ dropout = 0.2
|
|
88 |
accumulate_gradient = 1
|
89 |
patience = 1600000
|
90 |
max_epochs = 0
|
91 |
-
max_steps =
|
92 |
-
eval_frequency =
|
93 |
frozen_components = []
|
94 |
dev_corpus = "corpora.dev"
|
95 |
train_corpus = "corpora.train"
|
@@ -127,14 +127,16 @@ initial_rate = 0.00005
|
|
127 |
[training.score_weights]
|
128 |
rel_micro_p = 0.0
|
129 |
rel_micro_r = 0.0
|
130 |
-
rel_micro_f = 0.
|
131 |
-
rel_macro_f = 0.
|
132 |
-
rel_weighted_f = 0.
|
133 |
-
f1_PART-OF = 0.
|
134 |
-
f1_LOCATED-AT = 0.
|
135 |
-
f1_CONNECTED-WITH = 0.
|
136 |
-
f1_IN-MANNER-OF = 0.
|
137 |
-
f1_ATTRIBUTE-FOR = 0.
|
|
|
|
|
138 |
|
139 |
[pretraining]
|
140 |
|
|
|
17 |
after_creation = null
|
18 |
after_pipeline_creation = null
|
19 |
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
|
20 |
+
batch_size = 200
|
21 |
vectors = {"@vectors":"spacy.Vectors.v1"}
|
22 |
|
23 |
[components]
|
|
|
41 |
|
42 |
[components.relation_extractor.model.create_instance_tensor.get_instances]
|
43 |
@misc = "rel_instance_generator.v1"
|
44 |
+
max_length = 100
|
45 |
|
46 |
[components.relation_extractor.model.create_instance_tensor.tok2vec]
|
47 |
@architectures = "spacy-transformers.TransformerListener.v1"
|
|
|
51 |
|
52 |
[components.transformer]
|
53 |
factory = "transformer"
|
54 |
+
max_batch_items = 2096
|
55 |
set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
|
56 |
|
57 |
[components.transformer.model]
|
|
|
88 |
accumulate_gradient = 1
|
89 |
patience = 1600000
|
90 |
max_epochs = 0
|
91 |
+
max_steps = 5000
|
92 |
+
eval_frequency = 50
|
93 |
frozen_components = []
|
94 |
dev_corpus = "corpora.dev"
|
95 |
train_corpus = "corpora.train"
|
|
|
127 |
[training.score_weights]
|
128 |
rel_micro_p = 0.0
|
129 |
rel_micro_r = 0.0
|
130 |
+
rel_micro_f = 0.1
|
131 |
+
rel_macro_f = 0.1
|
132 |
+
rel_weighted_f = 0.1
|
133 |
+
f1_PART-OF = 0.1
|
134 |
+
f1_LOCATED-AT = 0.1
|
135 |
+
f1_CONNECTED-WITH = 0.1
|
136 |
+
f1_IN-MANNER-OF = 0.1
|
137 |
+
f1_ATTRIBUTE-FOR = 0.1
|
138 |
+
f1_macro = 0.1
|
139 |
+
f1_weighted = 0.1
|
140 |
|
141 |
[pretraining]
|
142 |
|
meta.json
CHANGED
@@ -39,18 +39,20 @@
|
|
39 |
|
40 |
],
|
41 |
"performance":{
|
42 |
-
"rel_micro_p":0.
|
43 |
-
"rel_micro_r":0.
|
44 |
-
"rel_micro_f":0.
|
45 |
-
"rel_macro_f":0.
|
46 |
-
"rel_weighted_f":0.
|
47 |
"f1_PART-OF":0.3796196627,
|
48 |
-
"f1_LOCATED-AT":0.
|
49 |
-
"f1_CONNECTED-WITH":0.
|
50 |
"f1_IN-MANNER-OF":0.0,
|
51 |
-
"f1_ATTRIBUTE-FOR":0.
|
52 |
-
"
|
53 |
-
"
|
|
|
|
|
54 |
},
|
55 |
"requirements":[
|
56 |
"spacy-transformers>=1.3.8,<1.4.0",
|
|
|
39 |
|
40 |
],
|
41 |
"performance":{
|
42 |
+
"rel_micro_p":0.4690909091,
|
43 |
+
"rel_micro_r":0.1539746956,
|
44 |
+
"rel_micro_f":0.2318475917,
|
45 |
+
"rel_macro_f":0.1290549706,
|
46 |
+
"rel_weighted_f":0.2104325211,
|
47 |
"f1_PART-OF":0.3796196627,
|
48 |
+
"f1_LOCATED-AT":0.1286863271,
|
49 |
+
"f1_CONNECTED-WITH":0.0575296108,
|
50 |
"f1_IN-MANNER-OF":0.0,
|
51 |
+
"f1_ATTRIBUTE-FOR":0.0794392523,
|
52 |
+
"f1_macro":0.0,
|
53 |
+
"f1_weighted":0.0,
|
54 |
+
"transformer_loss":0.0290014347,
|
55 |
+
"relation_extractor_loss":1.322729256
|
56 |
},
|
57 |
"requirements":[
|
58 |
"spacy-transformers>=1.3.8,<1.4.0",
|
relationFactory.py
CHANGED
@@ -1,29 +1,34 @@
|
|
|
|
1 |
from typing import Tuple, List, Iterable, Optional, Dict, Callable, Any
|
2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import spacy
|
4 |
from spacy.tokens import Doc, Span
|
5 |
from thinc.types import Floats2d, Ints1d, Ragged, cast
|
6 |
-
from thinc.api import Model, Linear, chain, Logistic
|
7 |
|
8 |
import json
|
9 |
import os
|
10 |
import time
|
11 |
-
from itertools import islice
|
12 |
from pathlib import Path
|
13 |
|
14 |
from sklearn.metrics import precision_recall_fscore_support, f1_score
|
15 |
-
import numpy
|
16 |
-
from spacy.training.example import Example
|
17 |
-
from spacy.tokens.doc import Doc
|
18 |
-
from spacy.pipeline.trainable_pipe import TrainablePipe
|
19 |
-
from spacy.vocab import Vocab
|
20 |
-
from spacy import Language
|
21 |
-
from thinc.model import set_dropout_rate
|
22 |
-
from wasabi import Printer
|
23 |
import plotly.express as px
|
24 |
import plotly.graph_objects as go
|
25 |
|
26 |
-
|
27 |
@spacy.registry.architectures("rel_model.v1")
|
28 |
def create_relation_model(
|
29 |
create_instance_tensor: Model[List[Doc], Floats2d],
|
@@ -265,6 +270,17 @@ class RelationExtractor(TrainablePipe):
|
|
265 |
self.set_annotations(docs, predictions)
|
266 |
return losses
|
267 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
268 |
def get_loss(self, examples: Iterable[Example], scores) -> Tuple[float, float]:
|
269 |
"""Find the loss and gradient of loss for the batch of documents and
|
270 |
their predicted scores."""
|
@@ -452,4 +468,3 @@ def score_relations(examples: Iterable[Example], threshold: float) -> Dict[str,
|
|
452 |
result["rel_weighted_f"] = f1_score(y_true, y_pred, average="weighted", labels=labels, zero_division=0)
|
453 |
|
454 |
return result
|
455 |
-
|
|
|
1 |
+
from itertools import islice
|
2 |
from typing import Tuple, List, Iterable, Optional, Dict, Callable, Any
|
3 |
|
4 |
+
from spacy.scorer import PRFScore
|
5 |
+
from thinc.types import Floats2d
|
6 |
+
import numpy
|
7 |
+
from spacy.training.example import Example
|
8 |
+
from thinc.api import Model, Optimizer
|
9 |
+
from spacy.tokens.doc import Doc
|
10 |
+
from spacy.pipeline.trainable_pipe import TrainablePipe
|
11 |
+
from spacy.vocab import Vocab
|
12 |
+
from spacy import Language
|
13 |
+
from thinc.model import set_dropout_rate
|
14 |
+
from wasabi import Printer
|
15 |
+
|
16 |
+
from typing import List, Tuple, Callable
|
17 |
+
|
18 |
import spacy
|
19 |
from spacy.tokens import Doc, Span
|
20 |
from thinc.types import Floats2d, Ints1d, Ragged, cast
|
21 |
+
from thinc.api import Model, Linear, chain, Logistic
|
22 |
|
23 |
import json
|
24 |
import os
|
25 |
import time
|
|
|
26 |
from pathlib import Path
|
27 |
|
28 |
from sklearn.metrics import precision_recall_fscore_support, f1_score
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
import plotly.express as px
|
30 |
import plotly.graph_objects as go
|
31 |
|
|
|
32 |
@spacy.registry.architectures("rel_model.v1")
|
33 |
def create_relation_model(
|
34 |
create_instance_tensor: Model[List[Doc], Floats2d],
|
|
|
270 |
self.set_annotations(docs, predictions)
|
271 |
return losses
|
272 |
|
273 |
+
def get_focal_loss(
    self,
    examples: Iterable["Example"],
    scores,
    gamma=3.0,
    alpha=0.25,
    eps=1e-8,
) -> Tuple[float, "Floats2d"]:
    """Compute the alpha-balanced focal loss and its gradient w.r.t. `scores`.

    Per element, with ``p_t = p`` where the truth is 1 and ``1 - p`` where it
    is 0, the loss is ``-alpha_t * (1 - p_t) ** gamma * log(p_t)``, where
    ``alpha_t`` is ``alpha`` for positives and ``1 - alpha`` for negatives.

    examples: Batch of examples; converted to a truth array via
        ``self._examples_to_truth`` (assumed to have the same shape as
        `scores`, with values in [0, 1] -- TODO confirm).
    scores: Predicted probabilities, indexed as (row, label) since the loss
        is summed over axis 1.
    gamma: Focusing exponent; larger values down-weight well-classified
        elements more strongly.
    alpha: Weight applied to positive targets (``1 - alpha`` to negatives).
    eps: Clipping margin that keeps ``log`` and ``1 / p_t`` finite.

    RETURNS: ``(loss, gradient)`` where ``loss`` is the per-row summed focal
        loss averaged over rows, and ``gradient`` is the element-wise
        derivative of the per-element loss with respect to `scores` (same
        shape as `scores`, not divided by the number of rows).
    """
    truths = self._examples_to_truth(examples)

    # Keep probabilities strictly inside (0, 1) so log() stays finite.
    probs = numpy.clip(scores, eps, 1.0 - eps)
    # Probability assigned to the correct outcome. The second clip is kept on
    # purpose: in float32 the upper bound 1 - eps can round to exactly 1.0,
    # which would otherwise let p_t reach 0 for a negative target.
    p_t = numpy.clip(probs * truths + (1.0 - probs) * (1.0 - truths), eps, 1.0 - eps)
    # Class-balancing weight: `alpha` for positives, `1 - alpha` for negatives.
    alpha_t = alpha * truths + (1.0 - alpha) * (1.0 - truths)

    modulator = (1.0 - p_t) ** gamma
    log_p_t = numpy.log(p_t)
    focal_loss = -alpha_t * modulator * log_p_t
    loss = numpy.mean(numpy.sum(focal_loss, axis=1))

    # Chain rule: dL/dscore = dL/dp_t * dp_t/dscore, where
    #   dL/dp_t     = alpha_t * (gamma * (1 - p_t)^(gamma - 1) * log(p_t)
    #                            - (1 - p_t)^gamma / p_t)
    #   dp_t/dscore = 2 * truth - 1
    d_loss_d_pt = alpha_t * (
        gamma * (1.0 - p_t) ** (gamma - 1.0) * log_p_t - modulator / p_t
    )
    gradient = d_loss_d_pt * (2.0 * truths - 1.0)
    return float(loss), gradient
|
282 |
+
|
283 |
+
|
284 |
def get_loss(self, examples: Iterable[Example], scores) -> Tuple[float, float]:
|
285 |
"""Find the loss and gradient of loss for the batch of documents and
|
286 |
their predicted scores."""
|
|
|
468 |
result["rel_weighted_f"] = f1_score(y_true, y_pred, average="weighted", labels=labels, zero_division=0)
|
469 |
|
470 |
return result
|
|
relation_extractor/model
CHANGED
Binary files a/relation_extractor/model and b/relation_extractor/model differ
|
|
ru_patents_rel_tiny-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4054040e76b605f22e2513c94ab0a96dc601bdd49d0defdd85a3ace67f830aea
|
3 |
+
size 108770148
|
transformer/cfg
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
{
|
2 |
-
"max_batch_items":
|
3 |
}
|
|
|
1 |
{
|
2 |
+
"max_batch_items":2096
|
3 |
}
|
transformer/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35244535987be2a96005cdc63d93ae46ff5c0ab749f47435c212c65b94d176a2
|
3 |
+
size 120294214
|