Delicalib committed on
Commit
91f9df2
·
verified ·
1 Parent(s): 47bf036

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -35,15 +35,17 @@ model-index:
35
 
36
  | Type | Score |
37
  | --- | --- |
38
- | `REL_MICRO_P` | 54.47 |
39
- | `REL_MICRO_R` | 13.27 |
40
- | `REL_MICRO_F` | 21.35 |
41
- | `REL_MACRO_F` | 8.61 |
42
- | `REL_WEIGHTED_F` | 17.96 |
43
  | `F1_PART-OF` | 37.96 |
44
- | `F1_LOCATED-AT` | 0.00 |
45
- | `F1_CONNECTED-WITH` | 0.00 |
46
  | `F1_IN-MANNER-OF` | 0.00 |
47
- | `F1_ATTRIBUTE-FOR` | 5.09 |
48
- | `TRANSFORMER_LOSS` | 6.31 |
49
- | `RELATION_EXTRACTOR_LOSS` | 271.72 |
 
 
 
35
 
36
  | Type | Score |
37
  | --- | --- |
38
+ | `REL_MICRO_P` | 46.91 |
39
+ | `REL_MICRO_R` | 15.40 |
40
+ | `REL_MICRO_F` | 23.18 |
41
+ | `REL_MACRO_F` | 12.91 |
42
+ | `REL_WEIGHTED_F` | 21.04 |
43
  | `F1_PART-OF` | 37.96 |
44
+ | `F1_LOCATED-AT` | 12.87 |
45
+ | `F1_CONNECTED-WITH` | 5.75 |
46
  | `F1_IN-MANNER-OF` | 0.00 |
47
+ | `F1_ATTRIBUTE-FOR` | 7.94 |
48
+ | `F1_MACRO` | 0.00 |
49
+ | `F1_WEIGHTED` | 0.00 |
50
+ | `TRANSFORMER_LOSS` | 2.90 |
51
+ | `RELATION_EXTRACTOR_LOSS` | 132.27 |
config.cfg CHANGED
@@ -17,7 +17,7 @@ before_creation = null
17
  after_creation = null
18
  after_pipeline_creation = null
19
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
- batch_size = 300
21
  vectors = {"@vectors":"spacy.Vectors.v1"}
22
 
23
  [components]
@@ -41,7 +41,7 @@ pooling = {"@layers":"reduce_mean.v1"}
41
 
42
  [components.relation_extractor.model.create_instance_tensor.get_instances]
43
  @misc = "rel_instance_generator.v1"
44
- max_length = 200
45
 
46
  [components.relation_extractor.model.create_instance_tensor.tok2vec]
47
  @architectures = "spacy-transformers.TransformerListener.v1"
@@ -51,7 +51,7 @@ upstream = "*"
51
 
52
  [components.transformer]
53
  factory = "transformer"
54
- max_batch_items = 4096
55
  set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
56
 
57
  [components.transformer.model]
@@ -88,8 +88,8 @@ dropout = 0.2
88
  accumulate_gradient = 1
89
  patience = 1600000
90
  max_epochs = 0
91
- max_steps = 20000
92
- eval_frequency = 100
93
  frozen_components = []
94
  dev_corpus = "corpora.dev"
95
  train_corpus = "corpora.train"
@@ -127,14 +127,16 @@ initial_rate = 0.00005
127
  [training.score_weights]
128
  rel_micro_p = 0.0
129
  rel_micro_r = 0.0
130
- rel_micro_f = 0.12
131
- rel_macro_f = 0.12
132
- rel_weighted_f = 0.12
133
- f1_PART-OF = 0.12
134
- f1_LOCATED-AT = 0.12
135
- f1_CONNECTED-WITH = 0.12
136
- f1_IN-MANNER-OF = 0.12
137
- f1_ATTRIBUTE-FOR = 0.12
 
 
138
 
139
  [pretraining]
140
 
 
17
  after_creation = null
18
  after_pipeline_creation = null
19
  tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+ batch_size = 200
21
  vectors = {"@vectors":"spacy.Vectors.v1"}
22
 
23
  [components]
 
41
 
42
  [components.relation_extractor.model.create_instance_tensor.get_instances]
43
  @misc = "rel_instance_generator.v1"
44
+ max_length = 100
45
 
46
  [components.relation_extractor.model.create_instance_tensor.tok2vec]
47
  @architectures = "spacy-transformers.TransformerListener.v1"
 
51
 
52
  [components.transformer]
53
  factory = "transformer"
54
+ max_batch_items = 2096
55
  set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
56
 
57
  [components.transformer.model]
 
88
  accumulate_gradient = 1
89
  patience = 1600000
90
  max_epochs = 0
91
+ max_steps = 5000
92
+ eval_frequency = 50
93
  frozen_components = []
94
  dev_corpus = "corpora.dev"
95
  train_corpus = "corpora.train"
 
127
  [training.score_weights]
128
  rel_micro_p = 0.0
129
  rel_micro_r = 0.0
130
+ rel_micro_f = 0.1
131
+ rel_macro_f = 0.1
132
+ rel_weighted_f = 0.1
133
+ f1_PART-OF = 0.1
134
+ f1_LOCATED-AT = 0.1
135
+ f1_CONNECTED-WITH = 0.1
136
+ f1_IN-MANNER-OF = 0.1
137
+ f1_ATTRIBUTE-FOR = 0.1
138
+ f1_macro = 0.1
139
+ f1_weighted = 0.1
140
 
141
  [pretraining]
142
 
meta.json CHANGED
@@ -39,18 +39,20 @@
39
 
40
  ],
41
  "performance":{
42
- "rel_micro_p":0.5447470817,
43
- "rel_micro_r":0.1327328751,
44
- "rel_micro_f":0.2134553078,
45
- "rel_macro_f":0.0861129219,
46
- "rel_weighted_f":0.1795895562,
47
  "f1_PART-OF":0.3796196627,
48
- "f1_LOCATED-AT":0.0,
49
- "f1_CONNECTED-WITH":0.0,
50
  "f1_IN-MANNER-OF":0.0,
51
- "f1_ATTRIBUTE-FOR":0.0509449466,
52
- "transformer_loss":0.0630906408,
53
- "relation_extractor_loss":2.7171609234
 
 
54
  },
55
  "requirements":[
56
  "spacy-transformers>=1.3.8,<1.4.0",
 
39
 
40
  ],
41
  "performance":{
42
+ "rel_micro_p":0.4690909091,
43
+ "rel_micro_r":0.1539746956,
44
+ "rel_micro_f":0.2318475917,
45
+ "rel_macro_f":0.1290549706,
46
+ "rel_weighted_f":0.2104325211,
47
  "f1_PART-OF":0.3796196627,
48
+ "f1_LOCATED-AT":0.1286863271,
49
+ "f1_CONNECTED-WITH":0.0575296108,
50
  "f1_IN-MANNER-OF":0.0,
51
+ "f1_ATTRIBUTE-FOR":0.0794392523,
52
+ "f1_macro":0.0,
53
+ "f1_weighted":0.0,
54
+ "transformer_loss":0.0290014347,
55
+ "relation_extractor_loss":1.322729256
56
  },
57
  "requirements":[
58
  "spacy-transformers>=1.3.8,<1.4.0",
relationFactory.py CHANGED
@@ -1,29 +1,34 @@
 
1
  from typing import Tuple, List, Iterable, Optional, Dict, Callable, Any
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import spacy
4
  from spacy.tokens import Doc, Span
5
  from thinc.types import Floats2d, Ints1d, Ragged, cast
6
- from thinc.api import Model, Linear, chain, Logistic, Optimizer
7
 
8
  import json
9
  import os
10
  import time
11
- from itertools import islice
12
  from pathlib import Path
13
 
14
  from sklearn.metrics import precision_recall_fscore_support, f1_score
15
- import numpy
16
- from spacy.training.example import Example
17
- from spacy.tokens.doc import Doc
18
- from spacy.pipeline.trainable_pipe import TrainablePipe
19
- from spacy.vocab import Vocab
20
- from spacy import Language
21
- from thinc.model import set_dropout_rate
22
- from wasabi import Printer
23
  import plotly.express as px
24
  import plotly.graph_objects as go
25
 
26
-
27
  @spacy.registry.architectures("rel_model.v1")
28
  def create_relation_model(
29
  create_instance_tensor: Model[List[Doc], Floats2d],
@@ -265,6 +270,17 @@ class RelationExtractor(TrainablePipe):
265
  self.set_annotations(docs, predictions)
266
  return losses
267
 
 
 
 
 
 
 
 
 
 
 
 
268
  def get_loss(self, examples: Iterable[Example], scores) -> Tuple[float, float]:
269
  """Find the loss and gradient of loss for the batch of documents and
270
  their predicted scores."""
@@ -452,4 +468,3 @@ def score_relations(examples: Iterable[Example], threshold: float) -> Dict[str,
452
  result["rel_weighted_f"] = f1_score(y_true, y_pred, average="weighted", labels=labels, zero_division=0)
453
 
454
  return result
455
-
 
1
+ from itertools import islice
2
  from typing import Tuple, List, Iterable, Optional, Dict, Callable, Any
3
 
4
+ from spacy.scorer import PRFScore
5
+ from thinc.types import Floats2d
6
+ import numpy
7
+ from spacy.training.example import Example
8
+ from thinc.api import Model, Optimizer
9
+ from spacy.tokens.doc import Doc
10
+ from spacy.pipeline.trainable_pipe import TrainablePipe
11
+ from spacy.vocab import Vocab
12
+ from spacy import Language
13
+ from thinc.model import set_dropout_rate
14
+ from wasabi import Printer
15
+
16
+ from typing import List, Tuple, Callable
17
+
18
  import spacy
19
  from spacy.tokens import Doc, Span
20
  from thinc.types import Floats2d, Ints1d, Ragged, cast
21
+ from thinc.api import Model, Linear, chain, Logistic
22
 
23
  import json
24
  import os
25
  import time
 
26
  from pathlib import Path
27
 
28
  from sklearn.metrics import precision_recall_fscore_support, f1_score
 
 
 
 
 
 
 
 
29
  import plotly.express as px
30
  import plotly.graph_objects as go
31
 
 
32
  @spacy.registry.architectures("rel_model.v1")
33
  def create_relation_model(
34
  create_instance_tensor: Model[List[Doc], Floats2d],
 
270
  self.set_annotations(docs, predictions)
271
  return losses
272
 
273
def get_focal_loss(self, examples: Iterable[Example], scores, gamma=3.0, alpha=0.25, eps=1e-8) -> Tuple[float, numpy.ndarray]:
    """Find the focal loss and its gradient for the batch of documents and
    their predicted scores.

    Per element the loss is ``-(1 - p_t) ** gamma * log(p_t)``, where
    ``p_t`` is the score assigned to the true outcome (``scores`` where the
    truth is 1, ``1 - scores`` where it is 0).

    examples: batch passed to ``self._examples_to_truth`` to obtain the
        truth array; it must be element-wise compatible with ``scores``.
    scores: predicted scores, expected to lie in [0, 1].
    gamma: focusing exponent; larger values down-weight elements that are
        already scored well.
    alpha: accepted for interface compatibility but not applied.
        NOTE(review): class-balancing with alpha was probably intended;
        confirm before enabling, since it changes the loss values.
    eps: clipping margin that keeps ``log`` and the division finite.

    RETURNS: ``(loss, gradient)``. ``loss`` is the mean over rows of the
        row-summed focal loss. ``gradient`` is the derivative of the
        element-wise (un-averaged) focal loss with respect to ``scores``
        and has the same shape as ``scores``.
    """
    truths = self._examples_to_truth(examples)
    clipped_scores = numpy.clip(scores, eps, 1. - eps)
    p_t = numpy.clip(
        clipped_scores * truths + (1 - clipped_scores) * (1 - truths),
        eps,
        1. - eps,
    )

    one_minus_p_t = 1 - p_t
    log_p_t = numpy.log(p_t)
    modulator = one_minus_p_t ** gamma

    focal_loss = -modulator * log_p_t
    loss = numpy.mean(numpy.sum(focal_loss, axis=1))

    # Chain rule: d(loss)/d(scores) = d(loss)/d(p_t) * d(p_t)/d(scores).
    # d(p_t)/d(scores) is +1 where truth == 1 and -1 where truth == 0.
    d_loss_d_p_t = (
        gamma * one_minus_p_t ** (gamma - 1) * log_p_t - modulator / p_t
    )
    gradient = d_loss_d_p_t * (2 * truths - 1)
    return float(loss), gradient
282
+
283
+
284
  def get_loss(self, examples: Iterable[Example], scores) -> Tuple[float, float]:
285
  """Find the loss and gradient of loss for the batch of documents and
286
  their predicted scores."""
 
468
  result["rel_weighted_f"] = f1_score(y_true, y_pred, average="weighted", labels=labels, zero_division=0)
469
 
470
  return result
 
relation_extractor/model CHANGED
Binary files a/relation_extractor/model and b/relation_extractor/model differ
 
ru_patents_rel_tiny-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9a8bc5b8b749e4abfea1d8f93625b03fa857b049a9c1116f6aa0577ea141b82
3
- size 108767847
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4054040e76b605f22e2513c94ab0a96dc601bdd49d0defdd85a3ace67f830aea
3
+ size 108770148
transformer/cfg CHANGED
@@ -1,3 +1,3 @@
1
  {
2
- "max_batch_items":4096
3
  }
 
1
  {
2
+ "max_batch_items":2096
3
  }
transformer/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1281b5d341bd033984cf7f1fc5841b036641ae35a698beebbd2bc15bfca0f29d
3
- size 120293881
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35244535987be2a96005cdc63d93ae46ff5c0ab749f47435c212c65b94d176a2
3
+ size 120294214