noahjax committed
Commit bca1e5f · verified · 1 Parent(s): 6bc2011

Update spaCy pipeline
.gitattributes CHANGED
@@ -39,3 +39,7 @@ textcat_classify/model filter=lfs diff=lfs merge=lfs -text
  tok2vec/model filter=lfs diff=lfs merge=lfs -text
  vocab/key2row filter=lfs diff=lfs merge=lfs -text
  vocab/vectors filter=lfs diff=lfs merge=lfs -text
+ ner/model filter=lfs diff=lfs merge=lfs -text
+ parser/model filter=lfs diff=lfs merge=lfs -text
+ tok2vec_small/model filter=lfs diff=lfs merge=lfs -text
+ vocab/strings.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -2,7 +2,6 @@
  tags:
  - spacy
  - token-classification
- - text-classification
  language:
  - en
  model-index:
@@ -25,10 +24,10 @@ model-index:
  | Feature | Description |
  | --- | --- |
  | **Name** | `en_tako_query_filter` |
- | **Version** | `0.0.1` |
+ | **Version** | `0.0.2` |
  | **spaCy** | `>=3.7.5,<3.8.0` |
- | **Default Pipeline** | `tok2vec`, `ner`, `textcat`, `textcat_classify` |
- | **Components** | `tok2vec`, `ner`, `textcat`, `textcat_classify` |
+ | **Default Pipeline** | `tok2vec`, `ner`, `textcat_classify` |
+ | **Components** | `tok2vec`, `ner`, `textcat_classify` |
  | **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
  | **Sources** | n/a |
  | **License** | n/a |
@@ -38,12 +37,11 @@ model-index:

  <details>

- <summary>View label scheme (35 labels for 3 components)</summary>
+ <summary>View label scheme (21 labels for 2 components)</summary>

  | Component | Labels |
  | --- | --- |
- | **`ner`** | `CARDINAL`, `CUSTOM_ATTRIBUTE`, `CUSTOM_SEMANTIC_FUNCTION`, `CUSTOM_SPORTS_CONFERENCE`, `CUSTOM_SPORTS_LEAGUE`, `CUSTOM_SPORTS_ROLE`, `CUSTOM_STOCK_TICKER`, `CUSTOM_TEAM`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `TIME`, `WORK_OF_ART` |
- | **`textcat`** | `Business and Finance`, `Arts, Culture, and Entertainment`, `Crime`, `Sports`, `Politics`, `Science and Technology`, `Health and Wellness`, `Lifestyle and Fashion` |
+ | **`ner`** | `CARDINAL`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `STOCK_TICKER`, `TIME`, `WORK_OF_ART` |
  | **`textcat_classify`** | `ACCEPT`, `REJECT` |

  </details>
@@ -56,12 +54,12 @@ model-index:
  | `ENTS_P` | 0.00 |
  | `ENTS_R` | 0.00 |
  | `ENTS_PER_TYPE` | 0.00 |
- | `CATS_SCORE` | 86.07 |
- | `CATS_MICRO_P` | 86.21 |
- | `CATS_MICRO_R` | 86.21 |
- | `CATS_MICRO_F` | 86.21 |
- | `CATS_MACRO_P` | 86.21 |
- | `CATS_MACRO_R` | 86.22 |
- | `CATS_MACRO_F` | 86.21 |
- | `CATS_MACRO_AUC` | 93.19 |
- | `TEXTCAT_CLASSIFY_LOSS` | 84.03 |
+ | `CATS_SCORE` | 85.07 |
+ | `CATS_MICRO_P` | 85.31 |
+ | `CATS_MICRO_R` | 85.31 |
+ | `CATS_MICRO_F` | 85.31 |
+ | `CATS_MACRO_P` | 85.35 |
+ | `CATS_MACRO_R` | 85.31 |
+ | `CATS_MACRO_F` | 85.31 |
+ | `CATS_MACRO_AUC` | 91.67 |
+ | `TEXTCAT_CLASSIFY_LOSS` | 94.04 |
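
With this update the card describes a slimmer pipeline (`tok2vec`, `ner`, `textcat_classify`) whose NER keeps a single custom label, `STOCK_TICKER`. A hedged usage sketch, not part of the model card: it assumes the packaged wheel is installed and that `custom_textcat.py` (added in this commit) is importable so its `weighted_textcat` factory is registered before loading.

```python
import custom_textcat  # noqa: F401  # assumption: module on the path; registers "weighted_textcat"
import spacy

nlp = spacy.load("en_tako_query_filter")
doc = nlp("How did AAPL trade after the Fed meeting last week?")  # illustrative text
print([(ent.text, ent.label_) for ent in doc.ents])  # e.g. STOCK_TICKER and DATE spans
print(doc.cats)  # {"ACCEPT": ..., "REJECT": ...} from textcat_classify
```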
attribute_ruler/patterns ADDED
Binary file (14.7 kB).

config.cfg CHANGED
@@ -1,12 +1,13 @@
  [paths]
- train = "corpus/classify-train.spacy"
- dev = "corpus/classify-test.spacy"
+ train = "corpus/filter-train.spacy"
+ dev = "corpus/filter-test.spacy"
  vectors = "en_core_web_lg"
  init_tok2vec = null

  [variables]
  wandb_project_name = "tako-query-filter"
  wandb_team_name = "tako-team"
+ base_model = "ner/dashing-wind"

  [system]
  gpu_allocator = "pytorch"
@@ -14,7 +15,7 @@ seed = 0

  [nlp]
  lang = "en"
- pipeline = ["tok2vec","ner","textcat","textcat_classify"]
+ pipeline = ["tok2vec","ner","textcat_classify"]
  batch_size = 1000
  disabled = []
  before_creation = null
@@ -46,30 +47,9 @@ nO = null
  width = 256
  upstream = "*"

- [components.textcat]
- factory = "textcat"
- scorer = {"@scorers":"spacy.textcat_scorer.v2"}
- threshold = 0.0
-
- [components.textcat.model]
- @architectures = "spacy.TextCatEnsemble.v2"
- nO = null
-
- [components.textcat.model.linear_model]
- @architectures = "spacy.TextCatBOW.v3"
- exclusive_classes = false
- length = 262144
- ngram_size = 1
- no_output_layer = false
- nO = null
-
- [components.textcat.model.tok2vec]
- @architectures = "spacy.Tok2VecListener.v1"
- width = 256
- upstream = "*"
-
  [components.textcat_classify]
- factory = "textcat"
+ factory = "weighted_textcat"
+ class_weights = [0.67,0.33]
  scorer = {"@scorers":"spacy.textcat_scorer.v2"}
  threshold = 0.0

@@ -90,17 +70,17 @@ nO = null

  [components.textcat_classify.model.tok2vec.embed]
  @architectures = "spacy.MultiHashEmbed.v2"
- width = 256
+ width = 128
  attrs = ["NORM","PREFIX","SUFFIX","SHAPE","ENT_TYPE"]
- rows = [5000,1000,2500,2500,1000]
+ rows = [2000,500,1000,500,500]
  include_static_vectors = true

  [components.textcat_classify.model.tok2vec.encode]
  @architectures = "spacy.MaxoutWindowEncoder.v2"
- width = 256
+ width = 128
  window_size = 1
  maxout_pieces = 3
- depth = 8
+ depth = 4

  [components.tok2vec]
  factory = "tok2vec"
@@ -150,12 +130,12 @@ seed = ${system.seed}
  gpu_allocator = ${system.gpu_allocator}
  dropout = 0.1
  accumulate_gradient = 1
- patience = 3000
+ patience = 1000
  max_epochs = 0
  max_steps = 20000
  eval_frequency = 100
- frozen_components = ["tok2vec","ner","textcat"]
- annotating_components = ["ner","textcat"]
+ frozen_components = ["tagger","attribute_ruler","parser","tok2vec","ner"]
+ annotating_components = ["ner"]
  before_to_disk = null
  before_update = null

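The config now builds `textcat_classify` from the custom `weighted_textcat` factory with `class_weights = [0.67,0.33]`, so the module that registers that factory must be importable wherever the config is used (for training, spaCy's `--code custom_textcat.py` option is the usual route). A minimal registration check, as a sketch:

```python
# Hedged sketch: importing the module added in this commit runs the
# @Language.factory("weighted_textcat") registration that config.cfg expects.
import custom_textcat  # noqa: F401
from spacy.language import Language

assert Language.has_factory("weighted_textcat")
```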
custom_textcat.py ADDED
@@ -0,0 +1,142 @@
+ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
+ from spacy.util import registry
+ from thinc.types import Floats2d
+ from spacy.tokens import Doc
+ from spacy.pipeline import TextCategorizer
+ from spacy.training import Example, validate_examples
+ from spacy.vocab import Vocab
+ from spacy.scorer import Scorer
+ from spacy.language import Language
+ from thinc.api import Model
+ import numpy
+
+
+ @Language.factory(
+     "weighted_textcat",
+     assigns=["doc.cats"],
+     default_config={
+         "threshold": 0.0,
+         "scorer": {"@scorers": "spacy.textcat_scorer.v2"},
+     },
+     default_score_weights={
+         "cats_score": 1.0,
+         "cats_score_desc": None,
+         "cats_micro_p": None,
+         "cats_micro_r": None,
+         "cats_micro_f": None,
+         "cats_macro_p": None,
+         "cats_macro_r": None,
+         "cats_macro_f": None,
+         "cats_macro_auc": None,
+         "cats_f_per_type": None,
+     },
+ )
+ def make_textcat(
+     nlp: Language,
+     name: str,
+     model: Model[List[Doc], List[Floats2d]],
+     threshold: float,
+     scorer: Optional[Callable],
+     class_weights: Optional[List],
+ ) -> "TextCategorizer":
+     """Create a TextCategorizer component. The text categorizer predicts categories
+     over a whole document. It can learn one or more labels, and the labels are
+     considered to be mutually exclusive (i.e. one true label per doc).
+
+     model (Model[List[Doc], List[Floats2d]]): A model instance that predicts
+         scores for each category.
+     threshold (float): Cutoff to consider a prediction "positive".
+     scorer (Optional[Callable]): The scoring method.
+     class_weights (Optional[List]): Per-class weights applied in the loss.
+     """
+     # The config may pass the string "null" rather than a real None.
+     if class_weights == "null":
+         class_weights = None
+     return CustomTextcat(
+         nlp.vocab,
+         model,
+         name,
+         threshold=threshold,
+         scorer=scorer,
+         weights=class_weights,
+     )
+
+
+ def textcat_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
+     return Scorer.score_cats(
+         examples,
+         "cats",
+         multi_label=False,
+         **kwargs,
+     )
+
+
+ @registry.scorers("spacy.textcat_scorer.v2")
+ def make_textcat_scorer():
+     return textcat_score
+
+
+ class CustomTextcat(TextCategorizer):
+     def __init__(
+         self,
+         vocab: Vocab,
+         model: Model,
+         name: str = "textcat",
+         *,
+         threshold: float,
+         scorer: Optional[Callable] = textcat_score,
+         weights: Optional[List[float]] = None,
+     ) -> None:
+         """Initialize a text categorizer for single-label classification.
+
+         vocab (Vocab): The shared vocabulary.
+         model (thinc.api.Model): The Thinc Model powering the pipeline component.
+         name (str): The component instance name, used to add entries to the
+             losses during training.
+         threshold (float): Unused, not needed for single-label (exclusive
+             classes) classification.
+         scorer (Optional[Callable]): The scoring method. Defaults to
+             Scorer.score_cats for the attribute "cats".
+         weights (Optional[List[float]]): Optional per-class loss weights.
+
+         DOCS: https://spacy.io/api/textcategorizer#init
+         """
+         self.vocab = vocab
+         self.model = model
+         self.name = name
+         self._rehearsal_model = None
+         cfg: Dict[str, Any] = {
+             "labels": [],
+             "threshold": threshold,
+             "positive_label": None,
+         }
+         self.cfg = dict(cfg)
+         self.scorer = scorer
+         if weights is not None:
+             print(f"Using weights: {weights}")
+         # Keep None when no weights are configured so get_loss can fall back
+         # to the standard unweighted loss.
+         self.weights = numpy.array(weights) if weights is not None else None
+
+     def get_loss(self, examples: Iterable[Example], scores) -> Tuple[float, float]:
+         """Find the loss and gradient of loss for the batch of documents and
+         their predicted scores.
+
+         examples (Iterable[Examples]): The batch of examples.
+         scores: Scores representing the model's predictions.
+         RETURNS (Tuple[float, float]): The loss and the gradient.
+
+         DOCS: https://spacy.io/api/textcategorizer#get_loss
+         """
+         validate_examples(examples, "TextCategorizer.get_loss")
+         self._validate_categories(examples)
+         truths, not_missing = self._examples_to_truth(examples)
+         not_missing = self.model.ops.asarray(not_missing)  # type: ignore
+         d_scores = scores - truths
+         d_scores *= not_missing
+         if self.weights is not None:
+             # Average the squared error with per-class weights, then scale the
+             # gradient by the same weights.
+             weights = self.model.ops.asarray(self.weights)  # type: ignore
+             squared = d_scores**2
+             mean_square_error = numpy.sum(squared * weights) / (
+                 numpy.sum(weights) * len(squared)
+             )
+             d_scores *= weights
+         else:
+             mean_square_error = (d_scores**2).mean()
+         return float(mean_square_error), d_scores
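
For reference, a toy numeric sketch of the weighted loss in `CustomTextcat.get_loss`: the squared error is averaged with per-class weights and the gradient is scaled by the same weights. The scores and labels below are made up; only the `class_weights` value comes from `config.cfg`.

```python
import numpy

scores = numpy.array([[0.9, 0.1], [0.4, 0.6]])  # assumed predictions for (ACCEPT, REJECT)
truths = numpy.array([[1.0, 0.0], [0.0, 1.0]])  # assumed gold one-hot labels
weights = numpy.array([0.67, 0.33])             # class_weights from config.cfg

d_scores = scores - truths
squared = d_scores ** 2
mean_square_error = numpy.sum(squared * weights) / (numpy.sum(weights) * len(squared))
d_scores *= weights  # class-weighted gradient, as in get_loss above
print(float(mean_square_error), d_scores)
```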
en_tako_query_filter-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7ac65b0e4d1a3ec3bba78a9d9a540110cb3ae0bb0e3696a724bd3d5267510e79
- size 643426692
+ oid sha256:be4b077cc99a883fdd1970bcf64c5c31e1ed0de729d07dc2bde0ee67d5faaabe
+ size 619534225
meta.json CHANGED
@@ -1,7 +1,7 @@
  {
  "lang":"en",
  "name":"tako_query_filter",
- "version":"0.0.1",
+ "version":"0.0.2",
  "description":"",
  "author":"",
  "email":"",
@@ -16,18 +16,116 @@
  "name":"en_vectors"
  },
  "labels":{
+ "tok2vec_small":[
+
+ ],
+ "tagger":[
+ "$",
+ "''",
+ ",",
+ "-LRB-",
+ "-RRB-",
+ ".",
+ ":",
+ "ADD",
+ "AFX",
+ "CC",
+ "CD",
+ "DT",
+ "EX",
+ "FW",
+ "HYPH",
+ "IN",
+ "JJ",
+ "JJR",
+ "JJS",
+ "LS",
+ "MD",
+ "NFP",
+ "NN",
+ "NNP",
+ "NNPS",
+ "NNS",
+ "PDT",
+ "POS",
+ "PRP",
+ "PRP$",
+ "RB",
+ "RBR",
+ "RBS",
+ "RP",
+ "SYM",
+ "TO",
+ "UH",
+ "VB",
+ "VBD",
+ "VBG",
+ "VBN",
+ "VBP",
+ "VBZ",
+ "WDT",
+ "WP",
+ "WP$",
+ "WRB",
+ "XX",
+ "_SP",
+ "``"
+ ],
+ "parser":[
+ "ROOT",
+ "acl",
+ "acomp",
+ "advcl",
+ "advmod",
+ "agent",
+ "amod",
+ "appos",
+ "attr",
+ "aux",
+ "auxpass",
+ "case",
+ "cc",
+ "ccomp",
+ "compound",
+ "conj",
+ "csubj",
+ "csubjpass",
+ "dative",
+ "dep",
+ "det",
+ "dobj",
+ "expl",
+ "intj",
+ "mark",
+ "meta",
+ "neg",
+ "nmod",
+ "npadvmod",
+ "nsubj",
+ "nsubjpass",
+ "nummod",
+ "oprd",
+ "parataxis",
+ "pcomp",
+ "pobj",
+ "poss",
+ "preconj",
+ "predet",
+ "prep",
+ "prt",
+ "punct",
+ "quantmod",
+ "relcl",
+ "xcomp"
+ ],
+ "attribute_ruler":[
+
+ ],
  "tok2vec":[

  ],
  "ner":[
  "CARDINAL",
- "CUSTOM_ATTRIBUTE",
- "CUSTOM_SEMANTIC_FUNCTION",
- "CUSTOM_SPORTS_CONFERENCE",
- "CUSTOM_SPORTS_LEAGUE",
- "CUSTOM_SPORTS_ROLE",
- "CUSTOM_STOCK_TICKER",
- "CUSTOM_TEAM",
  "DATE",
  "EVENT",
  "FAC",
@@ -43,34 +141,31 @@
  "PERSON",
  "PRODUCT",
  "QUANTITY",
+ "STOCK_TICKER",
  "TIME",
  "WORK_OF_ART"
  ],
- "textcat":[
- "Business and Finance",
- "Arts, Culture, and Entertainment",
- "Crime",
- "Sports",
- "Politics",
- "Science and Technology",
- "Health and Wellness",
- "Lifestyle and Fashion"
- ],
  "textcat_classify":[
  "ACCEPT",
  "REJECT"
  ]
  },
  "pipeline":[
+ "tok2vec_small",
+ "tagger",
+ "parser",
+ "attribute_ruler",
  "tok2vec",
  "ner",
- "textcat",
  "textcat_classify"
  ],
  "components":[
+ "tok2vec_small",
+ "tagger",
+ "parser",
+ "attribute_ruler",
  "tok2vec",
  "ner",
- "textcat",
  "textcat_classify"
  ],
  "disabled":[
@@ -81,28 +176,28 @@
  "ents_p":0.0,
  "ents_r":0.0,
  "ents_per_type":0.0,
- "cats_score":0.8606750392,
+ "cats_score":0.8507157464,
  "cats_score_desc":"F (ACCEPT)",
- "cats_micro_p":0.8620823621,
- "cats_micro_r":0.8620823621,
- "cats_micro_f":0.8620823621,
- "cats_macro_p":0.862090731,
- "cats_macro_r":0.8622499456,
- "cats_macro_f":0.8620682889,
- "cats_macro_auc":0.9319097902,
+ "cats_micro_p":0.8531187123,
+ "cats_micro_r":0.8531187123,
+ "cats_micro_f":0.8531187123,
+ "cats_macro_p":0.853485064,
+ "cats_macro_r":0.8531187123,
+ "cats_macro_f":0.8530806455,
+ "cats_macro_auc":0.9167497439,
  "cats_f_per_type":{
  "ACCEPT":{
- "p":0.8513198758,
- "r":0.8702380952,
- "f":0.8606750392
+ "p":0.8648648649,
+ "r":0.8370221328,
+ "f":0.8507157464
  },
  "REJECT":{
- "p":0.8728615863,
- "r":0.854261796,
- "f":0.8634615385
+ "p":0.8421052632,
+ "r":0.8692152918,
+ "f":0.8554455446
  }
  },
- "textcat_classify_loss":0.8403124975
+ "textcat_classify_loss":0.9403656576
  },
  "requirements":[

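The new `pipeline` and `components` entries in meta.json (`tok2vec_small`, `tagger`, `parser`, `attribute_ruler`) match the components listed as frozen in `config.cfg`. A hedged sketch of how such components are typically sourced from a pretrained pipeline; `en_core_web_lg` is an assumption based on the `[paths] vectors` setting, not something this diff states.

```python
import spacy

source = spacy.load("en_core_web_lg")  # assumed source pipeline
nlp = spacy.blank("en")
for name in ("tok2vec", "tagger", "parser", "attribute_ruler"):
    nlp.add_pipe(name, source=source)  # copy the trained component instead of creating a new one
print(nlp.pipe_names)
```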
ner/model CHANGED
Binary files a/ner/model and b/ner/model differ
 
ner/moves CHANGED
@@ -1 +1 @@
- ��moves�l{"0":{},"1":{"CUSTOM_ATTRIBUTE":8802,"GPE":3351,"DATE":2617,"ORG":2570,"PRODUCT":1459,"CUSTOM_SEMANTIC_FUNCTION":995,"PERSON":760,"EVENT":594,"CUSTOM_TEAM":518,"CUSTOM_STOCK_TICKER":394,"CUSTOM_SPORTS_LEAGUE":322,"NORP":260,"LOC":233,"MONEY":199,"CUSTOM_SPORTS_ROLE":125,"FAC":111,"LAW":96,"QUANTITY":90,"WORK_OF_ART":68,"PERCENT":33,"CARDINAL":19,"LANGUAGE":13,"TIME":5,"CUSTOM_SPORTS_CONFERENCE":5,"ORDINAL":2},"2":{"CUSTOM_ATTRIBUTE":8802,"GPE":3351,"DATE":2617,"ORG":2570,"PRODUCT":1459,"CUSTOM_SEMANTIC_FUNCTION":995,"PERSON":760,"EVENT":594,"CUSTOM_TEAM":518,"CUSTOM_STOCK_TICKER":394,"CUSTOM_SPORTS_LEAGUE":322,"NORP":260,"LOC":233,"MONEY":199,"CUSTOM_SPORTS_ROLE":125,"FAC":111,"LAW":96,"QUANTITY":90,"WORK_OF_ART":68,"PERCENT":33,"CARDINAL":19,"LANGUAGE":13,"TIME":5,"CUSTOM_SPORTS_CONFERENCE":5,"ORDINAL":2},"3":{"CUSTOM_ATTRIBUTE":8802,"GPE":3351,"DATE":2617,"ORG":2570,"PRODUCT":1459,"CUSTOM_SEMANTIC_FUNCTION":995,"PERSON":760,"EVENT":594,"CUSTOM_TEAM":518,"CUSTOM_STOCK_TICKER":394,"CUSTOM_SPORTS_LEAGUE":322,"NORP":260,"LOC":233,"MONEY":199,"CUSTOM_SPORTS_ROLE":125,"FAC":111,"LAW":96,"QUANTITY":90,"WORK_OF_ART":68,"PERCENT":33,"CARDINAL":19,"LANGUAGE":13,"TIME":5,"CUSTOM_SPORTS_CONFERENCE":5,"ORDINAL":2},"4":{"CUSTOM_ATTRIBUTE":8802,"GPE":3351,"DATE":2617,"ORG":2570,"PRODUCT":1459,"CUSTOM_SEMANTIC_FUNCTION":995,"PERSON":760,"EVENT":594,"CUSTOM_TEAM":518,"CUSTOM_STOCK_TICKER":394,"CUSTOM_SPORTS_LEAGUE":322,"NORP":260,"LOC":233,"MONEY":199,"CUSTOM_SPORTS_ROLE":125,"FAC":111,"LAW":96,"QUANTITY":90,"WORK_OF_ART":68,"PERCENT":33,"CARDINAL":19,"LANGUAGE":13,"TIME":5,"CUSTOM_SPORTS_CONFERENCE":5,"ORDINAL":2,"":1},"5":{"":1}}�cfg��neg_key�
+ ��moves��{"0":{},"1":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25},"2":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25},"3":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25},"4":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25,"":1},"5":{"":1}}�cfg��neg_key�
parser/cfg ADDED
@@ -0,0 +1,13 @@
+ {
+ "moves":null,
+ "update_with_oracle_cut_size":100,
+ "multitasks":[
+
+ ],
+ "min_action_freq":30,
+ "learn_tokens":false,
+ "beam_width":1,
+ "beam_density":0.0,
+ "beam_update_prob":0.0,
+ "incorrect_spans_key":null
+ }
parser/model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a1836fbc02b3924b2fd5f65325c58ae852ff112db1090ca724e5a801e68b85fd
+ size 319909
parser/moves ADDED
@@ -0,0 +1 @@
+ ��moves� {"0":{"":994332},"1":{"":999432},"2":{"det":172595,"nsubj":165748,"compound":116623,"amod":105184,"aux":86667,"punct":65478,"advmod":62763,"poss":36443,"mark":27941,"nummod":22598,"auxpass":15594,"prep":14001,"nsubjpass":13856,"neg":12357,"cc":10739,"nmod":9562,"advcl":9062,"npadvmod":8168,"quantmod":7101,"intj":6464,"ccomp":5896,"dobj":3427,"expl":3360,"dep":2871,"predet":1944,"parataxis":1837,"csubj":1428,"preconj":621,"pobj||prep":616,"attr":578,"meta":376,"advmod||conj":368,"dobj||xcomp":352,"acomp":284,"nsubj||ccomp":224,"dative":206,"advmod||xcomp":149,"dobj||ccomp":70,"csubjpass":64,"dobj||conj":62,"prep||conj":51,"acl":48,"prep||nsubj":41,"prep||dobj":36,"xcomp":34,"advmod||ccomp":32,"oprd":31},"3":{"punct":183790,"pobj":182191,"prep":174008,"dobj":89615,"conj":59687,"cc":51930,"ccomp":30385,"advmod":22861,"xcomp":21021,"relcl":20969,"advcl":19828,"attr":17741,"acomp":16922,"appos":15265,"case":13388,"acl":12085,"pcomp":10324,"dep":10116,"npadvmod":9796,"prt":8179,"agent":3903,"dative":3866,"nsubj":3470,"neg":2906,"amod":2839,"intj":2819,"nummod":2732,"oprd":2301,"parataxis":1261,"quantmod":319,"nmod":294,"acl||dobj":200,"prep||dobj":190,"prep||nsubj":162,"acl||nsubj":159,"appos||nsubj":145,"relcl||dobj":134,"relcl||nsubj":111,"aux":103,"expl":96,"meta":92,"appos||dobj":86,"preconj":71,"csubj":65,"prep||nsubjpass":55,"prep||advmod":54,"prep||acomp":53,"det":51,"nsubjpass":45,"relcl||pobj":42,"acl||nsubjpass":42,"mark":40,"auxpass":39,"prep||pobj":36,"relcl||nsubjpass":32,"appos||nsubjpass":31},"4":{"ROOT":111664}}�cfg��neg_key�
tagger/cfg ADDED
@@ -0,0 +1,57 @@
+ {
+ "label_smoothing":0.0,
+ "labels":[
+ "$",
+ "''",
+ ",",
+ "-LRB-",
+ "-RRB-",
+ ".",
+ ":",
+ "ADD",
+ "AFX",
+ "CC",
+ "CD",
+ "DT",
+ "EX",
+ "FW",
+ "HYPH",
+ "IN",
+ "JJ",
+ "JJR",
+ "JJS",
+ "LS",
+ "MD",
+ "NFP",
+ "NN",
+ "NNP",
+ "NNPS",
+ "NNS",
+ "PDT",
+ "POS",
+ "PRP",
+ "PRP$",
+ "RB",
+ "RBR",
+ "RBS",
+ "RP",
+ "SYM",
+ "TO",
+ "UH",
+ "VB",
+ "VBD",
+ "VBG",
+ "VBN",
+ "VBP",
+ "VBZ",
+ "WDT",
+ "WP",
+ "WP$",
+ "WRB",
+ "XX",
+ "_SP",
+ "``"
+ ],
+ "neg_prefix":"!",
+ "overwrite":false
+ }
tagger/model ADDED
Binary file (19.8 kB).
 
textcat_classify/model CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:697c5683560f9c380ce9683e1f8be56b3548e2f7d95b9ef205dcb77ba7a3fc5b
- size 39143049
+ oid sha256:c65c611aa01b463b7f99116d0b1a53cd75effb9d0bac5febef70bf3b85f0b075
+ size 8319359
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b26ab00bd800730dbd5328c6603e549fd33426b18a10ca6c4efd1bf2e68c7e84
+ oid sha256:c8db1e5a93c4f955f990b7f6005b11c65ac6b9efa20f2c02291ac2013d06a203
  size 34434008
tok2vec_small/cfg ADDED
@@ -0,0 +1,3 @@
+ {
+
+ }
tok2vec_small/model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:42d8414521eaf75f817bd1b351b26039a22a912bb2617f95ead305420f2ebffd
+ size 6269370
vocab/strings.json CHANGED
Diff too large to render.