Update spaCy pipeline
- .gitattributes +4 -0
- README.md +14 -16
- attribute_ruler/patterns +0 -0
- config.cfg +13 -33
- custom_textcat.py +142 -0
- en_tako_query_filter-any-py3-none-any.whl +2 -2
- meta.json +130 -35
- ner/model +0 -0
- ner/moves +1 -1
- parser/cfg +13 -0
- parser/model +3 -0
- parser/moves +1 -0
- tagger/cfg +57 -0
- tagger/model +0 -0
- textcat_classify/model +2 -2
- tok2vec/model +1 -1
- tok2vec_small/cfg +3 -0
- tok2vec_small/model +3 -0
- vocab/strings.json +0 -0
.gitattributes
CHANGED
@@ -39,3 +39,7 @@ textcat_classify/model filter=lfs diff=lfs merge=lfs -text
 tok2vec/model filter=lfs diff=lfs merge=lfs -text
 vocab/key2row filter=lfs diff=lfs merge=lfs -text
 vocab/vectors filter=lfs diff=lfs merge=lfs -text
+ner/model filter=lfs diff=lfs merge=lfs -text
+parser/model filter=lfs diff=lfs merge=lfs -text
+tok2vec_small/model filter=lfs diff=lfs merge=lfs -text
+vocab/strings.json filter=lfs diff=lfs merge=lfs -text
README.md
CHANGED
@@ -2,7 +2,6 @@
 tags:
 - spacy
 - token-classification
-- text-classification
 language:
 - en
 model-index:
@@ -25,10 +24,10 @@ model-index:
 | Feature | Description |
 | --- | --- |
 | **Name** | `en_tako_query_filter` |
-| **Version** | `0.0.
+| **Version** | `0.0.2` |
 | **spaCy** | `>=3.7.5,<3.8.0` |
-| **Default Pipeline** | `tok2vec`, `ner`, `
-| **Components** | `tok2vec`, `ner`, `
+| **Default Pipeline** | `tok2vec`, `ner`, `textcat_classify` |
+| **Components** | `tok2vec`, `ner`, `textcat_classify` |
 | **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
 | **Sources** | n/a |
 | **License** | n/a |
@@ -38,12 +37,11 @@ model-index:

 <details>

-<summary>View label scheme (
+<summary>View label scheme (21 labels for 2 components)</summary>

 | Component | Labels |
 | --- | --- |
-| **`ner`** | `CARDINAL`, `
-| **`textcat`** | `Business and Finance`, `Arts, Culture, and Entertainment`, `Crime`, `Sports`, `Politics`, `Science and Technology`, `Health and Wellness`, `Lifestyle and Fashion` |
+| **`ner`** | `CARDINAL`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `STOCK_TICKER`, `TIME`, `WORK_OF_ART` |
 | **`textcat_classify`** | `ACCEPT`, `REJECT` |

 </details>
@@ -56,12 +54,12 @@ model-index:
 | `ENTS_P` | 0.00 |
 | `ENTS_R` | 0.00 |
 | `ENTS_PER_TYPE` | 0.00 |
-| `CATS_SCORE` |
-| `CATS_MICRO_P` |
-| `CATS_MICRO_R` |
-| `CATS_MICRO_F` |
-| `CATS_MACRO_P` |
-| `CATS_MACRO_R` |
-| `CATS_MACRO_F` |
-| `CATS_MACRO_AUC` |
-| `TEXTCAT_CLASSIFY_LOSS` |
+| `CATS_SCORE` | 85.07 |
+| `CATS_MICRO_P` | 85.31 |
+| `CATS_MICRO_R` | 85.31 |
+| `CATS_MICRO_F` | 85.31 |
+| `CATS_MACRO_P` | 85.35 |
+| `CATS_MACRO_R` | 85.31 |
+| `CATS_MACRO_F` | 85.31 |
+| `CATS_MACRO_AUC` | 91.67 |
+| `TEXTCAT_CLASSIFY_LOSS` | 94.04 |
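For reference, a minimal usage sketch of the updated pipeline. It assumes the wheel shipped in this repo is installed and that the custom `weighted_textcat` factory from `custom_textcat.py` is importable (packaged models usually bundle their custom code; if you load from a raw model directory instead, `import custom_textcat` first so the factory is registered). The example query and the printed values are illustrative only.

```python
import spacy

# Assumes: pip install en_tako_query_filter-any-py3-none-any.whl
# (and that the package registers the "weighted_textcat" factory;
# otherwise `import custom_textcat` before loading).
nlp = spacy.load("en_tako_query_filter")

doc = nlp("Why did Nvidia stock jump this week?")  # made-up example query
print(doc.ents)                         # entities from `ner` (ORG, STOCK_TICKER, DATE, ...)
print(doc.cats)                         # e.g. {"ACCEPT": 0.93, "REJECT": 0.07}
print(max(doc.cats, key=doc.cats.get))  # final accept/reject decision
```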
attribute_ruler/patterns
ADDED
Binary file (14.7 kB)
config.cfg
CHANGED
@@ -1,12 +1,13 @@
 [paths]
-train = "corpus/
-dev = "corpus/
+train = "corpus/filter-train.spacy"
+dev = "corpus/filter-test.spacy"
 vectors = "en_core_web_lg"
 init_tok2vec = null

 [variables]
 wandb_project_name = "tako-query-filter"
 wandb_team_name = "tako-team"
+base_model = "ner/dashing-wind"

 [system]
 gpu_allocator = "pytorch"
@@ -14,7 +15,7 @@ seed = 0

 [nlp]
 lang = "en"
-pipeline = ["tok2vec","ner","
+pipeline = ["tok2vec","ner","textcat_classify"]
 batch_size = 1000
 disabled = []
 before_creation = null
@@ -46,30 +47,9 @@ nO = null
 width = 256
 upstream = "*"

-[components.textcat]
-factory = "textcat"
-scorer = {"@scorers":"spacy.textcat_scorer.v2"}
-threshold = 0.0
-
-[components.textcat.model]
-@architectures = "spacy.TextCatEnsemble.v2"
-nO = null
-
-[components.textcat.model.linear_model]
-@architectures = "spacy.TextCatBOW.v3"
-exclusive_classes = false
-length = 262144
-ngram_size = 1
-no_output_layer = false
-nO = null
-
-[components.textcat.model.tok2vec]
-@architectures = "spacy.Tok2VecListener.v1"
-width = 256
-upstream = "*"
-
 [components.textcat_classify]
-factory = "
+factory = "weighted_textcat"
+class_weights = [0.67,0.33]
 scorer = {"@scorers":"spacy.textcat_scorer.v2"}
 threshold = 0.0

@@ -90,17 +70,17 @@ nO = null

 [components.textcat_classify.model.tok2vec.embed]
 @architectures = "spacy.MultiHashEmbed.v2"
-width =
+width = 128
 attrs = ["NORM","PREFIX","SUFFIX","SHAPE","ENT_TYPE"]
-rows = [
+rows = [2000,500,1000,500,500]
 include_static_vectors = true

 [components.textcat_classify.model.tok2vec.encode]
 @architectures = "spacy.MaxoutWindowEncoder.v2"
-width =
+width = 128
 window_size = 1
 maxout_pieces = 3
-depth =
+depth = 4

 [components.tok2vec]
 factory = "tok2vec"
@@ -150,12 +130,12 @@ seed = ${system.seed}
 gpu_allocator = ${system.gpu_allocator}
 dropout = 0.1
 accumulate_gradient = 1
-patience =
+patience = 1000
 max_epochs = 0
 max_steps = 20000
 eval_frequency = 100
-frozen_components = ["
-annotating_components = ["ner"
+frozen_components = ["tagger","attribute_ruler","parser","tok2vec","ner"]
+annotating_components = ["ner"]
 before_to_disk = null
 before_update = null
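The `textcat_classify` component now uses the custom `weighted_textcat` factory with `class_weights = [0.67,0.33]`, while the sourced `tagger`, `attribute_ruler`, `parser`, `tok2vec` and `ner` components stay frozen and `ner` annotates during training. A small sketch, assuming `config.cfg` and `custom_textcat.py` sit in the working directory, of reading those settings back; the `--code` invocation in the comment is the usual way to expose a custom factory to the spaCy CLI and is an assumption here, not something recorded in this commit.

```python
from spacy.util import load_config

# Typical training invocation for a config with a custom factory (assumed):
#   python -m spacy train config.cfg --code custom_textcat.py
cfg = load_config("config.cfg")
print(cfg["nlp"]["pipeline"])                                  # ['tok2vec', 'ner', 'textcat_classify']
print(cfg["components"]["textcat_classify"]["class_weights"])  # [0.67, 0.33]
print(cfg["training"]["frozen_components"])                    # frozen, sourced components
```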
custom_textcat.py
ADDED
@@ -0,0 +1,142 @@
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
from spacy.util import registry
from thinc.types import Floats2d
from spacy.tokens import Doc
from spacy.pipeline import TextCategorizer
from spacy.training import Example, validate_examples
from spacy.pipeline.textcat import textcat_score
from spacy.vocab import Vocab
from spacy.scorer import Scorer
from spacy.language import Language
from thinc.api import Model
import numpy


@Language.factory(
    "weighted_textcat",
    assigns=["doc.cats"],
    default_config={
        "threshold": 0.0,
        "scorer": {"@scorers": "spacy.textcat_scorer.v2"},
    },
    default_score_weights={
        "cats_score": 1.0,
        "cats_score_desc": None,
        "cats_micro_p": None,
        "cats_micro_r": None,
        "cats_micro_f": None,
        "cats_macro_p": None,
        "cats_macro_r": None,
        "cats_macro_f": None,
        "cats_macro_auc": None,
        "cats_f_per_type": None,
    },
)
def make_textcat(
    nlp: Language,
    name: str,
    model: Model[List[Doc], List[Floats2d]],
    threshold: float,
    scorer: Optional[Callable],
    class_weights: Optional[List],
) -> "TextCategorizer":
    """Create a TextCategorizer component. The text categorizer predicts categories
    over a whole document. It can learn one or more labels, and the labels are considered
    to be mutually exclusive (i.e. one true label per doc).

    model (Model[List[Doc], List[Floats2d]]): A model instance that predicts
    scores for each category.
    threshold (float): Cutoff to consider a prediction "positive".
    scorer (Optional[Callable]): The scoring method.
    """
    if class_weights == "null":
        class_weights = None
    return CustomTextcat(
        nlp.vocab,
        model,
        name,
        threshold=threshold,
        scorer=scorer,
        weights=class_weights,
    )


def textcat_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
    return Scorer.score_cats(
        examples,
        "cats",
        multi_label=False,
        **kwargs,
    )


@registry.scorers("spacy.textcat_scorer.v2")
def make_textcat_scorer():
    return textcat_score


class CustomTextcat(TextCategorizer):
    def __init__(
        self,
        vocab: Vocab,
        model: Model,
        name: str = "textcat",
        *,
        threshold: float,
        scorer: Optional[Callable] = textcat_score,
        weights: Optional[List[float]] = None,
    ) -> None:
        """Initialize a text categorizer for single-label classification.

        vocab (Vocab): The shared vocabulary.
        model (thinc.api.Model): The Thinc Model powering the pipeline component.
        name (str): The component instance name, used to add entries to the
            losses during training.
        threshold (float): Unused, not needed for single-label (exclusive
            classes) classification.
        scorer (Optional[Callable]): The scoring method. Defaults to
            Scorer.score_cats for the attribute "cats".

        DOCS: https://spacy.io/api/textcategorizer#init
        """
        self.vocab = vocab
        self.model = model
        self.name = name
        self._rehearsal_model = None
        cfg: Dict[str, Any] = {
            "labels": [],
            "threshold": threshold,
            "positive_label": None,
        }
        self.cfg = dict(cfg)
        self.scorer = scorer
        if weights is not None:
            print(f"Using weights: {weights}")
            self.weights = numpy.array(weights)

    def get_loss(self, examples: Iterable[Example], scores) -> Tuple[float, float]:
        """Find the loss and gradient of loss for the batch of documents and
        their predicted scores.

        examples (Iterable[Examples]): The batch of examples.
        scores: Scores representing the model's predictions.
        RETURNS (Tuple[float, float]): The loss and the gradient.

        DOCS: https://spacy.io/api/textcategorizer#get_loss
        """
        validate_examples(examples, "TextCategorizer.get_loss")
        self._validate_categories(examples)
        truths, not_missing = self._examples_to_truth(examples)
        not_missing = self.model.ops.asarray(not_missing)  # type: ignore
        d_scores = scores - truths
        d_scores *= not_missing
        weights = self.model.ops.asarray(self.weights)  # type: ignore
        if weights is not None:
            squared = d_scores**2
            mean_square_error = numpy.sum(squared * weights) / (
                numpy.sum(weights) * len(squared)
            )
            d_scores *= weights
        else:
            mean_square_error = (d_scores**2).mean()
        return float(mean_square_error), d_scores
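In `get_loss`, the class weights scale both the reported mean-squared error and the gradient column-wise, so with `class_weights = [0.67,0.33]` an error on the first class counts roughly twice as much as an error on the second (assuming the weights line up with the `[ACCEPT, REJECT]` label order). A standalone sketch of that arithmetic with made-up scores and gold labels:

```python
import numpy

weights = numpy.array([0.67, 0.33])   # class_weights from config.cfg
scores = numpy.array([[0.9, 0.1],     # doc 1: confidently ACCEPT
                      [0.4, 0.6]])    # doc 2: leaning REJECT
truths = numpy.array([[1.0, 0.0],     # both docs are gold ACCEPT
                      [1.0, 0.0]])

d_scores = scores - truths            # raw gradient
squared = d_scores ** 2
mse = numpy.sum(squared * weights) / (numpy.sum(weights) * len(squared))
d_scores *= weights                   # ACCEPT-column errors weighted ~2x REJECT's
print(round(float(mse), 4))           # weighted loss for this toy batch
print(d_scores)
```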
en_tako_query_filter-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:be4b077cc99a883fdd1970bcf64c5c31e1ed0de729d07dc2bde0ee67d5faaabe
+size 619534225
meta.json
CHANGED
@@ -1,7 +1,7 @@
 {
 "lang":"en",
 "name":"tako_query_filter",
-"version":"0.0.
+"version":"0.0.2",
 "description":"",
 "author":"",
 "email":"",
@@ -16,18 +16,116 @@
 "name":"en_vectors"
 },
 "labels":{
+"tok2vec_small":[
+
+],
+"tagger":[
+"$",
+"''",
+",",
+"-LRB-",
+"-RRB-",
+".",
+":",
+"ADD",
+"AFX",
+"CC",
+"CD",
+"DT",
+"EX",
+"FW",
+"HYPH",
+"IN",
+"JJ",
+"JJR",
+"JJS",
+"LS",
+"MD",
+"NFP",
+"NN",
+"NNP",
+"NNPS",
+"NNS",
+"PDT",
+"POS",
+"PRP",
+"PRP$",
+"RB",
+"RBR",
+"RBS",
+"RP",
+"SYM",
+"TO",
+"UH",
+"VB",
+"VBD",
+"VBG",
+"VBN",
+"VBP",
+"VBZ",
+"WDT",
+"WP",
+"WP$",
+"WRB",
+"XX",
+"_SP",
+"``"
+],
+"parser":[
+"ROOT",
+"acl",
+"acomp",
+"advcl",
+"advmod",
+"agent",
+"amod",
+"appos",
+"attr",
+"aux",
+"auxpass",
+"case",
+"cc",
+"ccomp",
+"compound",
+"conj",
+"csubj",
+"csubjpass",
+"dative",
+"dep",
+"det",
+"dobj",
+"expl",
+"intj",
+"mark",
+"meta",
+"neg",
+"nmod",
+"npadvmod",
+"nsubj",
+"nsubjpass",
+"nummod",
+"oprd",
+"parataxis",
+"pcomp",
+"pobj",
+"poss",
+"preconj",
+"predet",
+"prep",
+"prt",
+"punct",
+"quantmod",
+"relcl",
+"xcomp"
+],
+"attribute_ruler":[
+
+],
 "tok2vec":[

 ],
 "ner":[
 "CARDINAL",
-"CUSTOM_ATTRIBUTE",
-"CUSTOM_SEMANTIC_FUNCTION",
-"CUSTOM_SPORTS_CONFERENCE",
-"CUSTOM_SPORTS_LEAGUE",
-"CUSTOM_SPORTS_ROLE",
-"CUSTOM_STOCK_TICKER",
-"CUSTOM_TEAM",
 "DATE",
 "EVENT",
 "FAC",
@@ -43,34 +141,31 @@
 "PERSON",
 "PRODUCT",
 "QUANTITY",
+"STOCK_TICKER",
 "TIME",
 "WORK_OF_ART"
 ],
-"textcat":[
-"Business and Finance",
-"Arts, Culture, and Entertainment",
-"Crime",
-"Sports",
-"Politics",
-"Science and Technology",
-"Health and Wellness",
-"Lifestyle and Fashion"
-],
 "textcat_classify":[
 "ACCEPT",
 "REJECT"
 ]
 },
 "pipeline":[
+"tok2vec_small",
+"tagger",
+"parser",
+"attribute_ruler",
 "tok2vec",
 "ner",
-"textcat",
 "textcat_classify"
 ],
 "components":[
+"tok2vec_small",
+"tagger",
+"parser",
+"attribute_ruler",
 "tok2vec",
 "ner",
-"textcat",
 "textcat_classify"
 ],
 "disabled":[
@@ -81,28 +176,28 @@
 "ents_p":0.0,
 "ents_r":0.0,
 "ents_per_type":0.0,
-"cats_score":0.
+"cats_score":0.8507157464,
 "cats_score_desc":"F (ACCEPT)",
-"cats_micro_p":0.
-"cats_micro_r":0.
-"cats_micro_f":0.
-"cats_macro_p":0.
-"cats_macro_r":0.
-"cats_macro_f":0.
-"cats_macro_auc":0.
+"cats_micro_p":0.8531187123,
+"cats_micro_r":0.8531187123,
+"cats_micro_f":0.8531187123,
+"cats_macro_p":0.853485064,
+"cats_macro_r":0.8531187123,
+"cats_macro_f":0.8530806455,
+"cats_macro_auc":0.9167497439,
 "cats_f_per_type":{
 "ACCEPT":{
-"p":0.
-"r":0.
-"f":0.
+"p":0.8648648649,
+"r":0.8370221328,
+"f":0.8507157464
 },
 "REJECT":{
-"p":0.
-"r":0.
-"f":0.
+"p":0.8421052632,
+"r":0.8692152918,
+"f":0.8554455446
 }
 },
-"textcat_classify_loss":0.
+"textcat_classify_loss":0.9403656576
 },
 "requirements":[
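The new accuracy block is internally consistent: `cats_score` is F(ACCEPT) (see `cats_score_desc`), and `cats_macro_f` is the unweighted mean of the two per-class F-scores. A quick check:

```python
# Recompute the F-scores from the per-class precision/recall in meta.json.
def f1(p, r):
    return 2 * p * r / (p + r)

f_accept = f1(0.8648648649, 0.8370221328)  # ~0.8507157464 == cats_score
f_reject = f1(0.8421052632, 0.8692152918)  # ~0.8554455446
print(f_accept, f_reject)
print((f_accept + f_reject) / 2)           # ~0.8530806455 == cats_macro_f
```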
ner/model
CHANGED
Binary files a/ner/model and b/ner/model differ
ner/moves
CHANGED
@@ -1 +1 @@
-��moves
+��moves��{"0":{},"1":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25},"2":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25},"3":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25},"4":{"ORG":32008,"GPE":3728,"PERSON":1105,"DATE":850,"WORK_OF_ART":686,"PRODUCT":585,"EVENT":283,"MONEY":214,"NORP":179,"STOCK_TICKER":156,"LAW":129,"LOC":111,"PERCENT":88,"FAC":75,"QUANTITY":60,"CARDINAL":57,"ORDINAL":42,"TIME":27,"LANGUAGE":25,"":1},"5":{"":1}}�cfg��neg_key�
parser/cfg
ADDED
@@ -0,0 +1,13 @@
{
"moves":null,
"update_with_oracle_cut_size":100,
"multitasks":[

],
"min_action_freq":30,
"learn_tokens":false,
"beam_width":1,
"beam_density":0.0,
"beam_update_prob":0.0,
"incorrect_spans_key":null
}
parser/model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a1836fbc02b3924b2fd5f65325c58ae852ff112db1090ca724e5a801e68b85fd
size 319909
parser/moves
ADDED
@@ -0,0 +1 @@
��moves�{"0":{"":994332},"1":{"":999432},"2":{"det":172595,"nsubj":165748,"compound":116623,"amod":105184,"aux":86667,"punct":65478,"advmod":62763,"poss":36443,"mark":27941,"nummod":22598,"auxpass":15594,"prep":14001,"nsubjpass":13856,"neg":12357,"cc":10739,"nmod":9562,"advcl":9062,"npadvmod":8168,"quantmod":7101,"intj":6464,"ccomp":5896,"dobj":3427,"expl":3360,"dep":2871,"predet":1944,"parataxis":1837,"csubj":1428,"preconj":621,"pobj||prep":616,"attr":578,"meta":376,"advmod||conj":368,"dobj||xcomp":352,"acomp":284,"nsubj||ccomp":224,"dative":206,"advmod||xcomp":149,"dobj||ccomp":70,"csubjpass":64,"dobj||conj":62,"prep||conj":51,"acl":48,"prep||nsubj":41,"prep||dobj":36,"xcomp":34,"advmod||ccomp":32,"oprd":31},"3":{"punct":183790,"pobj":182191,"prep":174008,"dobj":89615,"conj":59687,"cc":51930,"ccomp":30385,"advmod":22861,"xcomp":21021,"relcl":20969,"advcl":19828,"attr":17741,"acomp":16922,"appos":15265,"case":13388,"acl":12085,"pcomp":10324,"dep":10116,"npadvmod":9796,"prt":8179,"agent":3903,"dative":3866,"nsubj":3470,"neg":2906,"amod":2839,"intj":2819,"nummod":2732,"oprd":2301,"parataxis":1261,"quantmod":319,"nmod":294,"acl||dobj":200,"prep||dobj":190,"prep||nsubj":162,"acl||nsubj":159,"appos||nsubj":145,"relcl||dobj":134,"relcl||nsubj":111,"aux":103,"expl":96,"meta":92,"appos||dobj":86,"preconj":71,"csubj":65,"prep||nsubjpass":55,"prep||advmod":54,"prep||acomp":53,"det":51,"nsubjpass":45,"relcl||pobj":42,"acl||nsubjpass":42,"mark":40,"auxpass":39,"prep||pobj":36,"relcl||nsubjpass":32,"appos||nsubjpass":31},"4":{"ROOT":111664}}�cfg��neg_key�
tagger/cfg
ADDED
@@ -0,0 +1,57 @@
{
"label_smoothing":0.0,
"labels":[
"$",
"''",
",",
"-LRB-",
"-RRB-",
".",
":",
"ADD",
"AFX",
"CC",
"CD",
"DT",
"EX",
"FW",
"HYPH",
"IN",
"JJ",
"JJR",
"JJS",
"LS",
"MD",
"NFP",
"NN",
"NNP",
"NNPS",
"NNS",
"PDT",
"POS",
"PRP",
"PRP$",
"RB",
"RBR",
"RBS",
"RP",
"SYM",
"TO",
"UH",
"VB",
"VBD",
"VBG",
"VBN",
"VBP",
"VBZ",
"WDT",
"WP",
"WP$",
"WRB",
"XX",
"_SP",
"``"
],
"neg_prefix":"!",
"overwrite":false
}
tagger/model
ADDED
Binary file (19.8 kB)
textcat_classify/model
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c65c611aa01b463b7f99116d0b1a53cd75effb9d0bac5febef70bf3b85f0b075
+size 8319359
tok2vec/model
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c8db1e5a93c4f955f990b7f6005b11c65ac6b9efa20f2c02291ac2013d06a203
 size 34434008
tok2vec_small/cfg
ADDED
@@ -0,0 +1,3 @@
{

}
tok2vec_small/model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:42d8414521eaf75f817bd1b351b26039a22a912bb2617f95ead305420f2ebffd
size 6269370
vocab/strings.json
CHANGED
The diff for this file is too large to render.