Update spaCy pipeline

Browse files

Files changed (12) hide show

README.md +61 -35
config.cfg +89 -4
id_core_news_sm-any-py3-none-any.whl +2 -2
meta.json +478 -109
morphologizer/model +0 -0
ner/model +0 -0
ner/moves +1 -1
parser/model +1 -1
tagger/model +0 -0
tok2vec/model +1 -1
trainable_lemmatizer/model +0 -0
vocab/strings.json +0 -0

README.md CHANGED Viewed

@@ -13,43 +13,70 @@ model-index:
     metrics:
     - name: NER Precision
       type: precision
-      value: 0.6721056721
     - name: NER Recall
       type: recall
-      value: 0.6040502793
     - name: NER F Score
       type: f_score
-      value: 0.6362633321
     - name: TAG (XPOS) Accuracy
       type: accuracy
-      value: 0.9051536414
     - name: POS (UPOS) Accuracy
       type: accuracy
-      value: 0.9125297415
     - name: Morph (UFeats) Accuracy
       type: accuracy
-      value: 0.9296115526
     - name: Lemma Accuracy
       type: accuracy
-      value: 0.9369920335
     - name: Unlabeled Attachment Score (UAS)
       type: f_score
-      value: 0.7753785754
     - name: Labeled Attachment Score (LAS)
       type: f_score
-      value: 0.6871555348
     - name: Sentences F-Score
       type: f_score
-      value: 0.857881137
 ---
 | Feature | Description |
 | --- | --- |
 | **Name** | `id_core_news_sm` |
-| **Version** | `0.0.1` |
 | **spaCy** | `>=3.7.4,<3.8.0` |
-| **Default Pipeline** | `tok2vec`, `ner` |
-| **Components** | `tok2vec`, `ner` |
 | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 | **Sources** | n/a |
 | **License** | n/a |
@@ -59,11 +86,10 @@ model-index:
 <details>
-<summary>View label scheme (18 labels for 1 components)</summary>
 | Component | Labels |
 | --- | --- |
-| **`ner`** | `CARDINAL`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `TIME`, `WORK_OF_ART` |
 | **`tagger`** | `APP`, `ASP`, `ASP+PS3`, `ASS`, `B--`, `B--+PS3`, `CC-`, `CCONJ`, `CD-`, `CO-`, `D--`, `D--+PS3`, `F--`, `F--+PS2`, `G--`, `G--+PS3`, `H--`, `I--`, `M--`, `M--+PS3`, `NOUN`, `NPD`, `NSD`, `NSD+PS3`, `NSF`, `NSM`, `NUM`, `O--`, `PP1`, `PP2`, `PP3`, `PROPN`, `PS1`, `PS1+VSA`, `PS2`, `PS3`, `R--`, `R--+PS3`, `S--`, `SYM`, `T--`, `VERB`, `VPA`, `VSA`, `VSA+PS2`, `VSA+PS3`, `VSP`, `W--`, `X--`, `Z--` |
 | **`morphologizer`** | `POS=PROPN`, `POS=AUX`, `Definite=Ind\|POS=DET\|PronType=Art`, `Number=Sing\|POS=NOUN`, `POS=PRON\|PronType=Rel`, `Mood=Ind\|POS=VERB\|Voice=Pass`, `POS=ADP`, `POS=PUNCT`, `POS=NOUN`, `POS=ADV`, `POS=CCONJ`, `POS=SCONJ`, `Mood=Ind\|POS=VERB\|Voice=Act`, `POS=VERB`, `POS=DET\|PronType=Tot`, `Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Number=Plur\|POS=PRON\|Person=3\|PronType=Prs`, `POS=PRON\|PronType=Prs\|Reflex=Yes`, `POS=DET\|PronType=Dem`, `NumType=Card\|POS=NUM`, `POS=ADJ`, `Number=Plur\|POS=DET\|PronType=Ind`, `NumType=Card\|POS=NUM\|PronType=Tot`, `POS=PART\|Polarity=Neg`, `POS=PRON\|PronType=Int`, `NumType=Ord\|POS=ADJ`, `POS=PART`, `POS=PRON\|PronType=Dem`, `POS=DET\|PronType=Ind`, `Number=Plur\|POS=NOUN`, `Number=Sing\|POS=PRON\|Person=1\|Polite=Form\|PronType=Prs`, `POS=ADV\|PronType=Int`, `Clusivity=In\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Definite=Def\|POS=DET\|PronType=Art`, `POS=SYM`, `Degree=Sup\|POS=ADJ`, `POS=INTJ`, `Number=Sing\|POS=PRON\|Person=2\|Polite=Infm\|PronType=Prs`, `POS=ADV\|PronType=Ind`, `Number=Sing\|POS=PRON\|Person=3\|Polite=Form\|PronType=Prs`, `Number=Sing\|POS=PRON\|Person=1\|Polite=Infm\|PronType=Prs`, `Number=Sing\|POS=PRON\|PronType=Ind`, `POS=VERB\|Voice=Act`, `POS=DET\|PronType=Emp`, `POS=VERB\|Voice=Pass`, `POS=ADV\|PronType=Dem`, `POS=NOUN\|Typo=Yes`, `POS=ADP\|Typo=Yes`, `Number=Plur\|POS=PRON\|PronType=Ind`, `POS=VERB\|Typo=Yes\|Voice=Pass`, `POS=X`, `POS=PRON\|PronType=Tot`, `POS=SCONJ\|Typo=Yes`, `Number=Plur\|POS=PRON\|Person=2\|Polite=Infm\|PronType=Prs`, `NumType=Card\|POS=NUM\|Typo=Yes`, `Clusivity=Ex\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Number=Sing\|POS=PRON\|Person=2\|Polite=Form\|PronType=Prs`, `Foreign=Yes\|POS=X`, `POS=ADV\|PronType=Rel`, `Mood=Imp\|POS=VERB\|Voice=Act`, `Number=Sing\|POS=NOUN\|Typo=Yes`, `POS=PROPN\|Typo=Yes`, `POS=DET`, `Number=Sing\|POS=DET\|PronType=Ind`, `POS=DET\|PronType=Ind\|Typo=Yes`, `Abbr=Yes\|POS=DET\|PronType=Dem`, `POS=PRON\|PronType=Ind`, `POS=VERB\|Typo=Yes`, `Abbr=Yes\|POS=PROPN`, `Abbr=Yes\|POS=PRON\|PronType=Rel`, `Number=Plur\|POS=PRON\|PronType=Int`, `Abbr=Yes\|POS=PART\|Polarity=Neg`, `POS=ADV\|PronType=Tot`, `Abbr=Yes\|POS=ADV`, `POS=ADV\|Typo=Yes`, `POS=X\|Typo=Yes`, `Number=Sing\|POS=PRON\|Person=2\|PronType=Prs`, `POS=ADV\|PronType=Int\|Typo=Yes`, `NumType=Ord\|POS=ADJ\|Typo=Yes` |
 | **`parser`** | `ROOT`, `acl`, `acl:relcl`, `advcl`, `advmod`, `advmod:emph`, `amod`, `appos`, `aux`, `case`, `case:adv`, `cc`, `ccomp`, `compound`, `conj`, `cop`, `dep`, `det`, `fixed`, `flat`, `flat:foreign`, `flat:name`, `mark`, `nmod`, `nmod:lmod`, `nmod:poss`, `nmod:tmod`, `nsubj`, `nsubj:pass`, `nummod`, `obj`, `obl`, `obl:agent`, `obl:tmod`, `parataxis`, `punct`, `xcomp` |
@@ -74,22 +100,22 @@ model-index:
 | Type | Score |
 | --- | --- |
-| `ENTS_F` | 63.63 |
-| `ENTS_P` | 67.21 |
-| `ENTS_R` | 60.41 |
-| `TOK2VEC_LOSS` | 45767.58 |
-| `NER_LOSS` | 127721.43 |
-| `TAG_ACC` | 90.52 |
-| `POS_ACC` | 91.25 |
-| `MORPH_ACC` | 92.96 |
-| `LEMMA_ACC` | 93.70 |
-| `DEP_UAS` | 77.54 |
-| `DEP_LAS` | 68.72 |
-| `SENTS_P` | 82.72 |
-| `SENTS_R` | 89.09 |
-| `SENTS_F` | 85.79 |
-| `TOK2VEC_LOSS` | 756743.38 |
-| `TAGGER_LOSS` | 73614.38 |
-| `MORPHOLOGIZER_LOSS` | 155689.33 |
-| `TRAINABLE_LEMMATIZER_LOSS` | 35033.93 |
-| `PARSER_LOSS` | 1037857.66 |

     metrics:
     - name: NER Precision
       type: precision
+      value: 0.0
     - name: NER Recall
       type: recall
+      value: 0.0
     - name: NER F Score
       type: f_score
+      value: 0.0
+  - task:
+      name: TAG
+      type: token-classification
+    metrics:
     - name: TAG (XPOS) Accuracy
       type: accuracy
+      value: 0.9058429775
+  - task:
+      name: POS
+      type: token-classification
+    metrics:
     - name: POS (UPOS) Accuracy
       type: accuracy
+      value: 0.911077953
+  - task:
+      name: MORPH
+      type: token-classification
+    metrics:
     - name: Morph (UFeats) Accuracy
       type: accuracy
+      value: 0.924529063
+  - task:
+      name: LEMMA
+      type: token-classification
+    metrics:
     - name: Lemma Accuracy
       type: accuracy
+      value: 0.9356240444
+  - task:
+      name: UNLABELED_DEPENDENCIES
+      type: token-classification
+    metrics:
     - name: Unlabeled Attachment Score (UAS)
       type: f_score
+      value: 0.779082277
+  - task:
+      name: LABELED_DEPENDENCIES
+      type: token-classification
+    metrics:
     - name: Labeled Attachment Score (LAS)
       type: f_score
+      value: 0.6971807939
+  - task:
+      name: SENTS
+      type: token-classification
+    metrics:
     - name: Sentences F-Score
       type: f_score
+      value: 0.9099018733
 ---
 | Feature | Description |
 | --- | --- |
 | **Name** | `id_core_news_sm` |
+| **Version** | `0.0.2` |
 | **spaCy** | `>=3.7.4,<3.8.0` |
+| **Default Pipeline** | `tok2vec`, `ner`, `tagger`, `morphologizer`, `trainable_lemmatizer`, `parser` |
+| **Components** | `tok2vec`, `ner`, `tagger`, `morphologizer`, `trainable_lemmatizer`, `parser` |
 | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
 | **Sources** | n/a |
 | **License** | n/a |
 <details>
+<summary>View label scheme (166 labels for 3 components)</summary>
 | Component | Labels |
 | --- | --- |
 | **`tagger`** | `APP`, `ASP`, `ASP+PS3`, `ASS`, `B--`, `B--+PS3`, `CC-`, `CCONJ`, `CD-`, `CO-`, `D--`, `D--+PS3`, `F--`, `F--+PS2`, `G--`, `G--+PS3`, `H--`, `I--`, `M--`, `M--+PS3`, `NOUN`, `NPD`, `NSD`, `NSD+PS3`, `NSF`, `NSM`, `NUM`, `O--`, `PP1`, `PP2`, `PP3`, `PROPN`, `PS1`, `PS1+VSA`, `PS2`, `PS3`, `R--`, `R--+PS3`, `S--`, `SYM`, `T--`, `VERB`, `VPA`, `VSA`, `VSA+PS2`, `VSA+PS3`, `VSP`, `W--`, `X--`, `Z--` |
 | **`morphologizer`** | `POS=PROPN`, `POS=AUX`, `Definite=Ind\|POS=DET\|PronType=Art`, `Number=Sing\|POS=NOUN`, `POS=PRON\|PronType=Rel`, `Mood=Ind\|POS=VERB\|Voice=Pass`, `POS=ADP`, `POS=PUNCT`, `POS=NOUN`, `POS=ADV`, `POS=CCONJ`, `POS=SCONJ`, `Mood=Ind\|POS=VERB\|Voice=Act`, `POS=VERB`, `POS=DET\|PronType=Tot`, `Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Number=Plur\|POS=PRON\|Person=3\|PronType=Prs`, `POS=PRON\|PronType=Prs\|Reflex=Yes`, `POS=DET\|PronType=Dem`, `NumType=Card\|POS=NUM`, `POS=ADJ`, `Number=Plur\|POS=DET\|PronType=Ind`, `NumType=Card\|POS=NUM\|PronType=Tot`, `POS=PART\|Polarity=Neg`, `POS=PRON\|PronType=Int`, `NumType=Ord\|POS=ADJ`, `POS=PART`, `POS=PRON\|PronType=Dem`, `POS=DET\|PronType=Ind`, `Number=Plur\|POS=NOUN`, `Number=Sing\|POS=PRON\|Person=1\|Polite=Form\|PronType=Prs`, `POS=ADV\|PronType=Int`, `Clusivity=In\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Definite=Def\|POS=DET\|PronType=Art`, `POS=SYM`, `Degree=Sup\|POS=ADJ`, `POS=INTJ`, `Number=Sing\|POS=PRON\|Person=2\|Polite=Infm\|PronType=Prs`, `POS=ADV\|PronType=Ind`, `Number=Sing\|POS=PRON\|Person=3\|Polite=Form\|PronType=Prs`, `Number=Sing\|POS=PRON\|Person=1\|Polite=Infm\|PronType=Prs`, `Number=Sing\|POS=PRON\|PronType=Ind`, `POS=VERB\|Voice=Act`, `POS=DET\|PronType=Emp`, `POS=VERB\|Voice=Pass`, `POS=ADV\|PronType=Dem`, `POS=NOUN\|Typo=Yes`, `POS=ADP\|Typo=Yes`, `Number=Plur\|POS=PRON\|PronType=Ind`, `POS=VERB\|Typo=Yes\|Voice=Pass`, `POS=X`, `POS=PRON\|PronType=Tot`, `POS=SCONJ\|Typo=Yes`, `Number=Plur\|POS=PRON\|Person=2\|Polite=Infm\|PronType=Prs`, `NumType=Card\|POS=NUM\|Typo=Yes`, `Clusivity=Ex\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Number=Sing\|POS=PRON\|Person=2\|Polite=Form\|PronType=Prs`, `Foreign=Yes\|POS=X`, `POS=ADV\|PronType=Rel`, `Mood=Imp\|POS=VERB\|Voice=Act`, `Number=Sing\|POS=NOUN\|Typo=Yes`, `POS=PROPN\|Typo=Yes`, `POS=DET`, `Number=Sing\|POS=DET\|PronType=Ind`, `POS=DET\|PronType=Ind\|Typo=Yes`, `Abbr=Yes\|POS=DET\|PronType=Dem`, `POS=PRON\|PronType=Ind`, `POS=VERB\|Typo=Yes`, `Abbr=Yes\|POS=PROPN`, `Abbr=Yes\|POS=PRON\|PronType=Rel`, `Number=Plur\|POS=PRON\|PronType=Int`, `Abbr=Yes\|POS=PART\|Polarity=Neg`, `POS=ADV\|PronType=Tot`, `Abbr=Yes\|POS=ADV`, `POS=ADV\|Typo=Yes`, `POS=X\|Typo=Yes`, `Number=Sing\|POS=PRON\|Person=2\|PronType=Prs`, `POS=ADV\|PronType=Int\|Typo=Yes`, `NumType=Ord\|POS=ADJ\|Typo=Yes` |
 | **`parser`** | `ROOT`, `acl`, `acl:relcl`, `advcl`, `advmod`, `advmod:emph`, `amod`, `appos`, `aux`, `case`, `case:adv`, `cc`, `ccomp`, `compound`, `conj`, `cop`, `dep`, `det`, `fixed`, `flat`, `flat:foreign`, `flat:name`, `mark`, `nmod`, `nmod:lmod`, `nmod:poss`, `nmod:tmod`, `nsubj`, `nsubj:pass`, `nummod`, `obj`, `obl`, `obl:agent`, `obl:tmod`, `parataxis`, `punct`, `xcomp` |
 | Type | Score |
 | --- | --- |
+| `ENTS_F` | 0.00 |
+| `ENTS_P` | 0.00 |
+| `ENTS_R` | 0.00 |
+| `ENTS_PER_TYPE` | 0.00 |
+| `TAG_ACC` | 90.58 |
+| `POS_ACC` | 91.11 |
+| `MORPH_ACC` | 92.45 |
+| `LEMMA_ACC` | 93.56 |
+| `DEP_UAS` | 77.91 |
+| `DEP_LAS` | 69.72 |
+| `SENTS_P` | 90.75 |
+| `SENTS_R` | 91.23 |
+| `SENTS_F` | 90.99 |
+| `TOK2VEC_LOSS` | 834213.16 |
+| `NER_LOSS` | 0.00 |
+| `TAGGER_LOSS` | 57215.06 |
+| `MORPHOLOGIZER_LOSS` | 124661.48 |
+| `TRAINABLE_LEMMATIZER_LOSS` | 24560.71 |
+| `PARSER_LOSS` | 882915.56 |

config.cfg CHANGED Viewed

@@ -1,6 +1,6 @@
 [paths]
-train = "./ner_id_train.spacy"
-dev = "./ner_id_dev.spacy"
 vectors = null
 init_tok2vec = null
@@ -10,7 +10,7 @@ seed = 0
 [nlp]
 lang = "id"
-pipeline = ["tok2vec","ner"]
 batch_size = 1000
 disabled = []
 before_creation = null
@@ -21,6 +21,23 @@ vectors = {"@vectors":"spacy.Vectors.v1"}
 [components]
 [components.ner]
 factory = "ner"
 incorrect_spans_key = null
@@ -42,6 +59,45 @@ nO = null
 width = ${components.tok2vec.model.encode.width}
 upstream = "*"
 [components.tok2vec]
 factory = "tok2vec"
@@ -62,6 +118,24 @@ depth = 8
 window_size = 1
 maxout_pieces = 3
 [corpora]
 [corpora.dev]
@@ -125,10 +199,21 @@ eps = 0.00000001
 learn_rate = 0.001
 [training.score_weights]
-ents_f = 1.0
 ents_p = 0.0
 ents_r = 0.0
 ents_per_type = null
 [pretraining]

 [paths]
+train = "./id_gsd-ud-train.spacy"
+dev = "./id_gsd-ud-dev.spacy"
 vectors = null
 init_tok2vec = null
 [nlp]
 lang = "id"
+pipeline = ["tok2vec","ner","tagger","morphologizer","trainable_lemmatizer","parser"]
 batch_size = 1000
 disabled = []
 before_creation = null
 [components]
+[components.morphologizer]
+factory = "morphologizer"
+extend = false
+label_smoothing = 0.05
+overwrite = true
+scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
+[components.morphologizer.model]
+@architectures = "spacy.Tagger.v2"
+nO = null
+normalize = false
+[components.morphologizer.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode.width}
+upstream = "*"
 [components.ner]
 factory = "ner"
 incorrect_spans_key = null
 width = ${components.tok2vec.model.encode.width}
 upstream = "*"
+[components.parser]
+factory = "parser"
+learn_tokens = false
+min_action_freq = 30
+moves = null
+scorer = {"@scorers":"spacy.parser_scorer.v1"}
+update_with_oracle_cut_size = 100
+[components.parser.model]
+@architectures = "spacy.TransitionBasedParser.v2"
+state_type = "parser"
+extra_state_tokens = false
+hidden_width = 128
+maxout_pieces = 3
+use_upper = true
+nO = null
+[components.parser.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode.width}
+upstream = "*"
+[components.tagger]
+factory = "tagger"
+label_smoothing = 0.05
+neg_prefix = "!"
+overwrite = false
+scorer = {"@scorers":"spacy.tagger_scorer.v1"}
+[components.tagger.model]
+@architectures = "spacy.Tagger.v2"
+nO = null
+normalize = false
+[components.tagger.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode.width}
+upstream = "*"
 [components.tok2vec]
 factory = "tok2vec"
 window_size = 1
 maxout_pieces = 3
+[components.trainable_lemmatizer]
+factory = "trainable_lemmatizer"
+backoff = "orth"
+min_tree_freq = 3
+overwrite = false
+scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
+top_k = 1
+[components.trainable_lemmatizer.model]
+@architectures = "spacy.Tagger.v2"
+nO = null
+normalize = false
+[components.trainable_lemmatizer.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode.width}
+upstream = "*"
 [corpora]
 [corpora.dev]
 learn_rate = 0.001
 [training.score_weights]
+ents_f = 0.2
 ents_p = 0.0
 ents_r = 0.0
 ents_per_type = null
+tag_acc = 0.2
+pos_acc = 0.1
+morph_acc = 0.1
+morph_per_feat = null
+lemma_acc = 0.2
+dep_uas = 0.1
+dep_las = 0.1
+dep_las_per_type = null
+sents_p = null
+sents_r = null
+sents_f = 0.0
 [pretraining]

id_core_news_sm-any-py3-none-any.whl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f5b707caf40cf9fdea315f2e5d792baa07e4a95b1ca2d7b66447a5dffb556b9
-size 32101183

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c89d501e10ca94b399fb58b812f28a8d55848c6c8f3b58ed0d8f06465957d91
+size 34239071

meta.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "lang":"id",
   "name":"core_news_sm",
-  "version":"0.0.1",
   "description":"",
   "author":"",
   "email":"",
@@ -20,135 +20,504 @@
     ],
     "ner":[
-      "CARDINAL",
-      "DATE",
-      "EVENT",
-      "FAC",
-      "GPE",
-      "LANGUAGE",
-      "LAW",
-      "LOC",
-      "MONEY",
-      "NORP",
-      "ORDINAL",
-      "ORG",
-      "PERCENT",
-      "PERSON",
-      "PRODUCT",
-      "QUANTITY",
-      "TIME",
-      "WORK_OF_ART"
     ]
   },
   "pipeline":[
     "tok2vec",
-    "ner"
   ],
   "components":[
     "tok2vec",
-    "ner"
   ],
   "disabled":[
   ],
   "performance":{
-    "ents_f":0.6362633321,
-    "ents_p":0.6721056721,
-    "ents_r":0.6040502793,
-    "ents_per_type":{
-      "PRODUCT":{
-        "p":0.3571428571,
-        "r":0.0704225352,
-        "f":0.1176470588
-      },
-      "GPE":{
-        "p":0.7411167513,
-        "r":0.7934782609,
-        "f":0.7664041995
-      },
-      "LOC":{
-        "p":0.7142857143,
-        "r":0.4651162791,
-        "f":0.5633802817
-      },
-      "FAC":{
-        "p":0.6666666667,
-        "r":0.1666666667,
-        "f":0.2666666667
       },
-      "ORG":{
-        "p":0.4651162791,
-        "r":0.3174603175,
-        "f":0.3773584906
-      },
-      "PERSON":{
-        "p":0.6809815951,
-        "r":0.6434782609,
-        "f":0.6616989568
-      },
-      "WORK_OF_ART":{
-        "p":0.1785714286,
-        "r":0.4,
-        "f":0.2469135802
-      },
-      "DATE":{
-        "p":0.75,
-        "r":0.7804878049,
-        "f":0.764940239
-      },
-      "ORDINAL":{
-        "p":0.6956521739,
-        "r":0.5925925926,
-        "f":0.64
-      },
-      "CARDINAL":{
-        "p":0.7155963303,
-        "r":0.75,
-        "f":0.7323943662
-      },
-      "NORP":{
-        "p":0.7894736842,
-        "r":0.4918032787,
-        "f":0.6060606061
-      },
-      "LAW":{
-        "p":0.8,
-        "r":0.3333333333,
-        "f":0.4705882353
-      },
-      "QUANTITY":{
-        "p":0.6666666667,
-        "r":0.5925925926,
-        "f":0.6274509804
-      },
-      "EVENT":{
-        "p":0.6875,
-        "r":0.2619047619,
-        "f":0.3793103448
-      },
-      "PERCENT":{
-        "p":0.5555555556,
-        "r":0.7142857143,
-        "f":0.625
-      },
-      "LANGUAGE":{
-        "p":0.8,
-        "r":0.9230769231,
-        "f":0.8571428571
-      },
-      "MONEY":{
         "p":0.0,
         "r":0.0,
         "f":0.0
       },
-      "TIME":{
-        "p":0.5,
         "r":1.0,
         "f":0.6666666667
       }
     },
-    "tok2vec_loss":457.675804386,
-    "ner_loss":1277.2143377495
   },
   "requirements":[

 {
   "lang":"id",
   "name":"core_news_sm",
+  "version":"0.0.2",
   "description":"",
   "author":"",
   "email":"",
     ],
     "ner":[
+    ],
+    "tagger":[
+      "APP",
+      "ASP",
+      "ASP+PS3",
+      "ASS",
+      "B--",
+      "B--+PS3",
+      "CC-",
+      "CCONJ",
+      "CD-",
+      "CO-",
+      "D--",
+      "D--+PS3",
+      "F--",
+      "F--+PS2",
+      "G--",
+      "G--+PS3",
+      "H--",
+      "I--",
+      "M--",
+      "M--+PS3",
+      "NOUN",
+      "NPD",
+      "NSD",
+      "NSD+PS3",
+      "NSF",
+      "NSM",
+      "NUM",
+      "O--",
+      "PP1",
+      "PP2",
+      "PP3",
+      "PROPN",
+      "PS1",
+      "PS1+VSA",
+      "PS2",
+      "PS3",
+      "R--",
+      "R--+PS3",
+      "S--",
+      "SYM",
+      "T--",
+      "VERB",
+      "VPA",
+      "VSA",
+      "VSA+PS2",
+      "VSA+PS3",
+      "VSP",
+      "W--",
+      "X--",
+      "Z--"
+    ],
+    "morphologizer":[
+      "POS=PROPN",
+      "POS=AUX",
+      "Definite=Ind|POS=DET|PronType=Art",
+      "Number=Sing|POS=NOUN",
+      "POS=PRON|PronType=Rel",
+      "Mood=Ind|POS=VERB|Voice=Pass",
+      "POS=ADP",
+      "POS=PUNCT",
+      "POS=NOUN",
+      "POS=ADV",
+      "POS=CCONJ",
+      "POS=SCONJ",
+      "Mood=Ind|POS=VERB|Voice=Act",
+      "POS=VERB",
+      "POS=DET|PronType=Tot",
+      "Number=Sing|POS=PRON|Person=3|PronType=Prs",
+      "Number=Plur|POS=PRON|Person=3|PronType=Prs",
+      "POS=PRON|PronType=Prs|Reflex=Yes",
+      "POS=DET|PronType=Dem",
+      "NumType=Card|POS=NUM",
+      "POS=ADJ",
+      "Number=Plur|POS=DET|PronType=Ind",
+      "NumType=Card|POS=NUM|PronType=Tot",
+      "POS=PART|Polarity=Neg",
+      "POS=PRON|PronType=Int",
+      "NumType=Ord|POS=ADJ",
+      "POS=PART",
+      "POS=PRON|PronType=Dem",
+      "POS=DET|PronType=Ind",
+      "Number=Plur|POS=NOUN",
+      "Number=Sing|POS=PRON|Person=1|Polite=Form|PronType=Prs",
+      "POS=ADV|PronType=Int",
+      "Clusivity=In|Number=Plur|POS=PRON|Person=1|PronType=Prs",
+      "Definite=Def|POS=DET|PronType=Art",
+      "POS=SYM",
+      "Degree=Sup|POS=ADJ",
+      "POS=INTJ",
+      "Number=Sing|POS=PRON|Person=2|Polite=Infm|PronType=Prs",
+      "POS=ADV|PronType=Ind",
+      "Number=Sing|POS=PRON|Person=3|Polite=Form|PronType=Prs",
+      "Number=Sing|POS=PRON|Person=1|Polite=Infm|PronType=Prs",
+      "Number=Sing|POS=PRON|PronType=Ind",
+      "POS=VERB|Voice=Act",
+      "POS=DET|PronType=Emp",
+      "POS=VERB|Voice=Pass",
+      "POS=ADV|PronType=Dem",
+      "POS=NOUN|Typo=Yes",
+      "POS=ADP|Typo=Yes",
+      "Number=Plur|POS=PRON|PronType=Ind",
+      "POS=VERB|Typo=Yes|Voice=Pass",
+      "POS=X",
+      "POS=PRON|PronType=Tot",
+      "POS=SCONJ|Typo=Yes",
+      "Number=Plur|POS=PRON|Person=2|Polite=Infm|PronType=Prs",
+      "NumType=Card|POS=NUM|Typo=Yes",
+      "Clusivity=Ex|Number=Plur|POS=PRON|Person=1|PronType=Prs",
+      "Number=Sing|POS=PRON|Person=2|Polite=Form|PronType=Prs",
+      "Foreign=Yes|POS=X",
+      "POS=ADV|PronType=Rel",
+      "Mood=Imp|POS=VERB|Voice=Act",
+      "Number=Sing|POS=NOUN|Typo=Yes",
+      "POS=PROPN|Typo=Yes",
+      "POS=DET",
+      "Number=Sing|POS=DET|PronType=Ind",
+      "POS=DET|PronType=Ind|Typo=Yes",
+      "Abbr=Yes|POS=DET|PronType=Dem",
+      "POS=PRON|PronType=Ind",
+      "POS=VERB|Typo=Yes",
+      "Abbr=Yes|POS=PROPN",
+      "Abbr=Yes|POS=PRON|PronType=Rel",
+      "Number=Plur|POS=PRON|PronType=Int",
+      "Abbr=Yes|POS=PART|Polarity=Neg",
+      "POS=ADV|PronType=Tot",
+      "Abbr=Yes|POS=ADV",
+      "POS=ADV|Typo=Yes",
+      "POS=X|Typo=Yes",
+      "Number=Sing|POS=PRON|Person=2|PronType=Prs",
+      "POS=ADV|PronType=Int|Typo=Yes",
+      "NumType=Ord|POS=ADJ|Typo=Yes"
+    ],
+    "parser":[
+      "ROOT",
+      "acl",
+      "acl:relcl",
+      "advcl",
+      "advmod",
+      "advmod:emph",
+      "amod",
+      "appos",
+      "aux",
+      "case",
+      "case:adv",
+      "cc",
+      "ccomp",
+      "compound",
+      "conj",
+      "cop",
+      "dep",
+      "det",
+      "fixed",
+      "flat",
+      "flat:foreign",
+      "flat:name",
+      "mark",
+      "nmod",
+      "nmod:lmod",
+      "nmod:poss",
+      "nmod:tmod",
+      "nsubj",
+      "nsubj:pass",
+      "nummod",
+      "obj",
+      "obl",
+      "obl:agent",
+      "obl:tmod",
+      "parataxis",
+      "punct",
+      "xcomp"
     ]
   },
   "pipeline":[
     "tok2vec",
+    "ner",
+    "tagger",
+    "morphologizer",
+    "trainable_lemmatizer",
+    "parser"
   ],
   "components":[
     "tok2vec",
+    "ner",
+    "tagger",
+    "morphologizer",
+    "trainable_lemmatizer",
+    "parser"
   ],
   "disabled":[
   ],
   "performance":{
+    "ents_f":0.0,
+    "ents_p":0.0,
+    "ents_r":0.0,
+    "ents_per_type":0.0,
+    "tag_acc":0.9058429775,
+    "pos_acc":0.911077953,
+    "morph_acc":0.924529063,
+    "morph_per_feat":{
+      "Number":{
+        "p":0.978250591,
+        "r":0.8123282293,
+        "f":0.8876018876
+      },
+      "Mood":{
+        "p":0.9941520468,
+        "r":0.9147982063,
+        "f":0.9528257823
+      },
+      "Voice":{
+        "p":0.9902723735,
+        "r":0.9113697404,
+        "f":0.9491841492
+      },
+      "PronType":{
+        "p":0.991576414,
+        "r":0.7795648061,
+        "f":0.8728813559
+      },
+      "Polarity":{
+        "p":1.0,
+        "r":0.862745098,
+        "f":0.9263157895
+      },
+      "Person":{
+        "p":1.0,
+        "r":0.3973509934,
+        "f":0.5687203791
+      },
+      "NumType":{
+        "p":0.9952718676,
+        "r":0.9503386005,
+        "f":0.9722863741
       },
+      "Typo":{
+        "p":1.0,
+        "r":0.4666666667,
+        "f":0.6363636364
+      },
+      "Definite":{
+        "p":0.9838709677,
+        "r":0.7922077922,
+        "f":0.8776978417
+      },
+      "Polite":{
+        "p":1.0,
+        "r":0.65625,
+        "f":0.7924528302
+      },
+      "Reflex":{
+        "p":1.0,
+        "r":0.5,
+        "f":0.6666666667
+      },
+      "Degree":{
+        "p":0.9375,
+        "r":0.8823529412,
+        "f":0.9090909091
+      },
+      "Foreign":{
         "p":0.0,
         "r":0.0,
         "f":0.0
       },
+      "Clusivity":{
+        "p":1.0,
         "r":1.0,
+        "f":1.0
+      },
+      "Abbr":{
+        "p":1.0,
+        "r":0.2,
+        "f":0.3333333333
+      }
+    },
+    "lemma_acc":0.9356240444,
+    "dep_uas":0.779082277,
+    "dep_las":0.6971807939,
+    "dep_las_per_type":{
+      "nsubj":{
+        "p":0.8022759602,
+        "r":0.7621621622,
+        "f":0.7817047817
+      },
+      "compound":{
+        "p":0.6970849176,
+        "r":0.6988564168,
+        "f":0.6979695431
+      },
+      "root":{
+        "p":0.8139963168,
+        "r":0.7906976744,
+        "f":0.8021778584
+      },
+      "obj":{
+        "p":0.8152985075,
+        "r":0.7613240418,
+        "f":0.7873873874
+      },
+      "case":{
+        "p":0.9104609929,
+        "r":0.8868739206,
+        "f":0.8985126859
+      },
+      "obl":{
+        "p":0.7344322344,
+        "r":0.6178736518,
+        "f":0.6711297071
+      },
+      "amod":{
+        "p":0.6195899772,
+        "r":0.590021692,
+        "f":0.6044444444
+      },
+      "conj":{
+        "p":0.5868263473,
+        "r":0.5485074627,
+        "f":0.5670202507
+      },
+      "cc":{
+        "p":0.880239521,
+        "r":0.8376068376,
+        "f":0.8583941606
+      },
+      "acl:relcl":{
+        "p":0.7262357414,
+        "r":0.6821428571,
+        "f":0.7034990792
+      },
+      "flat:name":{
+        "p":0.7844036697,
+        "r":0.8123515439,
+        "f":0.7981330222
+      },
+      "advmod":{
+        "p":0.7588075881,
+        "r":0.6982543641,
+        "f":0.7272727273
+      },
+      "nmod":{
+        "p":0.613836478,
+        "r":0.5816448153,
+        "f":0.5973072215
+      },
+      "nsubj:pass":{
+        "p":0.7100840336,
+        "r":0.7824074074,
+        "f":0.7444933921
+      },
+      "det":{
+        "p":0.8545454545,
+        "r":0.7943661972,
+        "f":0.8233576642
+      },
+      "aux":{
+        "p":0.9461538462,
+        "r":0.9179104478,
+        "f":0.9318181818
+      },
+      "nmod:poss":{
+        "p":0.8235294118,
+        "r":0.0848484848,
+        "f":0.1538461538
+      },
+      "dep":{
+        "p":0.2394366197,
+        "r":0.3035714286,
+        "f":0.2677165354
+      },
+      "mark":{
+        "p":0.8018018018,
+        "r":0.7542372881,
+        "f":0.7772925764
+      },
+      "cop":{
+        "p":0.9607843137,
+        "r":0.9423076923,
+        "f":0.9514563107
+      },
+      "acl":{
+        "p":0.3048780488,
+        "r":0.2232142857,
+        "f":0.2577319588
+      },
+      "nummod":{
+        "p":0.7768817204,
+        "r":0.7747989276,
+        "f":0.7758389262
+      },
+      "appos":{
+        "p":0.6460176991,
+        "r":0.553030303,
+        "f":0.5959183673
+      },
+      "xcomp":{
+        "p":0.4495412844,
+        "r":0.392,
+        "f":0.4188034188
+      },
+      "ccomp":{
+        "p":0.4571428571,
+        "r":0.3265306122,
+        "f":0.380952381
+      },
+      "obl:tmod":{
+        "p":0.6029411765,
+        "r":0.6721311475,
+        "f":0.6356589147
+      },
+      "advcl":{
+        "p":0.268907563,
+        "r":0.2269503546,
+        "f":0.2461538462
+      },
+      "advmod:emph":{
+        "p":1.0,
+        "r":0.0434782609,
+        "f":0.0833333333
+      },
+      "case:adv":{
+        "p":0.7272727273,
+        "r":0.6153846154,
         "f":0.6666666667
+      },
+      "obl:agent":{
+        "p":0.0,
+        "r":0.0,
+        "f":0.0
+      },
+      "flat":{
+        "p":0.4375,
+        "r":0.3255813953,
+        "f":0.3733333333
+      },
+      "parataxis":{
+        "p":0.2253521127,
+        "r":0.3265306122,
+        "f":0.2666666667
+      },
+      "nmod:lmod":{
+        "p":1.0,
+        "r":0.0769230769,
+        "f":0.1428571429
+      },
+      "flat:foreign":{
+        "p":0.0,
+        "r":0.0,
+        "f":0.0
+      },
+      "nmod:tmod":{
+        "p":0.3333333333,
+        "r":0.2727272727,
+        "f":0.3
+      },
+      "iobj":{
+        "p":0.0,
+        "r":0.0,
+        "f":0.0
+      },
+      "csubj":{
+        "p":0.0,
+        "r":0.0,
+        "f":0.0
+      },
+      "fixed":{
+        "p":0.6,
+        "r":0.3461538462,
+        "f":0.4390243902
+      },
+      "discourse":{
+        "p":0.0,
+        "r":0.0,
+        "f":0.0
+      },
+      "cc:preconj":{
+        "p":0.0,
+        "r":0.0,
+        "f":0.0
+      },
+      "compound:a":{
+        "p":0.0,
+        "r":0.0,
+        "f":0.0
       }
     },
+    "sents_p":0.9074733096,
+    "sents_r":0.9123434705,
+    "sents_f":0.9099018733,
+    "tok2vec_loss":8342.1316462817,
+    "ner_loss":0.0,
+    "tagger_loss":572.1505841613,
+    "morphologizer_loss":1246.6148492694,
+    "trainable_lemmatizer_loss":245.6071262917,
+    "parser_loss":8829.155590333
   },
   "requirements":[

morphologizer/model CHANGED Viewed

Binary files a/morphologizer/model and b/morphologizer/model differ

ner/model CHANGED Viewed

Binary files a/ner/model and b/ner/model differ

ner/moves CHANGED Viewed

@@ -1 +1 @@

- ��moves��{"0":{},"1":{"GPE":3540,"PERSON":3236,"ORG":1843,"DATE":1451,"CARDINAL":743,"WORK_OF_ART":705,"PRODUCT":642,"EVENT":629,"LOC":491,"FAC":486,"QUANTITY":429,"NORP":427,"LANGUAGE":194,"ORDINAL":177,"LAW":137,"PERCENT":126,"TIME":103,"MONEY":65},"2":{"GPE":3540,"PERSON":3236,"ORG":1843,"DATE":1451,"CARDINAL":743,"WORK_OF_ART":705,"PRODUCT":642,"EVENT":629,"LOC":491,"FAC":486,"QUANTITY":429,"NORP":427,"LANGUAGE":194,"ORDINAL":177,"LAW":137,"PERCENT":126,"TIME":103,"MONEY":65},"3":{"GPE":3540,"PERSON":3236,"ORG":1843,"DATE":1451,"CARDINAL":743,"WORK_OF_ART":705,"PRODUCT":642,"EVENT":629,"LOC":491,"FAC":486,"QUANTITY":429,"NORP":427,"LANGUAGE":194,"ORDINAL":177,"LAW":137,"PERCENT":126,"TIME":103,"MONEY":65},"4":{"~~GPE~~":3540,"PERSON":3236,"ORG":1843,"DATE":1451,"CARDINAL":743,"WORK_OF_ART":705,"PRODUCT":642,"EVENT":629,"LOC":491,"FAC":486,"QUANTITY":429,"NORP":427,"LANGUAGE":194,"ORDINAL":177,"LAW":137,"PERCENT":126,"TIME":103,"MONEY":65,"":1},"5":{"":1}}�cfg��neg_key�


1	+ ��moves�3{"0":{},"1":{},"2":{},"3":{},"4":{"":1},"5":{"":1}}�cfg��neg_key�

parser/model CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:16da2082353eec368fedc44e473009c51f5ee03be764493d78146bf2977b6645
 size 1750016

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2738f320e714bbe52de113a1eeb00ac1cb55706a794cb67b6fc3442a29ec0e0
 size 1750016

tagger/model CHANGED Viewed

Binary files a/tagger/model and b/tagger/model differ

tok2vec/model CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2f4155df88b3296a30602bab1237736752867c37007f3d4b05233c2e0c46780
 size 34126801

 version https://git-lfs.github.com/spec/v1
+oid sha256:97122c73406442748f5a28b415d61067a62dcbda60fdad337f0a9a9917d12e93
 size 34126801

trainable_lemmatizer/model CHANGED Viewed

Binary files a/trainable_lemmatizer/model and b/trainable_lemmatizer/model differ

vocab/strings.json CHANGED Viewed

The diff for this file is too large to render. See raw diff