firqaaa committed on
Commit
355be29
·
verified ·
1 Parent(s): 5aceba7

Update spaCy pipeline

Browse files
README.md CHANGED
@@ -13,43 +13,70 @@ model-index:
13
  metrics:
14
  - name: NER Precision
15
  type: precision
16
- value: 0.6721056721
17
  - name: NER Recall
18
  type: recall
19
- value: 0.6040502793
20
  - name: NER F Score
21
  type: f_score
22
- value: 0.6362633321
 
 
 
 
23
  - name: TAG (XPOS) Accuracy
24
  type: accuracy
25
- value: 0.9051536414
 
 
 
 
26
  - name: POS (UPOS) Accuracy
27
  type: accuracy
28
- value: 0.9125297415
 
 
 
 
29
  - name: Morph (UFeats) Accuracy
30
  type: accuracy
31
- value: 0.9296115526
 
 
 
 
32
  - name: Lemma Accuracy
33
  type: accuracy
34
- value: 0.9369920335
 
 
 
 
35
  - name: Unlabeled Attachment Score (UAS)
36
  type: f_score
37
- value: 0.7753785754
 
 
 
 
38
  - name: Labeled Attachment Score (LAS)
39
  type: f_score
40
- value: 0.6871555348
 
 
 
 
41
  - name: Sentences F-Score
42
  type: f_score
43
- value: 0.857881137
44
-
45
  ---
46
  | Feature | Description |
47
  | --- | --- |
48
  | **Name** | `id_core_news_sm` |
49
- | **Version** | `0.0.1` |
50
  | **spaCy** | `>=3.7.4,<3.8.0` |
51
- | **Default Pipeline** | `tok2vec`, `ner` |
52
- | **Components** | `tok2vec`, `ner` |
53
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
54
  | **Sources** | n/a |
55
  | **License** | n/a |
@@ -59,11 +86,10 @@ model-index:
59
 
60
  <details>
61
 
62
- <summary>View label scheme (18 labels for 1 components)</summary>
63
 
64
  | Component | Labels |
65
  | --- | --- |
66
- | **`ner`** | `CARDINAL`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `TIME`, `WORK_OF_ART` |
67
  | **`tagger`** | `APP`, `ASP`, `ASP+PS3`, `ASS`, `B--`, `B--+PS3`, `CC-`, `CCONJ`, `CD-`, `CO-`, `D--`, `D--+PS3`, `F--`, `F--+PS2`, `G--`, `G--+PS3`, `H--`, `I--`, `M--`, `M--+PS3`, `NOUN`, `NPD`, `NSD`, `NSD+PS3`, `NSF`, `NSM`, `NUM`, `O--`, `PP1`, `PP2`, `PP3`, `PROPN`, `PS1`, `PS1+VSA`, `PS2`, `PS3`, `R--`, `R--+PS3`, `S--`, `SYM`, `T--`, `VERB`, `VPA`, `VSA`, `VSA+PS2`, `VSA+PS3`, `VSP`, `W--`, `X--`, `Z--` |
68
  | **`morphologizer`** | `POS=PROPN`, `POS=AUX`, `Definite=Ind\|POS=DET\|PronType=Art`, `Number=Sing\|POS=NOUN`, `POS=PRON\|PronType=Rel`, `Mood=Ind\|POS=VERB\|Voice=Pass`, `POS=ADP`, `POS=PUNCT`, `POS=NOUN`, `POS=ADV`, `POS=CCONJ`, `POS=SCONJ`, `Mood=Ind\|POS=VERB\|Voice=Act`, `POS=VERB`, `POS=DET\|PronType=Tot`, `Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Number=Plur\|POS=PRON\|Person=3\|PronType=Prs`, `POS=PRON\|PronType=Prs\|Reflex=Yes`, `POS=DET\|PronType=Dem`, `NumType=Card\|POS=NUM`, `POS=ADJ`, `Number=Plur\|POS=DET\|PronType=Ind`, `NumType=Card\|POS=NUM\|PronType=Tot`, `POS=PART\|Polarity=Neg`, `POS=PRON\|PronType=Int`, `NumType=Ord\|POS=ADJ`, `POS=PART`, `POS=PRON\|PronType=Dem`, `POS=DET\|PronType=Ind`, `Number=Plur\|POS=NOUN`, `Number=Sing\|POS=PRON\|Person=1\|Polite=Form\|PronType=Prs`, `POS=ADV\|PronType=Int`, `Clusivity=In\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Definite=Def\|POS=DET\|PronType=Art`, `POS=SYM`, `Degree=Sup\|POS=ADJ`, `POS=INTJ`, `Number=Sing\|POS=PRON\|Person=2\|Polite=Infm\|PronType=Prs`, `POS=ADV\|PronType=Ind`, `Number=Sing\|POS=PRON\|Person=3\|Polite=Form\|PronType=Prs`, `Number=Sing\|POS=PRON\|Person=1\|Polite=Infm\|PronType=Prs`, `Number=Sing\|POS=PRON\|PronType=Ind`, `POS=VERB\|Voice=Act`, `POS=DET\|PronType=Emp`, `POS=VERB\|Voice=Pass`, `POS=ADV\|PronType=Dem`, `POS=NOUN\|Typo=Yes`, `POS=ADP\|Typo=Yes`, `Number=Plur\|POS=PRON\|PronType=Ind`, `POS=VERB\|Typo=Yes\|Voice=Pass`, `POS=X`, `POS=PRON\|PronType=Tot`, `POS=SCONJ\|Typo=Yes`, `Number=Plur\|POS=PRON\|Person=2\|Polite=Infm\|PronType=Prs`, `NumType=Card\|POS=NUM\|Typo=Yes`, `Clusivity=Ex\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Number=Sing\|POS=PRON\|Person=2\|Polite=Form\|PronType=Prs`, `Foreign=Yes\|POS=X`, `POS=ADV\|PronType=Rel`, `Mood=Imp\|POS=VERB\|Voice=Act`, `Number=Sing\|POS=NOUN\|Typo=Yes`, `POS=PROPN\|Typo=Yes`, `POS=DET`, `Number=Sing\|POS=DET\|PronType=Ind`, `POS=DET\|PronType=Ind\|Typo=Yes`, `Abbr=Yes\|POS=DET\|PronType=Dem`, 
`POS=PRON\|PronType=Ind`, `POS=VERB\|Typo=Yes`, `Abbr=Yes\|POS=PROPN`, `Abbr=Yes\|POS=PRON\|PronType=Rel`, `Number=Plur\|POS=PRON\|PronType=Int`, `Abbr=Yes\|POS=PART\|Polarity=Neg`, `POS=ADV\|PronType=Tot`, `Abbr=Yes\|POS=ADV`, `POS=ADV\|Typo=Yes`, `POS=X\|Typo=Yes`, `Number=Sing\|POS=PRON\|Person=2\|PronType=Prs`, `POS=ADV\|PronType=Int\|Typo=Yes`, `NumType=Ord\|POS=ADJ\|Typo=Yes` |
69
  | **`parser`** | `ROOT`, `acl`, `acl:relcl`, `advcl`, `advmod`, `advmod:emph`, `amod`, `appos`, `aux`, `case`, `case:adv`, `cc`, `ccomp`, `compound`, `conj`, `cop`, `dep`, `det`, `fixed`, `flat`, `flat:foreign`, `flat:name`, `mark`, `nmod`, `nmod:lmod`, `nmod:poss`, `nmod:tmod`, `nsubj`, `nsubj:pass`, `nummod`, `obj`, `obl`, `obl:agent`, `obl:tmod`, `parataxis`, `punct`, `xcomp` |
@@ -74,22 +100,22 @@ model-index:
74
 
75
  | Type | Score |
76
  | --- | --- |
77
- | `ENTS_F` | 63.63 |
78
- | `ENTS_P` | 67.21 |
79
- | `ENTS_R` | 60.41 |
80
- | `TOK2VEC_LOSS` | 45767.58 |
81
- | `NER_LOSS` | 127721.43 |
82
- | `TAG_ACC` | 90.52 |
83
- | `POS_ACC` | 91.25 |
84
- | `MORPH_ACC` | 92.96 |
85
- | `LEMMA_ACC` | 93.70 |
86
- | `DEP_UAS` | 77.54 |
87
- | `DEP_LAS` | 68.72 |
88
- | `SENTS_P` | 82.72 |
89
- | `SENTS_R` | 89.09 |
90
- | `SENTS_F` | 85.79 |
91
- | `TOK2VEC_LOSS` | 756743.38 |
92
- | `TAGGER_LOSS` | 73614.38 |
93
- | `MORPHOLOGIZER_LOSS` | 155689.33 |
94
- | `TRAINABLE_LEMMATIZER_LOSS` | 35033.93 |
95
- | `PARSER_LOSS` | 1037857.66 |
 
13
  metrics:
14
  - name: NER Precision
15
  type: precision
16
+ value: 0.0
17
  - name: NER Recall
18
  type: recall
19
+ value: 0.0
20
  - name: NER F Score
21
  type: f_score
22
+ value: 0.0
23
+ - task:
24
+ name: TAG
25
+ type: token-classification
26
+ metrics:
27
  - name: TAG (XPOS) Accuracy
28
  type: accuracy
29
+ value: 0.9058429775
30
+ - task:
31
+ name: POS
32
+ type: token-classification
33
+ metrics:
34
  - name: POS (UPOS) Accuracy
35
  type: accuracy
36
+ value: 0.911077953
37
+ - task:
38
+ name: MORPH
39
+ type: token-classification
40
+ metrics:
41
  - name: Morph (UFeats) Accuracy
42
  type: accuracy
43
+ value: 0.924529063
44
+ - task:
45
+ name: LEMMA
46
+ type: token-classification
47
+ metrics:
48
  - name: Lemma Accuracy
49
  type: accuracy
50
+ value: 0.9356240444
51
+ - task:
52
+ name: UNLABELED_DEPENDENCIES
53
+ type: token-classification
54
+ metrics:
55
  - name: Unlabeled Attachment Score (UAS)
56
  type: f_score
57
+ value: 0.779082277
58
+ - task:
59
+ name: LABELED_DEPENDENCIES
60
+ type: token-classification
61
+ metrics:
62
  - name: Labeled Attachment Score (LAS)
63
  type: f_score
64
+ value: 0.6971807939
65
+ - task:
66
+ name: SENTS
67
+ type: token-classification
68
+ metrics:
69
  - name: Sentences F-Score
70
  type: f_score
71
+ value: 0.9099018733
 
72
  ---
73
  | Feature | Description |
74
  | --- | --- |
75
  | **Name** | `id_core_news_sm` |
76
+ | **Version** | `0.0.2` |
77
  | **spaCy** | `>=3.7.4,<3.8.0` |
78
+ | **Default Pipeline** | `tok2vec`, `ner`, `tagger`, `morphologizer`, `trainable_lemmatizer`, `parser` |
79
+ | **Components** | `tok2vec`, `ner`, `tagger`, `morphologizer`, `trainable_lemmatizer`, `parser` |
80
  | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
81
  | **Sources** | n/a |
82
  | **License** | n/a |
 
86
 
87
  <details>
88
 
89
+ <summary>View label scheme (166 labels for 3 components)</summary>
90
 
91
  | Component | Labels |
92
  | --- | --- |
 
93
  | **`tagger`** | `APP`, `ASP`, `ASP+PS3`, `ASS`, `B--`, `B--+PS3`, `CC-`, `CCONJ`, `CD-`, `CO-`, `D--`, `D--+PS3`, `F--`, `F--+PS2`, `G--`, `G--+PS3`, `H--`, `I--`, `M--`, `M--+PS3`, `NOUN`, `NPD`, `NSD`, `NSD+PS3`, `NSF`, `NSM`, `NUM`, `O--`, `PP1`, `PP2`, `PP3`, `PROPN`, `PS1`, `PS1+VSA`, `PS2`, `PS3`, `R--`, `R--+PS3`, `S--`, `SYM`, `T--`, `VERB`, `VPA`, `VSA`, `VSA+PS2`, `VSA+PS3`, `VSP`, `W--`, `X--`, `Z--` |
94
  | **`morphologizer`** | `POS=PROPN`, `POS=AUX`, `Definite=Ind\|POS=DET\|PronType=Art`, `Number=Sing\|POS=NOUN`, `POS=PRON\|PronType=Rel`, `Mood=Ind\|POS=VERB\|Voice=Pass`, `POS=ADP`, `POS=PUNCT`, `POS=NOUN`, `POS=ADV`, `POS=CCONJ`, `POS=SCONJ`, `Mood=Ind\|POS=VERB\|Voice=Act`, `POS=VERB`, `POS=DET\|PronType=Tot`, `Number=Sing\|POS=PRON\|Person=3\|PronType=Prs`, `Number=Plur\|POS=PRON\|Person=3\|PronType=Prs`, `POS=PRON\|PronType=Prs\|Reflex=Yes`, `POS=DET\|PronType=Dem`, `NumType=Card\|POS=NUM`, `POS=ADJ`, `Number=Plur\|POS=DET\|PronType=Ind`, `NumType=Card\|POS=NUM\|PronType=Tot`, `POS=PART\|Polarity=Neg`, `POS=PRON\|PronType=Int`, `NumType=Ord\|POS=ADJ`, `POS=PART`, `POS=PRON\|PronType=Dem`, `POS=DET\|PronType=Ind`, `Number=Plur\|POS=NOUN`, `Number=Sing\|POS=PRON\|Person=1\|Polite=Form\|PronType=Prs`, `POS=ADV\|PronType=Int`, `Clusivity=In\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Definite=Def\|POS=DET\|PronType=Art`, `POS=SYM`, `Degree=Sup\|POS=ADJ`, `POS=INTJ`, `Number=Sing\|POS=PRON\|Person=2\|Polite=Infm\|PronType=Prs`, `POS=ADV\|PronType=Ind`, `Number=Sing\|POS=PRON\|Person=3\|Polite=Form\|PronType=Prs`, `Number=Sing\|POS=PRON\|Person=1\|Polite=Infm\|PronType=Prs`, `Number=Sing\|POS=PRON\|PronType=Ind`, `POS=VERB\|Voice=Act`, `POS=DET\|PronType=Emp`, `POS=VERB\|Voice=Pass`, `POS=ADV\|PronType=Dem`, `POS=NOUN\|Typo=Yes`, `POS=ADP\|Typo=Yes`, `Number=Plur\|POS=PRON\|PronType=Ind`, `POS=VERB\|Typo=Yes\|Voice=Pass`, `POS=X`, `POS=PRON\|PronType=Tot`, `POS=SCONJ\|Typo=Yes`, `Number=Plur\|POS=PRON\|Person=2\|Polite=Infm\|PronType=Prs`, `NumType=Card\|POS=NUM\|Typo=Yes`, `Clusivity=Ex\|Number=Plur\|POS=PRON\|Person=1\|PronType=Prs`, `Number=Sing\|POS=PRON\|Person=2\|Polite=Form\|PronType=Prs`, `Foreign=Yes\|POS=X`, `POS=ADV\|PronType=Rel`, `Mood=Imp\|POS=VERB\|Voice=Act`, `Number=Sing\|POS=NOUN\|Typo=Yes`, `POS=PROPN\|Typo=Yes`, `POS=DET`, `Number=Sing\|POS=DET\|PronType=Ind`, `POS=DET\|PronType=Ind\|Typo=Yes`, `Abbr=Yes\|POS=DET\|PronType=Dem`, 
`POS=PRON\|PronType=Ind`, `POS=VERB\|Typo=Yes`, `Abbr=Yes\|POS=PROPN`, `Abbr=Yes\|POS=PRON\|PronType=Rel`, `Number=Plur\|POS=PRON\|PronType=Int`, `Abbr=Yes\|POS=PART\|Polarity=Neg`, `POS=ADV\|PronType=Tot`, `Abbr=Yes\|POS=ADV`, `POS=ADV\|Typo=Yes`, `POS=X\|Typo=Yes`, `Number=Sing\|POS=PRON\|Person=2\|PronType=Prs`, `POS=ADV\|PronType=Int\|Typo=Yes`, `NumType=Ord\|POS=ADJ\|Typo=Yes` |
95
  | **`parser`** | `ROOT`, `acl`, `acl:relcl`, `advcl`, `advmod`, `advmod:emph`, `amod`, `appos`, `aux`, `case`, `case:adv`, `cc`, `ccomp`, `compound`, `conj`, `cop`, `dep`, `det`, `fixed`, `flat`, `flat:foreign`, `flat:name`, `mark`, `nmod`, `nmod:lmod`, `nmod:poss`, `nmod:tmod`, `nsubj`, `nsubj:pass`, `nummod`, `obj`, `obl`, `obl:agent`, `obl:tmod`, `parataxis`, `punct`, `xcomp` |
 
100
 
101
  | Type | Score |
102
  | --- | --- |
103
+ | `ENTS_F` | 0.00 |
104
+ | `ENTS_P` | 0.00 |
105
+ | `ENTS_R` | 0.00 |
106
+ | `ENTS_PER_TYPE` | 0.00 |
107
+ | `TAG_ACC` | 90.58 |
108
+ | `POS_ACC` | 91.11 |
109
+ | `MORPH_ACC` | 92.45 |
110
+ | `LEMMA_ACC` | 93.56 |
111
+ | `DEP_UAS` | 77.91 |
112
+ | `DEP_LAS` | 69.72 |
113
+ | `SENTS_P` | 90.75 |
114
+ | `SENTS_R` | 91.23 |
115
+ | `SENTS_F` | 90.99 |
116
+ | `TOK2VEC_LOSS` | 834213.16 |
117
+ | `NER_LOSS` | 0.00 |
118
+ | `TAGGER_LOSS` | 57215.06 |
119
+ | `MORPHOLOGIZER_LOSS` | 124661.48 |
120
+ | `TRAINABLE_LEMMATIZER_LOSS` | 24560.71 |
121
+ | `PARSER_LOSS` | 882915.56 |
config.cfg CHANGED
@@ -1,6 +1,6 @@
1
  [paths]
2
- train = "./ner_id_train.spacy"
3
- dev = "./ner_id_dev.spacy"
4
  vectors = null
5
  init_tok2vec = null
6
 
@@ -10,7 +10,7 @@ seed = 0
10
 
11
  [nlp]
12
  lang = "id"
13
- pipeline = ["tok2vec","ner"]
14
  batch_size = 1000
15
  disabled = []
16
  before_creation = null
@@ -21,6 +21,23 @@ vectors = {"@vectors":"spacy.Vectors.v1"}
21
 
22
  [components]
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  [components.ner]
25
  factory = "ner"
26
  incorrect_spans_key = null
@@ -42,6 +59,45 @@ nO = null
42
  width = ${components.tok2vec.model.encode.width}
43
  upstream = "*"
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  [components.tok2vec]
46
  factory = "tok2vec"
47
 
@@ -62,6 +118,24 @@ depth = 8
62
  window_size = 1
63
  maxout_pieces = 3
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  [corpora]
66
 
67
  [corpora.dev]
@@ -125,10 +199,21 @@ eps = 0.00000001
125
  learn_rate = 0.001
126
 
127
  [training.score_weights]
128
- ents_f = 1.0
129
  ents_p = 0.0
130
  ents_r = 0.0
131
  ents_per_type = null
 
 
 
 
 
 
 
 
 
 
 
132
 
133
  [pretraining]
134
 
 
1
  [paths]
2
+ train = "./id_gsd-ud-train.spacy"
3
+ dev = "./id_gsd-ud-dev.spacy"
4
  vectors = null
5
  init_tok2vec = null
6
 
 
10
 
11
  [nlp]
12
  lang = "id"
13
+ pipeline = ["tok2vec","ner","tagger","morphologizer","trainable_lemmatizer","parser"]
14
  batch_size = 1000
15
  disabled = []
16
  before_creation = null
 
21
 
22
  [components]
23
 
24
+ [components.morphologizer]
25
+ factory = "morphologizer"
26
+ extend = false
27
+ label_smoothing = 0.05
28
+ overwrite = true
29
+ scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
30
+
31
+ [components.morphologizer.model]
32
+ @architectures = "spacy.Tagger.v2"
33
+ nO = null
34
+ normalize = false
35
+
36
+ [components.morphologizer.model.tok2vec]
37
+ @architectures = "spacy.Tok2VecListener.v1"
38
+ width = ${components.tok2vec.model.encode.width}
39
+ upstream = "*"
40
+
41
  [components.ner]
42
  factory = "ner"
43
  incorrect_spans_key = null
 
59
  width = ${components.tok2vec.model.encode.width}
60
  upstream = "*"
61
 
62
+ [components.parser]
63
+ factory = "parser"
64
+ learn_tokens = false
65
+ min_action_freq = 30
66
+ moves = null
67
+ scorer = {"@scorers":"spacy.parser_scorer.v1"}
68
+ update_with_oracle_cut_size = 100
69
+
70
+ [components.parser.model]
71
+ @architectures = "spacy.TransitionBasedParser.v2"
72
+ state_type = "parser"
73
+ extra_state_tokens = false
74
+ hidden_width = 128
75
+ maxout_pieces = 3
76
+ use_upper = true
77
+ nO = null
78
+
79
+ [components.parser.model.tok2vec]
80
+ @architectures = "spacy.Tok2VecListener.v1"
81
+ width = ${components.tok2vec.model.encode.width}
82
+ upstream = "*"
83
+
84
+ [components.tagger]
85
+ factory = "tagger"
86
+ label_smoothing = 0.05
87
+ neg_prefix = "!"
88
+ overwrite = false
89
+ scorer = {"@scorers":"spacy.tagger_scorer.v1"}
90
+
91
+ [components.tagger.model]
92
+ @architectures = "spacy.Tagger.v2"
93
+ nO = null
94
+ normalize = false
95
+
96
+ [components.tagger.model.tok2vec]
97
+ @architectures = "spacy.Tok2VecListener.v1"
98
+ width = ${components.tok2vec.model.encode.width}
99
+ upstream = "*"
100
+
101
  [components.tok2vec]
102
  factory = "tok2vec"
103
 
 
118
  window_size = 1
119
  maxout_pieces = 3
120
 
121
+ [components.trainable_lemmatizer]
122
+ factory = "trainable_lemmatizer"
123
+ backoff = "orth"
124
+ min_tree_freq = 3
125
+ overwrite = false
126
+ scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
127
+ top_k = 1
128
+
129
+ [components.trainable_lemmatizer.model]
130
+ @architectures = "spacy.Tagger.v2"
131
+ nO = null
132
+ normalize = false
133
+
134
+ [components.trainable_lemmatizer.model.tok2vec]
135
+ @architectures = "spacy.Tok2VecListener.v1"
136
+ width = ${components.tok2vec.model.encode.width}
137
+ upstream = "*"
138
+
139
  [corpora]
140
 
141
  [corpora.dev]
 
199
  learn_rate = 0.001
200
 
201
  [training.score_weights]
202
+ ents_f = 0.2
203
  ents_p = 0.0
204
  ents_r = 0.0
205
  ents_per_type = null
206
+ tag_acc = 0.2
207
+ pos_acc = 0.1
208
+ morph_acc = 0.1
209
+ morph_per_feat = null
210
+ lemma_acc = 0.2
211
+ dep_uas = 0.1
212
+ dep_las = 0.1
213
+ dep_las_per_type = null
214
+ sents_p = null
215
+ sents_r = null
216
+ sents_f = 0.0
217
 
218
  [pretraining]
219
 
id_core_news_sm-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f5b707caf40cf9fdea315f2e5d792baa07e4a95b1ca2d7b66447a5dffb556b9
3
- size 32101183
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c89d501e10ca94b399fb58b812f28a8d55848c6c8f3b58ed0d8f06465957d91
3
+ size 34239071
meta.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "lang":"id",
3
  "name":"core_news_sm",
4
- "version":"0.0.1",
5
  "description":"",
6
  "author":"",
7
  "email":"",
@@ -20,135 +20,504 @@
20
 
21
  ],
22
  "ner":[
23
- "CARDINAL",
24
- "DATE",
25
- "EVENT",
26
- "FAC",
27
- "GPE",
28
- "LANGUAGE",
29
- "LAW",
30
- "LOC",
31
- "MONEY",
32
- "NORP",
33
- "ORDINAL",
34
- "ORG",
35
- "PERCENT",
36
- "PERSON",
37
- "PRODUCT",
38
- "QUANTITY",
39
- "TIME",
40
- "WORK_OF_ART"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  ]
42
  },
43
  "pipeline":[
44
  "tok2vec",
45
- "ner"
 
 
 
 
46
  ],
47
  "components":[
48
  "tok2vec",
49
- "ner"
 
 
 
 
50
  ],
51
  "disabled":[
52
 
53
  ],
54
  "performance":{
55
- "ents_f":0.6362633321,
56
- "ents_p":0.6721056721,
57
- "ents_r":0.6040502793,
58
- "ents_per_type":{
59
- "PRODUCT":{
60
- "p":0.3571428571,
61
- "r":0.0704225352,
62
- "f":0.1176470588
63
- },
64
- "GPE":{
65
- "p":0.7411167513,
66
- "r":0.7934782609,
67
- "f":0.7664041995
68
- },
69
- "LOC":{
70
- "p":0.7142857143,
71
- "r":0.4651162791,
72
- "f":0.5633802817
73
- },
74
- "FAC":{
75
- "p":0.6666666667,
76
- "r":0.1666666667,
77
- "f":0.2666666667
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  },
79
- "ORG":{
80
- "p":0.4651162791,
81
- "r":0.3174603175,
82
- "f":0.3773584906
83
- },
84
- "PERSON":{
85
- "p":0.6809815951,
86
- "r":0.6434782609,
87
- "f":0.6616989568
88
- },
89
- "WORK_OF_ART":{
90
- "p":0.1785714286,
91
- "r":0.4,
92
- "f":0.2469135802
93
- },
94
- "DATE":{
95
- "p":0.75,
96
- "r":0.7804878049,
97
- "f":0.764940239
98
- },
99
- "ORDINAL":{
100
- "p":0.6956521739,
101
- "r":0.5925925926,
102
- "f":0.64
103
- },
104
- "CARDINAL":{
105
- "p":0.7155963303,
106
- "r":0.75,
107
- "f":0.7323943662
108
- },
109
- "NORP":{
110
- "p":0.7894736842,
111
- "r":0.4918032787,
112
- "f":0.6060606061
113
- },
114
- "LAW":{
115
- "p":0.8,
116
- "r":0.3333333333,
117
- "f":0.4705882353
118
- },
119
- "QUANTITY":{
120
- "p":0.6666666667,
121
- "r":0.5925925926,
122
- "f":0.6274509804
123
- },
124
- "EVENT":{
125
- "p":0.6875,
126
- "r":0.2619047619,
127
- "f":0.3793103448
128
- },
129
- "PERCENT":{
130
- "p":0.5555555556,
131
- "r":0.7142857143,
132
- "f":0.625
133
- },
134
- "LANGUAGE":{
135
- "p":0.8,
136
- "r":0.9230769231,
137
- "f":0.8571428571
138
- },
139
- "MONEY":{
140
  "p":0.0,
141
  "r":0.0,
142
  "f":0.0
143
  },
144
- "TIME":{
145
- "p":0.5,
146
  "r":1.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  "f":0.6666666667
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  }
149
  },
150
- "tok2vec_loss":457.675804386,
151
- "ner_loss":1277.2143377495
 
 
 
 
 
 
 
152
  },
153
  "requirements":[
154
 
 
1
  {
2
  "lang":"id",
3
  "name":"core_news_sm",
4
+ "version":"0.0.2",
5
  "description":"",
6
  "author":"",
7
  "email":"",
 
20
 
21
  ],
22
  "ner":[
23
+
24
+ ],
25
+ "tagger":[
26
+ "APP",
27
+ "ASP",
28
+ "ASP+PS3",
29
+ "ASS",
30
+ "B--",
31
+ "B--+PS3",
32
+ "CC-",
33
+ "CCONJ",
34
+ "CD-",
35
+ "CO-",
36
+ "D--",
37
+ "D--+PS3",
38
+ "F--",
39
+ "F--+PS2",
40
+ "G--",
41
+ "G--+PS3",
42
+ "H--",
43
+ "I--",
44
+ "M--",
45
+ "M--+PS3",
46
+ "NOUN",
47
+ "NPD",
48
+ "NSD",
49
+ "NSD+PS3",
50
+ "NSF",
51
+ "NSM",
52
+ "NUM",
53
+ "O--",
54
+ "PP1",
55
+ "PP2",
56
+ "PP3",
57
+ "PROPN",
58
+ "PS1",
59
+ "PS1+VSA",
60
+ "PS2",
61
+ "PS3",
62
+ "R--",
63
+ "R--+PS3",
64
+ "S--",
65
+ "SYM",
66
+ "T--",
67
+ "VERB",
68
+ "VPA",
69
+ "VSA",
70
+ "VSA+PS2",
71
+ "VSA+PS3",
72
+ "VSP",
73
+ "W--",
74
+ "X--",
75
+ "Z--"
76
+ ],
77
+ "morphologizer":[
78
+ "POS=PROPN",
79
+ "POS=AUX",
80
+ "Definite=Ind|POS=DET|PronType=Art",
81
+ "Number=Sing|POS=NOUN",
82
+ "POS=PRON|PronType=Rel",
83
+ "Mood=Ind|POS=VERB|Voice=Pass",
84
+ "POS=ADP",
85
+ "POS=PUNCT",
86
+ "POS=NOUN",
87
+ "POS=ADV",
88
+ "POS=CCONJ",
89
+ "POS=SCONJ",
90
+ "Mood=Ind|POS=VERB|Voice=Act",
91
+ "POS=VERB",
92
+ "POS=DET|PronType=Tot",
93
+ "Number=Sing|POS=PRON|Person=3|PronType=Prs",
94
+ "Number=Plur|POS=PRON|Person=3|PronType=Prs",
95
+ "POS=PRON|PronType=Prs|Reflex=Yes",
96
+ "POS=DET|PronType=Dem",
97
+ "NumType=Card|POS=NUM",
98
+ "POS=ADJ",
99
+ "Number=Plur|POS=DET|PronType=Ind",
100
+ "NumType=Card|POS=NUM|PronType=Tot",
101
+ "POS=PART|Polarity=Neg",
102
+ "POS=PRON|PronType=Int",
103
+ "NumType=Ord|POS=ADJ",
104
+ "POS=PART",
105
+ "POS=PRON|PronType=Dem",
106
+ "POS=DET|PronType=Ind",
107
+ "Number=Plur|POS=NOUN",
108
+ "Number=Sing|POS=PRON|Person=1|Polite=Form|PronType=Prs",
109
+ "POS=ADV|PronType=Int",
110
+ "Clusivity=In|Number=Plur|POS=PRON|Person=1|PronType=Prs",
111
+ "Definite=Def|POS=DET|PronType=Art",
112
+ "POS=SYM",
113
+ "Degree=Sup|POS=ADJ",
114
+ "POS=INTJ",
115
+ "Number=Sing|POS=PRON|Person=2|Polite=Infm|PronType=Prs",
116
+ "POS=ADV|PronType=Ind",
117
+ "Number=Sing|POS=PRON|Person=3|Polite=Form|PronType=Prs",
118
+ "Number=Sing|POS=PRON|Person=1|Polite=Infm|PronType=Prs",
119
+ "Number=Sing|POS=PRON|PronType=Ind",
120
+ "POS=VERB|Voice=Act",
121
+ "POS=DET|PronType=Emp",
122
+ "POS=VERB|Voice=Pass",
123
+ "POS=ADV|PronType=Dem",
124
+ "POS=NOUN|Typo=Yes",
125
+ "POS=ADP|Typo=Yes",
126
+ "Number=Plur|POS=PRON|PronType=Ind",
127
+ "POS=VERB|Typo=Yes|Voice=Pass",
128
+ "POS=X",
129
+ "POS=PRON|PronType=Tot",
130
+ "POS=SCONJ|Typo=Yes",
131
+ "Number=Plur|POS=PRON|Person=2|Polite=Infm|PronType=Prs",
132
+ "NumType=Card|POS=NUM|Typo=Yes",
133
+ "Clusivity=Ex|Number=Plur|POS=PRON|Person=1|PronType=Prs",
134
+ "Number=Sing|POS=PRON|Person=2|Polite=Form|PronType=Prs",
135
+ "Foreign=Yes|POS=X",
136
+ "POS=ADV|PronType=Rel",
137
+ "Mood=Imp|POS=VERB|Voice=Act",
138
+ "Number=Sing|POS=NOUN|Typo=Yes",
139
+ "POS=PROPN|Typo=Yes",
140
+ "POS=DET",
141
+ "Number=Sing|POS=DET|PronType=Ind",
142
+ "POS=DET|PronType=Ind|Typo=Yes",
143
+ "Abbr=Yes|POS=DET|PronType=Dem",
144
+ "POS=PRON|PronType=Ind",
145
+ "POS=VERB|Typo=Yes",
146
+ "Abbr=Yes|POS=PROPN",
147
+ "Abbr=Yes|POS=PRON|PronType=Rel",
148
+ "Number=Plur|POS=PRON|PronType=Int",
149
+ "Abbr=Yes|POS=PART|Polarity=Neg",
150
+ "POS=ADV|PronType=Tot",
151
+ "Abbr=Yes|POS=ADV",
152
+ "POS=ADV|Typo=Yes",
153
+ "POS=X|Typo=Yes",
154
+ "Number=Sing|POS=PRON|Person=2|PronType=Prs",
155
+ "POS=ADV|PronType=Int|Typo=Yes",
156
+ "NumType=Ord|POS=ADJ|Typo=Yes"
157
+ ],
158
+ "parser":[
159
+ "ROOT",
160
+ "acl",
161
+ "acl:relcl",
162
+ "advcl",
163
+ "advmod",
164
+ "advmod:emph",
165
+ "amod",
166
+ "appos",
167
+ "aux",
168
+ "case",
169
+ "case:adv",
170
+ "cc",
171
+ "ccomp",
172
+ "compound",
173
+ "conj",
174
+ "cop",
175
+ "dep",
176
+ "det",
177
+ "fixed",
178
+ "flat",
179
+ "flat:foreign",
180
+ "flat:name",
181
+ "mark",
182
+ "nmod",
183
+ "nmod:lmod",
184
+ "nmod:poss",
185
+ "nmod:tmod",
186
+ "nsubj",
187
+ "nsubj:pass",
188
+ "nummod",
189
+ "obj",
190
+ "obl",
191
+ "obl:agent",
192
+ "obl:tmod",
193
+ "parataxis",
194
+ "punct",
195
+ "xcomp"
196
  ]
197
  },
198
  "pipeline":[
199
  "tok2vec",
200
+ "ner",
201
+ "tagger",
202
+ "morphologizer",
203
+ "trainable_lemmatizer",
204
+ "parser"
205
  ],
206
  "components":[
207
  "tok2vec",
208
+ "ner",
209
+ "tagger",
210
+ "morphologizer",
211
+ "trainable_lemmatizer",
212
+ "parser"
213
  ],
214
  "disabled":[
215
 
216
  ],
217
  "performance":{
218
+ "ents_f":0.0,
219
+ "ents_p":0.0,
220
+ "ents_r":0.0,
221
+ "ents_per_type":0.0,
222
+ "tag_acc":0.9058429775,
223
+ "pos_acc":0.911077953,
224
+ "morph_acc":0.924529063,
225
+ "morph_per_feat":{
226
+ "Number":{
227
+ "p":0.978250591,
228
+ "r":0.8123282293,
229
+ "f":0.8876018876
230
+ },
231
+ "Mood":{
232
+ "p":0.9941520468,
233
+ "r":0.9147982063,
234
+ "f":0.9528257823
235
+ },
236
+ "Voice":{
237
+ "p":0.9902723735,
238
+ "r":0.9113697404,
239
+ "f":0.9491841492
240
+ },
241
+ "PronType":{
242
+ "p":0.991576414,
243
+ "r":0.7795648061,
244
+ "f":0.8728813559
245
+ },
246
+ "Polarity":{
247
+ "p":1.0,
248
+ "r":0.862745098,
249
+ "f":0.9263157895
250
+ },
251
+ "Person":{
252
+ "p":1.0,
253
+ "r":0.3973509934,
254
+ "f":0.5687203791
255
+ },
256
+ "NumType":{
257
+ "p":0.9952718676,
258
+ "r":0.9503386005,
259
+ "f":0.9722863741
260
  },
261
+ "Typo":{
262
+ "p":1.0,
263
+ "r":0.4666666667,
264
+ "f":0.6363636364
265
+ },
266
+ "Definite":{
267
+ "p":0.9838709677,
268
+ "r":0.7922077922,
269
+ "f":0.8776978417
270
+ },
271
+ "Polite":{
272
+ "p":1.0,
273
+ "r":0.65625,
274
+ "f":0.7924528302
275
+ },
276
+ "Reflex":{
277
+ "p":1.0,
278
+ "r":0.5,
279
+ "f":0.6666666667
280
+ },
281
+ "Degree":{
282
+ "p":0.9375,
283
+ "r":0.8823529412,
284
+ "f":0.9090909091
285
+ },
286
+ "Foreign":{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  "p":0.0,
288
  "r":0.0,
289
  "f":0.0
290
  },
291
+ "Clusivity":{
292
+ "p":1.0,
293
  "r":1.0,
294
+ "f":1.0
295
+ },
296
+ "Abbr":{
297
+ "p":1.0,
298
+ "r":0.2,
299
+ "f":0.3333333333
300
+ }
301
+ },
302
+ "lemma_acc":0.9356240444,
303
+ "dep_uas":0.779082277,
304
+ "dep_las":0.6971807939,
305
+ "dep_las_per_type":{
306
+ "nsubj":{
307
+ "p":0.8022759602,
308
+ "r":0.7621621622,
309
+ "f":0.7817047817
310
+ },
311
+ "compound":{
312
+ "p":0.6970849176,
313
+ "r":0.6988564168,
314
+ "f":0.6979695431
315
+ },
316
+ "root":{
317
+ "p":0.8139963168,
318
+ "r":0.7906976744,
319
+ "f":0.8021778584
320
+ },
321
+ "obj":{
322
+ "p":0.8152985075,
323
+ "r":0.7613240418,
324
+ "f":0.7873873874
325
+ },
326
+ "case":{
327
+ "p":0.9104609929,
328
+ "r":0.8868739206,
329
+ "f":0.8985126859
330
+ },
331
+ "obl":{
332
+ "p":0.7344322344,
333
+ "r":0.6178736518,
334
+ "f":0.6711297071
335
+ },
336
+ "amod":{
337
+ "p":0.6195899772,
338
+ "r":0.590021692,
339
+ "f":0.6044444444
340
+ },
341
+ "conj":{
342
+ "p":0.5868263473,
343
+ "r":0.5485074627,
344
+ "f":0.5670202507
345
+ },
346
+ "cc":{
347
+ "p":0.880239521,
348
+ "r":0.8376068376,
349
+ "f":0.8583941606
350
+ },
351
+ "acl:relcl":{
352
+ "p":0.7262357414,
353
+ "r":0.6821428571,
354
+ "f":0.7034990792
355
+ },
356
+ "flat:name":{
357
+ "p":0.7844036697,
358
+ "r":0.8123515439,
359
+ "f":0.7981330222
360
+ },
361
+ "advmod":{
362
+ "p":0.7588075881,
363
+ "r":0.6982543641,
364
+ "f":0.7272727273
365
+ },
366
+ "nmod":{
367
+ "p":0.613836478,
368
+ "r":0.5816448153,
369
+ "f":0.5973072215
370
+ },
371
+ "nsubj:pass":{
372
+ "p":0.7100840336,
373
+ "r":0.7824074074,
374
+ "f":0.7444933921
375
+ },
376
+ "det":{
377
+ "p":0.8545454545,
378
+ "r":0.7943661972,
379
+ "f":0.8233576642
380
+ },
381
+ "aux":{
382
+ "p":0.9461538462,
383
+ "r":0.9179104478,
384
+ "f":0.9318181818
385
+ },
386
+ "nmod:poss":{
387
+ "p":0.8235294118,
388
+ "r":0.0848484848,
389
+ "f":0.1538461538
390
+ },
391
+ "dep":{
392
+ "p":0.2394366197,
393
+ "r":0.3035714286,
394
+ "f":0.2677165354
395
+ },
396
+ "mark":{
397
+ "p":0.8018018018,
398
+ "r":0.7542372881,
399
+ "f":0.7772925764
400
+ },
401
+ "cop":{
402
+ "p":0.9607843137,
403
+ "r":0.9423076923,
404
+ "f":0.9514563107
405
+ },
406
+ "acl":{
407
+ "p":0.3048780488,
408
+ "r":0.2232142857,
409
+ "f":0.2577319588
410
+ },
411
+ "nummod":{
412
+ "p":0.7768817204,
413
+ "r":0.7747989276,
414
+ "f":0.7758389262
415
+ },
416
+ "appos":{
417
+ "p":0.6460176991,
418
+ "r":0.553030303,
419
+ "f":0.5959183673
420
+ },
421
+ "xcomp":{
422
+ "p":0.4495412844,
423
+ "r":0.392,
424
+ "f":0.4188034188
425
+ },
426
+ "ccomp":{
427
+ "p":0.4571428571,
428
+ "r":0.3265306122,
429
+ "f":0.380952381
430
+ },
431
+ "obl:tmod":{
432
+ "p":0.6029411765,
433
+ "r":0.6721311475,
434
+ "f":0.6356589147
435
+ },
436
+ "advcl":{
437
+ "p":0.268907563,
438
+ "r":0.2269503546,
439
+ "f":0.2461538462
440
+ },
441
+ "advmod:emph":{
442
+ "p":1.0,
443
+ "r":0.0434782609,
444
+ "f":0.0833333333
445
+ },
446
+ "case:adv":{
447
+ "p":0.7272727273,
448
+ "r":0.6153846154,
449
  "f":0.6666666667
450
+ },
451
+ "obl:agent":{
452
+ "p":0.0,
453
+ "r":0.0,
454
+ "f":0.0
455
+ },
456
+ "flat":{
457
+ "p":0.4375,
458
+ "r":0.3255813953,
459
+ "f":0.3733333333
460
+ },
461
+ "parataxis":{
462
+ "p":0.2253521127,
463
+ "r":0.3265306122,
464
+ "f":0.2666666667
465
+ },
466
+ "nmod:lmod":{
467
+ "p":1.0,
468
+ "r":0.0769230769,
469
+ "f":0.1428571429
470
+ },
471
+ "flat:foreign":{
472
+ "p":0.0,
473
+ "r":0.0,
474
+ "f":0.0
475
+ },
476
+ "nmod:tmod":{
477
+ "p":0.3333333333,
478
+ "r":0.2727272727,
479
+ "f":0.3
480
+ },
481
+ "iobj":{
482
+ "p":0.0,
483
+ "r":0.0,
484
+ "f":0.0
485
+ },
486
+ "csubj":{
487
+ "p":0.0,
488
+ "r":0.0,
489
+ "f":0.0
490
+ },
491
+ "fixed":{
492
+ "p":0.6,
493
+ "r":0.3461538462,
494
+ "f":0.4390243902
495
+ },
496
+ "discourse":{
497
+ "p":0.0,
498
+ "r":0.0,
499
+ "f":0.0
500
+ },
501
+ "cc:preconj":{
502
+ "p":0.0,
503
+ "r":0.0,
504
+ "f":0.0
505
+ },
506
+ "compound:a":{
507
+ "p":0.0,
508
+ "r":0.0,
509
+ "f":0.0
510
  }
511
  },
512
+ "sents_p":0.9074733096,
513
+ "sents_r":0.9123434705,
514
+ "sents_f":0.9099018733,
515
+ "tok2vec_loss":8342.1316462817,
516
+ "ner_loss":0.0,
517
+ "tagger_loss":572.1505841613,
518
+ "morphologizer_loss":1246.6148492694,
519
+ "trainable_lemmatizer_loss":245.6071262917,
520
+ "parser_loss":8829.155590333
521
  },
522
  "requirements":[
523
 
morphologizer/model CHANGED
Binary files a/morphologizer/model and b/morphologizer/model differ
 
ner/model CHANGED
Binary files a/ner/model and b/ner/model differ
 
ner/moves CHANGED
@@ -1 +1 @@
1
- ��moves��{"0":{},"1":{"GPE":3540,"PERSON":3236,"ORG":1843,"DATE":1451,"CARDINAL":743,"WORK_OF_ART":705,"PRODUCT":642,"EVENT":629,"LOC":491,"FAC":486,"QUANTITY":429,"NORP":427,"LANGUAGE":194,"ORDINAL":177,"LAW":137,"PERCENT":126,"TIME":103,"MONEY":65},"2":{"GPE":3540,"PERSON":3236,"ORG":1843,"DATE":1451,"CARDINAL":743,"WORK_OF_ART":705,"PRODUCT":642,"EVENT":629,"LOC":491,"FAC":486,"QUANTITY":429,"NORP":427,"LANGUAGE":194,"ORDINAL":177,"LAW":137,"PERCENT":126,"TIME":103,"MONEY":65},"3":{"GPE":3540,"PERSON":3236,"ORG":1843,"DATE":1451,"CARDINAL":743,"WORK_OF_ART":705,"PRODUCT":642,"EVENT":629,"LOC":491,"FAC":486,"QUANTITY":429,"NORP":427,"LANGUAGE":194,"ORDINAL":177,"LAW":137,"PERCENT":126,"TIME":103,"MONEY":65},"4":{"GPE":3540,"PERSON":3236,"ORG":1843,"DATE":1451,"CARDINAL":743,"WORK_OF_ART":705,"PRODUCT":642,"EVENT":629,"LOC":491,"FAC":486,"QUANTITY":429,"NORP":427,"LANGUAGE":194,"ORDINAL":177,"LAW":137,"PERCENT":126,"TIME":103,"MONEY":65,"":1},"5":{"":1}}�cfg��neg_key�
 
1
+ ��moves�3{"0":{},"1":{},"2":{},"3":{},"4":{"":1},"5":{"":1}}�cfg��neg_key�
parser/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16da2082353eec368fedc44e473009c51f5ee03be764493d78146bf2977b6645
3
  size 1750016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2738f320e714bbe52de113a1eeb00ac1cb55706a794cb67b6fc3442a29ec0e0
3
  size 1750016
tagger/model CHANGED
Binary files a/tagger/model and b/tagger/model differ
 
tok2vec/model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2f4155df88b3296a30602bab1237736752867c37007f3d4b05233c2e0c46780
3
  size 34126801
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97122c73406442748f5a28b415d61067a62dcbda60fdad337f0a9a9917d12e93
3
  size 34126801
trainable_lemmatizer/model CHANGED
Binary files a/trainable_lemmatizer/model and b/trainable_lemmatizer/model differ
 
vocab/strings.json CHANGED
The diff for this file is too large to render. See raw diff