Update spaCy pipeline
Browse files
- README.md +18 -18
- attribute_ruler/patterns +0 -0
- config.cfg +2 -0
- en_skillner-any-py3-none-any.whl +2 -2
- meta.json +38 -38
- ner/model +1 -1
- ner/moves +1 -1
- tagger/cfg +1 -0
- vocab/lookups.bin +2 -2
README.md
CHANGED
@@ -14,25 +14,25 @@ model-index:
|
|
14 |
metrics:
|
15 |
- name: NER Precision
|
16 |
type: precision
|
17 |
-
value: 0.
|
18 |
- name: NER Recall
|
19 |
type: recall
|
20 |
-
value: 0.
|
21 |
- name: NER F Score
|
22 |
type: f_score
|
23 |
-
value: 0.
|
24 |
---
|
25 |
A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.
|
26 |
|
27 |
| Feature | Description |
|
28 |
| --- | --- |
|
29 |
| **Name** | `en_skillner` |
|
30 |
-
| **Version** | `3.
|
31 |
-
| **spaCy** | `>=3.
|
32 |
| **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
|
33 |
| **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
|
34 |
| **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
|
35 |
-
| **Sources** | [OntoNotes 5](https://catalog.ldc.upenn.edu/LDC2013T19) (Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston)<br
|
36 |
| **License** | `MIT` |
|
37 |
| **Author** | [nestauk](https://explosion.ai) |
|
38 |
|
@@ -52,15 +52,15 @@ A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT
|
|
52 |
|
53 |
| Type | Score |
|
54 |
| --- | --- |
|
55 |
-
| `ENTS_P` | 59.
|
56 |
-
| `ENTS_R` | 57.
|
57 |
-
| `ENTS_F` | 58.
|
58 |
-
| `SKILL_P` | 72.
|
59 |
-
| `SKILL_R` | 72.
|
60 |
-
| `SKILL_F` | 72.
|
61 |
-
| `EXPERIENCE_P` |
|
62 |
-
| `EXPERIENCE_R` |
|
63 |
-
| `EXPERIENCE_F` |
|
64 |
-
| `BENEFIT_P` |
|
65 |
-
| `BENEFIT_R` |
|
66 |
-
| `BENEFIT_F` |
|
|
|
14 |
metrics:
|
15 |
- name: NER Precision
|
16 |
type: precision
|
17 |
+
value: 0.5919354839
|
18 |
- name: NER Recall
|
19 |
type: recall
|
20 |
+
value: 0.5758368201
|
21 |
- name: NER F Score
|
22 |
type: f_score
|
23 |
+
value: 0.5837751856
|
24 |
---
|
25 |
A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.
|
26 |
|
27 |
| Feature | Description |
|
28 |
| --- | --- |
|
29 |
| **Name** | `en_skillner` |
|
30 |
+
| **Version** | `3.7.1` |
|
31 |
+
| **spaCy** | `>=3.7.4,<3.8.0` |
|
32 |
| **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
|
33 |
| **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
|
34 |
| **Vectors** | 514157 keys, 514157 unique vectors (300 dimensions) |
|
35 |
+
| **Sources** | [OntoNotes 5](https://catalog.ldc.upenn.edu/LDC2013T19) (Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston)<br>[ClearNLP Constituent-to-Dependency Conversion](https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md) (Emory University)<br>[WordNet 3.0](https://wordnet.princeton.edu/) (Princeton University)<br>[Explosion Vectors (OSCAR 2109 + Wikipedia + OpenSubtitles + WMT News Crawl)](https://github.com/explosion/spacy-vectors-builder) (Explosion) |
|
36 |
| **License** | `MIT` |
|
37 |
| **Author** | [nestauk](https://explosion.ai) |
|
38 |
|
|
|
52 |
|
53 |
| Type | Score |
|
54 |
| --- | --- |
|
55 |
+
| `ENTS_P` | 59.19 |
|
56 |
+
| `ENTS_R` | 57.58 |
|
57 |
+
| `ENTS_F` | 58.38 |
|
58 |
+
| `SKILL_P` | 72.19 |
|
59 |
+
| `SKILL_R` | 72.62 |
|
60 |
+
| `SKILL_F` | 72.40 |
|
61 |
+
| `EXPERIENCE_P` | 52.14 |
|
62 |
+
| `EXPERIENCE_R` | 41.48 |
|
63 |
+
| `EXPERIENCE_F` | 46.20 |
|
64 |
+
| `BENEFIT_P` | 75.61 |
|
65 |
+
| `BENEFIT_R` | 46.27 |
|
66 |
+
| `BENEFIT_F` | 57.41 |
|
attribute_ruler/patterns
CHANGED
Binary files a/attribute_ruler/patterns and b/attribute_ruler/patterns differ
|
|
config.cfg
CHANGED
@@ -17,6 +17,7 @@ after_creation = null
|
|
17 |
after_pipeline_creation = null
|
18 |
batch_size = 256
|
19 |
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
|
|
|
20 |
|
21 |
[components]
|
22 |
|
@@ -116,6 +117,7 @@ maxout_pieces = 2
|
|
116 |
|
117 |
[components.tagger]
|
118 |
factory = "tagger"
|
|
|
119 |
neg_prefix = "!"
|
120 |
overwrite = false
|
121 |
scorer = {"@scorers":"spacy.tagger_scorer.v1"}
|
|
|
17 |
after_pipeline_creation = null
|
18 |
batch_size = 256
|
19 |
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
|
20 |
+
vectors = {"@vectors":"spacy.Vectors.v1"}
|
21 |
|
22 |
[components]
|
23 |
|
|
|
117 |
|
118 |
[components.tagger]
|
119 |
factory = "tagger"
|
120 |
+
label_smoothing = 0.0
|
121 |
neg_prefix = "!"
|
122 |
overwrite = false
|
123 |
scorer = {"@scorers":"spacy.tagger_scorer.v1"}
|
en_skillner-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b840315f54eda471ecbeece805f216c4c25b519a88a6962084730d7803c54d1
|
3 |
+
size 587689424
|
meta.json
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
{
|
2 |
"lang":"en",
|
3 |
"name":"skillner",
|
4 |
-
"version":"3.
|
5 |
"description":"A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.",
|
6 |
"author":"nestauk",
|
7 |
"email":"[email protected]",
|
8 |
"url":"https://explosion.ai",
|
9 |
"license":"MIT",
|
10 |
-
"spacy_version":">=3.
|
11 |
-
"spacy_git_version":"
|
12 |
"vectors":{
|
13 |
"width":300,
|
14 |
"vectors":514157,
|
@@ -43,54 +43,54 @@
|
|
43 |
"senter"
|
44 |
],
|
45 |
"performance":{
|
46 |
-
"ents_p":0.
|
47 |
-
"ents_r":0.
|
48 |
-
"ents_f":0.
|
49 |
-
"skill_p":0.
|
50 |
-
"skill_r":0.
|
51 |
-
"skill_f":0.
|
52 |
-
"experience_p":0.
|
53 |
-
"experience_r":0.
|
54 |
-
"experience_f":0.
|
55 |
-
"benefit_p":0.
|
56 |
-
"benefit_r":0.
|
57 |
-
"benefit_f":0.
|
58 |
"ents_per_type":{
|
59 |
"SKILL":{
|
60 |
-
"correct":
|
61 |
"incorrect":32,
|
62 |
"partial":0,
|
63 |
-
"missed":
|
64 |
-
"spurious":
|
65 |
"possible":1669,
|
66 |
-
"actual":
|
67 |
-
"precision":0.
|
68 |
-
"recall":0.
|
69 |
-
"f1":0.
|
70 |
},
|
71 |
"EXPERIENCE":{
|
72 |
-
"correct":
|
73 |
-
"incorrect":
|
74 |
"partial":0,
|
75 |
-
"missed":
|
76 |
-
"spurious":
|
77 |
"possible":176,
|
78 |
-
"actual":
|
79 |
-
"precision":0.
|
80 |
-
"recall":0.
|
81 |
-
"f1":0.
|
82 |
},
|
83 |
"BENEFIT":{
|
84 |
-
"correct":
|
85 |
-
"incorrect":
|
86 |
"partial":0,
|
87 |
-
"missed":
|
88 |
-
"spurious":
|
89 |
"possible":67,
|
90 |
-
"actual":
|
91 |
-
"precision":0.
|
92 |
-
"recall":0.
|
93 |
-
"f1":0.
|
94 |
}
|
95 |
}
|
96 |
},
|
|
|
1 |
{
|
2 |
"lang":"en",
|
3 |
"name":"skillner",
|
4 |
+
"version":"3.7.1",
|
5 |
"description":"A Named Entity Recognition (NER) model to extract SKILL, EXPERIENCE and BENEFIT from job adverts.",
|
6 |
"author":"nestauk",
|
7 |
"email":"[email protected]",
|
8 |
"url":"https://explosion.ai",
|
9 |
"license":"MIT",
|
10 |
+
"spacy_version":">=3.7.4,<3.8.0",
|
11 |
+
"spacy_git_version":"bd2c17e20",
|
12 |
"vectors":{
|
13 |
"width":300,
|
14 |
"vectors":514157,
|
|
|
43 |
"senter"
|
44 |
],
|
45 |
"performance":{
|
46 |
+
"ents_p":0.5919354839,
|
47 |
+
"ents_r":0.5758368201,
|
48 |
+
"ents_f":0.5837751856,
|
49 |
+
"skill_p":0.721858249,
|
50 |
+
"skill_r":0.7261833433,
|
51 |
+
"skill_f":0.7240143369,
|
52 |
+
"experience_p":0.5214285714,
|
53 |
+
"experience_r":0.4147727273,
|
54 |
+
"experience_f":0.4620253165,
|
55 |
+
"benefit_p":0.756097561,
|
56 |
+
"benefit_r":0.4626865672,
|
57 |
+
"benefit_f":0.5740740741,
|
58 |
"ents_per_type":{
|
59 |
"SKILL":{
|
60 |
+
"correct":1212,
|
61 |
"incorrect":32,
|
62 |
"partial":0,
|
63 |
+
"missed":425,
|
64 |
+
"spurious":435,
|
65 |
"possible":1669,
|
66 |
+
"actual":1679,
|
67 |
+
"precision":0.721858249,
|
68 |
+
"recall":0.7261833433,
|
69 |
+
"f1":0.7240143369
|
70 |
},
|
71 |
"EXPERIENCE":{
|
72 |
+
"correct":73,
|
73 |
+
"incorrect":39,
|
74 |
"partial":0,
|
75 |
+
"missed":64,
|
76 |
+
"spurious":28,
|
77 |
"possible":176,
|
78 |
+
"actual":140,
|
79 |
+
"precision":0.5214285714,
|
80 |
+
"recall":0.4147727273,
|
81 |
+
"f1":0.4620253165
|
82 |
},
|
83 |
"BENEFIT":{
|
84 |
+
"correct":31,
|
85 |
+
"incorrect":4,
|
86 |
"partial":0,
|
87 |
+
"missed":32,
|
88 |
+
"spurious":6,
|
89 |
"possible":67,
|
90 |
+
"actual":41,
|
91 |
+
"precision":0.756097561,
|
92 |
+
"recall":0.4626865672,
|
93 |
+
"f1":0.5740740741
|
94 |
}
|
95 |
}
|
96 |
},
|
ner/model
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6384063
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:feadcb06a1ff7169640e3394e886c696675ccc6d651ebe5bf862fa97d761e0fe
|
3 |
size 6384063
|
ner/moves
CHANGED
@@ -1 +1 @@
|
|
1 |
-
��moves��{"0":{},"1":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"
|
|
|
1 |
+
��moves��{"0":{},"1":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"SKILL":-1,"EXPERIENCE":-2,"BENEFIT":-3},"2":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"SKILL":-1,"EXPERIENCE":-2,"BENEFIT":-3},"3":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"SKILL":-1,"EXPERIENCE":-2,"BENEFIT":-3},"4":{"ORG":56516,"DATE":40493,"PERSON":36534,"GPE":26745,"MONEY":15158,"CARDINAL":14109,"NORP":9641,"PERCENT":9199,"WORK_OF_ART":4488,"LOC":4055,"TIME":3678,"QUANTITY":3123,"FAC":3046,"EVENT":3021,"ORDINAL":2142,"PRODUCT":1787,"LAW":1624,"LANGUAGE":355,"":1,"SKILL":-1,"EXPERIENCE":-2,"BENEFIT":-3},"5":{"":1}}�cfg��neg_key�
|
tagger/cfg
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"labels":[
|
3 |
"$",
|
4 |
"''",
|
|
|
1 |
{
|
2 |
+
"label_smoothing":0.0,
|
3 |
"labels":[
|
4 |
"$",
|
5 |
"''",
|
vocab/lookups.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fce9c883c56165f29573cc938c2a1c9d417ac61bd8f56b671dd5f7996de70682
|
3 |
+
size 70040
|