dchaplinsky
committed on
Commit
•
d8780ff
1
Parent(s):
5b794a2
Update README.md
Browse files
README.md
CHANGED
@@ -58,5 +58,66 @@ Results:
|
|
58 |
The model was fine-tuned on the [Ukrainian (UD) corpus](https://universaldependencies.org/treebanks/uk_iu/index.html), released by the [non-profit organization Institute for Ukrainian](https://mova.institute).
|
59 |
Training code is also available [here](https://github.com/lang-uk/flair-pos).
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
Copyright: [Dmytro Chaplynskyi](https://twitter.com/dchaplinsky), [lang-uk project](https://lang.org.ua), 2022
|
|
|
58 |
The model was fine-tuned on the [Ukrainian (UD) corpus](https://universaldependencies.org/treebanks/uk_iu/index.html), released by the [non-profit organization Institute for Ukrainian](https://mova.institute).
|
59 |
Training code is also available [here](https://github.com/lang-uk/flair-pos).
|
60 |
|
61 |
+
## [Usage demo](https://github.com/egorsmkv/flair-nlp-uk/blob/main/part_of_speech.py)
|
62 |
+
```python
|
63 |
+
from flair.data import Sentence
|
64 |
+
from flair.models import SequenceTagger
|
65 |
+
|
66 |
+
from pprint import pprint
|
67 |
+
|
68 |
+
tagger = SequenceTagger.load("dchaplinsky/flair-uk-pos")
|
69 |
+
|
70 |
+
sentence = Sentence("Я люблю Україну. Моє імʼя Марія Шевченко, я навчаюся в Київській політехніці.")
|
71 |
+
|
72 |
+
tagger.predict(sentence)
|
73 |
+
|
74 |
+
print(sentence)
|
75 |
+
print('---')
|
76 |
+
|
77 |
+
print('The following POS tags are found:')
|
78 |
+
|
79 |
+
pos_items = []
|
80 |
+
for label in sentence.get_labels():
|
81 |
+
all_labels = []
|
82 |
+
keys = label.data_point.annotation_layers.keys()
|
83 |
+
|
84 |
+
for key in keys:
|
85 |
+
all_labels.extend(
|
86 |
+
[
|
87 |
+
{'label': label.value, 'score': round(label.score, 4)}
|
88 |
+
for label in label.data_point.get_labels(key)
|
89 |
+
if label.data_point == label
|
90 |
+
]
|
91 |
+
)
|
92 |
+
|
93 |
+
pos_items.append({
|
94 |
+
'text': label.data_point.text,
|
95 |
+
'all_labels': all_labels,
|
96 |
+
})
|
97 |
+
|
98 |
+
pprint(pos_items)
|
99 |
+
|
100 |
+
# Result:
|
101 |
+
"""
|
102 |
+
Sentence: "Я люблю Україну . Моє імʼя Марія Шевченко , я навчаюся в Київській політехніці ." → ["Я"/PRON, "люблю"/VERB, "Україну"/PROPN, "."/PUNCT, "Моє"/DET, "імʼя"/NOUN, "Марія"/PROPN, "Шевченко"/PROPN, ","/PUNCT, "я"/PRON, "навчаюся"/VERB, "в"/ADP, "Київській"/ADJ, "політехніці"/NOUN, "."/PUNCT]
|
103 |
+
---
|
104 |
+
The following POS tags are found:
|
105 |
+
[{'all_labels': [{'label': 'PRON', 'score': 1.0}], 'text': 'Я'},
|
106 |
+
{'all_labels': [{'label': 'VERB', 'score': 1.0}], 'text': 'люблю'},
|
107 |
+
{'all_labels': [{'label': 'PROPN', 'score': 1.0}], 'text': 'Україну'},
|
108 |
+
{'all_labels': [{'label': 'PUNCT', 'score': 1.0}], 'text': '.'},
|
109 |
+
{'all_labels': [{'label': 'DET', 'score': 0.9999}], 'text': 'Моє'},
|
110 |
+
{'all_labels': [{'label': 'NOUN', 'score': 1.0}], 'text': 'імʼя'},
|
111 |
+
{'all_labels': [{'label': 'PROPN', 'score': 1.0}], 'text': 'Марія'},
|
112 |
+
{'all_labels': [{'label': 'PROPN', 'score': 1.0}], 'text': 'Шевченко'},
|
113 |
+
{'all_labels': [{'label': 'PUNCT', 'score': 1.0}], 'text': ','},
|
114 |
+
{'all_labels': [{'label': 'PRON', 'score': 1.0}], 'text': 'я'},
|
115 |
+
{'all_labels': [{'label': 'VERB', 'score': 1.0}], 'text': 'навчаюся'},
|
116 |
+
{'all_labels': [{'label': 'ADP', 'score': 1.0}], 'text': 'в'},
|
117 |
+
{'all_labels': [{'label': 'ADJ', 'score': 1.0}], 'text': 'Київській'},
|
118 |
+
{'all_labels': [{'label': 'NOUN', 'score': 1.0}], 'text': 'політехніці'},
|
119 |
+
{'all_labels': [{'label': 'PUNCT', 'score': 1.0}], 'text': '.'}]
|
120 |
+
"""
|
121 |
+
```
|
122 |
|
123 |
Copyright: [Dmytro Chaplynskyi](https://twitter.com/dchaplinsky), [lang-uk project](https://lang.org.ua), 2022
|