flair
/

upos-multi

alanakbik committed on Feb 23, 2021

Commit

3ca1c76

1 Parent(s): 1f5fb36

initial model commit

Files changed (1) hide show

README.md CHANGED Viewed

@@ -104,16 +104,26 @@ So, the words "*Ich*" and "*they*" are labeled as **pronouns** (PRON), while "*l
 The following Flair script was used to train this model:
 ```python
-from flair.data import Corpus
-from flair.datasets import ColumnCorpus
-from flair.embeddings import WordEmbeddings, StackedEmbeddings, FlairEmbeddings
-# 1. load the corpus (Ontonotes does not ship with Flair, you need to download and reformat into a column format yourself)
-corpus: Corpus = ColumnCorpus(
-                "resources/tasks/onto-ner",
-                column_format={0: "text", 1: "pos", 2: "upos", 3: "ner"},
-                tag_to_bioes="ner",
-            )
 # 2. what tag do we want to predict?
 tag_type = 'upos'
@@ -140,7 +150,8 @@ from flair.models import SequenceTagger
 tagger = SequenceTagger(hidden_size=256,
                         embeddings=embeddings,
                         tag_dictionary=tag_dictionary,
-                        tag_type=tag_type)
 # 6. initialize trainer
 from flair.trainers import ModelTrainer

 The following Flair script was used to train this model:
 ```python
+from flair.data import MultiCorpus
+from flair.datasets import UD_ENGLISH, UD_GERMAN, UD_FRENCH, UD_ITALIAN, UD_POLISH, UD_DUTCH, UD_CZECH, \
+    UD_DANISH, UD_SPANISH, UD_SWEDISH, UD_NORWEGIAN, UD_FINNISH
+from flair.embeddings import StackedEmbeddings, FlairEmbeddings
+# 1. make a multi corpus consisting of 12 UD treebanks (in_memory=False here because this corpus becomes large)
+corpus = MultiCorpus([
+    UD_ENGLISH(in_memory=False),
+    UD_GERMAN(in_memory=False),
+    UD_DUTCH(in_memory=False),
+    UD_FRENCH(in_memory=False),
+    UD_ITALIAN(in_memory=False),
+    UD_SPANISH(in_memory=False),
+    UD_POLISH(in_memory=False),
+    UD_CZECH(in_memory=False),
+    UD_DANISH(in_memory=False),
+    UD_SWEDISH(in_memory=False),
+    UD_NORWEGIAN(in_memory=False),
+    UD_FINNISH(in_memory=False),
+])
 # 2. what tag do we want to predict?
 tag_type = 'upos'
 tagger = SequenceTagger(hidden_size=256,
                         embeddings=embeddings,
                         tag_dictionary=tag_dictionary,
+                        tag_type=tag_type,
+                        use_crf=False)
 # 6. initialize trainer
 from flair.trainers import ModelTrainer