alanakbik committed on
Commit
3ca1c76
1 Parent(s): 1f5fb36

initial model commit

Browse files
Files changed (1) hide show
  1. README.md +22 -11
README.md CHANGED
@@ -104,16 +104,26 @@ So, the words "*Ich*" and "*they*" are labeled as **pronouns** (PRON), while "*l
104
  The following Flair script was used to train this model:
105
 
106
  ```python
107
- from flair.data import Corpus
108
- from flair.datasets import ColumnCorpus
109
- from flair.embeddings import WordEmbeddings, StackedEmbeddings, FlairEmbeddings
110
-
111
- # 1. load the corpus (Ontonotes does not ship with Flair, you need to download and reformat into a column format yourself)
112
- corpus: Corpus = ColumnCorpus(
113
- "resources/tasks/onto-ner",
114
- column_format={0: "text", 1: "pos", 2: "upos", 3: "ner"},
115
- tag_to_bioes="ner",
116
- )
 
 
 
 
 
 
 
 
 
 
117
 
118
  # 2. what tag do we want to predict?
119
  tag_type = 'upos'
@@ -140,7 +150,8 @@ from flair.models import SequenceTagger
140
  tagger = SequenceTagger(hidden_size=256,
141
  embeddings=embeddings,
142
  tag_dictionary=tag_dictionary,
143
- tag_type=tag_type)
 
144
 
145
  # 6. initialize trainer
146
  from flair.trainers import ModelTrainer
 
104
  The following Flair script was used to train this model:
105
 
106
  ```python
107
+ from flair.data import MultiCorpus
108
+ from flair.datasets import UD_ENGLISH, UD_GERMAN, UD_FRENCH, UD_ITALIAN, UD_POLISH, UD_DUTCH, UD_CZECH, \
109
+ UD_DANISH, UD_SPANISH, UD_SWEDISH, UD_NORWEGIAN, UD_FINNISH
110
+ from flair.embeddings import StackedEmbeddings, FlairEmbeddings
111
+
112
+ # 1. make a multi corpus consisting of 12 UD treebanks (in_memory=False here because this corpus becomes large)
113
+ corpus = MultiCorpus([
114
+ UD_ENGLISH(in_memory=False),
115
+ UD_GERMAN(in_memory=False),
116
+ UD_DUTCH(in_memory=False),
117
+ UD_FRENCH(in_memory=False),
118
+ UD_ITALIAN(in_memory=False),
119
+ UD_SPANISH(in_memory=False),
120
+ UD_POLISH(in_memory=False),
121
+ UD_CZECH(in_memory=False),
122
+ UD_DANISH(in_memory=False),
123
+ UD_SWEDISH(in_memory=False),
124
+ UD_NORWEGIAN(in_memory=False),
125
+ UD_FINNISH(in_memory=False),
126
+ ])
127
 
128
  # 2. what tag do we want to predict?
129
  tag_type = 'upos'
 
150
  tagger = SequenceTagger(hidden_size=256,
151
  embeddings=embeddings,
152
  tag_dictionary=tag_dictionary,
153
+ tag_type=tag_type,
154
+ use_crf=False)
155
 
156
  # 6. initialize trainer
157
  from flair.trainers import ModelTrainer