File size: 706 Bytes
158b61b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
"""Define constant values used across the project."""


class DefaultTokens:
    """Namespace holding the default token strings and separators.

    Every member is a plain class attribute; read it straight from the
    class (for example ``DefaultTokens.PAD``) without instantiating.
    """

    # Special-token strings.
    PAD = "<blank>"
    BOS = "<s>"
    EOS = "</s>"
    UNK = "<unk>"
    MASK = "<mask>"
    VOCAB_PAD = "averyunlikelytoken"

    # Punctuation marks listed as sentence full stops (kept as a list).
    SENT_FULL_STOPS = [".", "?", "!"]

    # Both separators use "|||"; the alignment one is padded with a single
    # space on each side.
    PHRASE_TABLE_SEPARATOR = "|||"
    ALIGNMENT_SEPARATOR = " ||| "


class CorpusName:
    """Namespace of the fixed corpus-name strings.

    Members are plain class attributes holding lowercase string values.
    """

    VALID = "valid"
    TRAIN = "train"
    SAMPLE = "sample"


class SubwordMarker:
    """Namespace of the marker strings used for subwords and case regions.

    All values are non-ASCII string literals and must be kept exactly as
    written.
    """

    # Single-character markers.
    SPACER = "▁"
    JOINER = "■"

    # Bracketed case markers: begin/end of an uppercase region, plus a
    # case modifier.
    BEGIN_UPPERCASE = "⦅mrk_begin_case_region_U⦆"
    END_UPPERCASE = "⦅mrk_end_case_region_U⦆"
    BEGIN_CASED = "⦅mrk_case_modifier_C⦆"


class ModelTask:
    """Namespace of the model-task identifier strings."""

    LANGUAGE_MODEL = "lm"
    SEQ2SEQ = "seq2seq"