Ling / utils /pos_helpers.py
Nam Fam
update files
ea99abb
# POS helpers and constants
# Model used when the caller does not pick one.
DEFAULT_MODEL = "gemini-2.0-flash"

# Allowed models. Only gemini-2.0-flash is allowed for now; the following
# candidates are currently disabled:
#   "gpt-4"
#   "claude-2"
#   "vblagoje/bert-english-uncased-finetuned-pos"
#   "QCRI/bert-base-multilingual-cased-pos-english"
POS_MODELS = [DEFAULT_MODEL]
# Universal POS tags accepted by the helpers (the 17-tag UD v2 inventory).
# The legacy UD v1 tag "CONJ" is deliberately absent: UD v2 renamed it to
# "CCONJ", and POS_TAG_DESCRIPTIONS has no entry for it, so listing it here
# would make a description lookup over this list fail with KeyError.
STANDARD_POS_TAGS = [
    "ADJ", "ADP", "ADV", "AUX", "CCONJ", "DET", "INTJ", "NOUN", "NUM",
    "PART", "PRON", "PROPN", "PUNCT", "SCONJ", "SYM", "VERB", "X",
]
# Human-readable label, with example words, for each POS tag.
# Insertion order matters: DEFAULT_SELECTED_TAGS below is built from it.
POS_TAG_DESCRIPTIONS = {
    "ADJ": "Adjective (big, old, green, interesting)",
    "ADP": "Adposition (in, to, during)",
    "ADV": "Adverb (very, well, there, tomorrow)",
    "AUX": "Auxiliary verb (is, has (done), will (do), should (do))",
    "CCONJ": "Coordinating conjunction (and, or, but)",
    "DET": "Determiner (a, an, the, this, those)",
    "INTJ": "Interjection (oh, hey, oops, hmm)",
    "NOUN": "Noun (dog, cat, man, house, idea)",
    "NUM": "Numeral (one, two, 3, 55, 2019)",
    "PART": "Particle (not, 's, let's)",
    "PRON": "Pronoun (I, you, he, she, it, we, they, me, him, her, us, them)",
    "PROPN": "Proper noun (John, Mary, London, Microsoft)",
    "PUNCT": "Punctuation (.,!?;:)",
    "SCONJ": "Subordinating conjunction (if, because, as, that)",
    "SYM": "Symbol (%, $, §, ©)",
    "VERB": "Verb (run, runs, running, eat, ate, eaten)",
    "X": "Other (foreign words, typos, etc.)",
}

# Every described tag is selected by default, in the order declared above.
DEFAULT_SELECTED_TAGS = list(POS_TAG_DESCRIPTIONS)