# Lookup table from a requested language/model identifier to a spaCy model name.
#
# Two kinds of keys are present:
#   * two-letter language codes, each mapped to the small ("_sm") model listed
#     for that language;
#   * full model names, each mapped to itself -- presumably so callers can pass
#     either a language code or an explicit model name through the same lookup
#     (confirm against the code that reads this table).
SPACY_LANGUAGE_MAPPER = {
    # --- language code -> model used for that code ---
    "ca": "ca_core_news_sm",
    "da": "da_core_news_sm",
    "de": "de_core_news_sm",
    "el": "el_core_news_sm",
    "en": "en_core_web_sm",
    "es": "es_core_news_sm",
    "fr": "fr_core_news_sm",
    "it": "it_core_news_sm",
    "ja": "ja_core_news_sm",
    "lt": "lt_core_news_sm",
    "mk": "mk_core_news_sm",
    "nb": "nb_core_news_sm",
    "nl": "nl_core_news_sm",
    "pl": "pl_core_news_sm",
    "pt": "pt_core_news_sm",
    "ro": "ro_core_news_sm",
    "ru": "ru_core_news_sm",
    "xx": "xx_sent_ud_sm",
    "zh": "zh_core_web_sm",
    # --- full model name -> itself (identity entries) ---
    "ca_core_news_sm": "ca_core_news_sm",
    "ca_core_news_md": "ca_core_news_md",
    "ca_core_news_lg": "ca_core_news_lg",
    "ca_core_news_trf": "ca_core_news_trf",
    "da_core_news_sm": "da_core_news_sm",
    "da_core_news_md": "da_core_news_md",
    "da_core_news_lg": "da_core_news_lg",
    "da_core_news_trf": "da_core_news_trf",
    "de_core_news_sm": "de_core_news_sm",
    "de_core_news_md": "de_core_news_md",
    "de_core_news_lg": "de_core_news_lg",
    "de_dep_news_trf": "de_dep_news_trf",
    "el_core_news_sm": "el_core_news_sm",
    "el_core_news_md": "el_core_news_md",
    "el_core_news_lg": "el_core_news_lg",
    "en_core_web_sm": "en_core_web_sm",
    "en_core_web_md": "en_core_web_md",
    "en_core_web_lg": "en_core_web_lg",
    "en_core_web_trf": "en_core_web_trf",
    "es_core_news_sm": "es_core_news_sm",
    "es_core_news_md": "es_core_news_md",
    "es_core_news_lg": "es_core_news_lg",
    "es_dep_news_trf": "es_dep_news_trf",
    "fr_core_news_sm": "fr_core_news_sm",
    "fr_core_news_md": "fr_core_news_md",
    "fr_core_news_lg": "fr_core_news_lg",
    "fr_dep_news_trf": "fr_dep_news_trf",
    "it_core_news_sm": "it_core_news_sm",
    "it_core_news_md": "it_core_news_md",
    "it_core_news_lg": "it_core_news_lg",
    "ja_core_news_sm": "ja_core_news_sm",
    "ja_core_news_md": "ja_core_news_md",
    "ja_core_news_lg": "ja_core_news_lg",
    "ja_dep_news_trf": "ja_dep_news_trf",
    "lt_core_news_sm": "lt_core_news_sm",
    "lt_core_news_md": "lt_core_news_md",
    "lt_core_news_lg": "lt_core_news_lg",
    "mk_core_news_sm": "mk_core_news_sm",
    "mk_core_news_md": "mk_core_news_md",
    "mk_core_news_lg": "mk_core_news_lg",
    "nb_core_news_sm": "nb_core_news_sm",
    "nb_core_news_md": "nb_core_news_md",
    "nb_core_news_lg": "nb_core_news_lg",
    "nl_core_news_sm": "nl_core_news_sm",
    "nl_core_news_md": "nl_core_news_md",
    "nl_core_news_lg": "nl_core_news_lg",
    "pl_core_news_sm": "pl_core_news_sm",
    "pl_core_news_md": "pl_core_news_md",
    "pl_core_news_lg": "pl_core_news_lg",
    "pt_core_news_sm": "pt_core_news_sm",
    "pt_core_news_md": "pt_core_news_md",
    "pt_core_news_lg": "pt_core_news_lg",
    "ro_core_news_sm": "ro_core_news_sm",
    "ro_core_news_md": "ro_core_news_md",
    "ro_core_news_lg": "ro_core_news_lg",
    "ru_core_news_sm": "ru_core_news_sm",
    "ru_core_news_md": "ru_core_news_md",
    "ru_core_news_lg": "ru_core_news_lg",
    "xx_ent_wiki_sm": "xx_ent_wiki_sm",
    "xx_sent_ud_sm": "xx_sent_ud_sm",
    "zh_core_web_sm": "zh_core_web_sm",
    "zh_core_web_md": "zh_core_web_md",
    "zh_core_web_lg": "zh_core_web_lg",
    "zh_core_web_trf": "zh_core_web_trf",
}


# NOTE(review): this import sits below SPACY_LANGUAGE_MAPPER rather than at the
# top of the file -- presumably because spacy_tokenizer reads the mapper from
# this module and a top-of-file import would be circular. Confirm before moving.
from relik.inference.data.tokenizers.spacy_tokenizer import SpacyTokenizer