lang-word-tokenizers / data /Austro-Asiatic.json
guipenedo's picture
guipenedo HF staff
added khmer, tibetan and lao
baa687b unverified
{
"name": "Austro-Asiatic",
"depth": 0,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Mon-Khmer",
"depth": 1,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Aslian",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Jah Hut",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Jah Hut",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "jah",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1167",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1166",
"native_tokenizers": [],
"scripts": []
},
{
"name": "North Aslian",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Chewong",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Cheq Wong",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "cwg",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1170",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1169",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Eastern",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Batek",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "btq",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1172",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Jehai",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "jhi",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1173",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Minriq",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "mnq",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1174",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Mintil",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "mzt",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1175",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1171",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Tonga",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Ten\u2019edn",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "tnz",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1177",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1176",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Western",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kintaq",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "knq",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1179",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kensiu",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "kns",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1180",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1178",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1168",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Senoic",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Lanoh",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "lnh",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1182",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Sab\u00fcm",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "sbo",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1183",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Semai",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "sea",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1184",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Semnam",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "ssm",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1185",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Temiar",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "tea",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1186",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1181",
"native_tokenizers": [],
"scripts": []
},
{
"name": "South Aslian",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Mah Meri",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "mhe",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1188",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Semelai",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "sza",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1189",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Semaq Beri",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "szc",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1190",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Temoq",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "tmo",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1191",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1187",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1165",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Eastern Mon-Khmer",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Bahnaric",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Central Bahnaric",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Alak",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "alk",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1195",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Bahnar",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "bdq",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1196",
"native_tokenizers": [],
"scripts": [
"Latn"
]
},
{
"name": "Romam",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "rmx",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1197",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Tampuan",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "tpu",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1198",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1194",
"native_tokenizers": [],
"scripts": []
},
{
"name": "East Bahnaric",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Cua",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "cua",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1200",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1199",
"native_tokenizers": [],
"scripts": []
},
{
"name": "North Bahnaric",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Katua",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "kta",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1202",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kachok",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "xkk",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1203",
"native_tokenizers": [],
"scripts": []
},
{
"name": "East",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kayong",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "kxy",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1205",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Takua",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "tkz",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1206",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1204",
"native_tokenizers": [],
"scripts": []
},
{
"name": "West",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Trieng",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "stg",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1208",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Talieng",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "tdf",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1209",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Duan",
"depth": 6,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Halang Doan",
"depth": 7,
"iso_1_code": null,
"iso_3_code": "hld",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1211",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1210",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Jeh-Halang",
"depth": 6,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Halang",
"depth": 7,
"iso_1_code": null,
"iso_3_code": "hal",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1213",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Jeh",
"depth": 7,
"iso_1_code": null,
"iso_3_code": "jeh",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1214",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1212",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Rengao",
"depth": 6,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Rengao",
"depth": 7,
"iso_1_code": null,
"iso_3_code": "ren",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1216",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1215",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Sedang-Todrah",
"depth": 6,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Sedang",
"depth": 7,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Hre",
"depth": 8,
"iso_1_code": null,
"iso_3_code": "hre",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1219",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Sedang",
"depth": 8,
"iso_1_code": null,
"iso_3_code": "sed",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1220",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1218",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Todrah-Monom",
"depth": 7,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Monom",
"depth": 8,
"iso_1_code": null,
"iso_3_code": "moo",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1222",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Todrah",
"depth": 8,
"iso_1_code": null,
"iso_3_code": "tdr",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1223",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1221",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1217",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1207",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1201",
"native_tokenizers": [],
"scripts": []
},
{
"name": "South Bahnaric",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Stieng, Budeh",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "stt",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1225",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Sre-Mnong",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Mnong",
"depth": 6,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Eastern Mnong",
"depth": 7,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Mnong, Eastern",
"depth": 8,
"iso_1_code": null,
"iso_3_code": "mng",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1229",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1228",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Southern-Central Mnong",
"depth": 7,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Mnong, Central",
"depth": 8,
"iso_1_code": null,
"iso_3_code": "cmo",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {
"Khmr": {
"full_object": "KhmerTokenizer()",
"original_lang_name": "khmer",
"original_lang_code": "khm",
"script": "Khmr",
"class_name": "KhmerTokenizer"
},
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1231",
"native_tokenizers": [],
"scripts": [
"Latn",
"Khmr"
]
},
{
"name": "Mnong, Southern",
"depth": 8,
"iso_1_code": null,
"iso_3_code": "mnn",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1232",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kraol",
"depth": 8,
"iso_1_code": null,
"iso_3_code": "rka",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1233",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Khmr": {
"full_object": "KhmerTokenizer()",
"original_lang_name": "khmer",
"original_lang_code": "khm",
"script": "Khmr",
"class_name": "KhmerTokenizer"
},
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1230",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Khmr": {
"full_object": "KhmerTokenizer()",
"original_lang_name": "khmer",
"original_lang_code": "khm",
"script": "Khmr",
"class_name": "KhmerTokenizer"
},
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1227",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Sre",
"depth": 6,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Maa",
"depth": 7,
"iso_1_code": null,
"iso_3_code": "cma",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1235",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Koho",
"depth": 7,
"iso_1_code": null,
"iso_3_code": "kpm",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1236",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1234",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Khmr": {
"full_object": "KhmerTokenizer()",
"original_lang_name": "khmer",
"original_lang_code": "khm",
"script": "Khmr",
"class_name": "KhmerTokenizer"
},
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1226",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Stieng-Chrau",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Chrau",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "crw",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1238",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Mel-Khaonh",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "hkn",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1239",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Stieng, Bulo",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "sti",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1240",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1237",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Khmr": {
"full_object": "KhmerTokenizer()",
"original_lang_name": "khmer",
"original_lang_code": "khm",
"script": "Khmr",
"class_name": "KhmerTokenizer"
},
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1224",
"native_tokenizers": [],
"scripts": []
},
{
"name": "West Bahnaric",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Lavi",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "lvi",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1242",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Brao-Kravet",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Brao",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "brb",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1244",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Krung",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "krr",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1245",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kavet",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "krv",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1246",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Sou",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "sqq",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1247",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1243",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Laven",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Laven",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "lbo",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1249",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1248",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Nyaheun",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Nyaheun",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "nev",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1251",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1250",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Oi-The",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Oy",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "oyb",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1253",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Sapuan",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "spu",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1254",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1252",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1241",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Khmr": {
"full_object": "KhmerTokenizer()",
"original_lang_name": "khmer",
"original_lang_code": "khm",
"script": "Khmr",
"class_name": "KhmerTokenizer"
},
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1193",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Katuic",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Central Katuic",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Ta\u2019oih",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Ir",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "irr",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1258",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Ong",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "oog",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1259",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Ta\u2019oih, Upper",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "tth",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1260",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Ta\u2019oih, Lower",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "tto",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1261",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1257",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1256",
"native_tokenizers": [],
"scripts": []
},
{
"name": "East Katuic",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Katu-Pacoh",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Katu, Eastern",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "ktv",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1264",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Katu, Western",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "kuf",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1265",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Pacoh",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "pac",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1266",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Phuong",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "phg",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1267",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Tareng",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "tgr",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1268",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1263",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Ngeq-Nkriang",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kriang",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "ngt",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1270",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1269",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1262",
"native_tokenizers": [],
"scripts": []
},
{
"name": "West Katuic",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Bru",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Bru, Eastern",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "bru",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1273",
"native_tokenizers": [],
"scripts": [
"Latn"
]
},
{
"name": "Bru, Western",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "brv",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1274",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Katang, Northern",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "ncq",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1275",
"native_tokenizers": [],
"scripts": [
"Laoo"
]
},
{
"name": "Katang, Southern",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "sct",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1276",
"native_tokenizers": [],
"scripts": []
},
{
"name": "So",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "sss",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1277",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Khua",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "xhv",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1278",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1272",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kuay",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kuay",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "kdt",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1280",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Nyeu",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "nyl",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1281",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1279",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1271",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1255",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Khmer",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Khmer",
"depth": 4,
"iso_1_code": "km",
"iso_3_code": "khm",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {
"Khmr": {
"full_object": "KhmerTokenizer()",
"original_lang_name": "khmer",
"original_lang_code": "khm",
"script": "Khmr",
"class_name": "KhmerTokenizer"
}
},
"node_i": "1283",
"native_tokenizers": [
"Khmr"
],
"scripts": [
"Khmr"
]
},
{
"name": "Khmer, Northern",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "kxm",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1284",
"native_tokenizers": [],
"scripts": [
"Thai"
]
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Khmr": {
"full_object": "KhmerTokenizer()",
"original_lang_name": "khmer",
"original_lang_code": "khm",
"script": "Khmr",
"class_name": "KhmerTokenizer"
}
},
"node_i": "1282",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Pearic",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Eastern",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Pear",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "pcb",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1287",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1286",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Western",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Chong",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Chong",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "cog",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1290",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Chung",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "scq",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1291",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1289",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Samre",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Somray",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "smu",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1293",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Samre",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "sxm",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1294",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1292",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Suoy",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Su\u2019ung",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "syo",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1296",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1295",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1288",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1285",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Khmr": {
"full_object": "KhmerTokenizer()",
"original_lang_name": "khmer",
"original_lang_code": "khm",
"script": "Khmr",
"class_name": "KhmerTokenizer"
},
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1192",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Monic",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Mon",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "mnw",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1298",
"native_tokenizers": [],
"scripts": [
"Mymr"
]
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1297",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Nicobar",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Car",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Nicobarese, Car",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "caq",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1301",
"native_tokenizers": [],
"scripts": [
"Latn"
]
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1300",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Chowra-Teressa",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Chaura",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "crv",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1303",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Teressa",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "tef",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1304",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1302",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Great Nicobar",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Nicobarese, Southern",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "nik",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1306",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1305",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Nancowry",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Nicobarese, Central",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "ncb",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1308",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1307",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Shom Peng",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Shom Peng",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "sii",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1310",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1309",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1299",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Northern Mon-Khmer",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Khasian",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "War-Jaintia",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "aml",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1313",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Khasi",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "kha",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1314",
"native_tokenizers": [],
"scripts": [
"Latn"
]
},
{
"name": "Lyngngam",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "lyg",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1315",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Pnar",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "pbv",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1316",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1312",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Khmuic",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Khao",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Khao",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "xao",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1319",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1318",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Mal-Khmu\u2019",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Khmu\u2019",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Khuen",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "khf",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1322",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Khmu",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "kjg",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1323",
"native_tokenizers": [],
"scripts": []
},
{
"name": "O\u2019du",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "tyh",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1324",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1321",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Mal-Prai",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Mal",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "mlf",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1326",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Prai",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "prt",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1327",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1325",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1320",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Mlabri",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Mlabri",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "mra",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1329",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1328",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Xinh Mul",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Phong-Kniang",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "pnx",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1331",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Puoc",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "puo",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1332",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1330",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1317",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Mang",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Mang",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "zng",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1334",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1333",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Palaungic",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Eastern Palaungic",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Angkuic",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Hu",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "huo",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1338",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kon Keu",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "kkn",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1339",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Man Met",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "mml",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1340",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Mok",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "mqt",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1341",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Samtao",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "stu",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1342",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Tai Loi",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "tlq",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1343",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Muak Sa-aak",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "ukk",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1344",
"native_tokenizers": [],
"scripts": []
},
{
"name": "U",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "uuu",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1345",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kiorr",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "xko",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1346",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1337",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Bit-Khang",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Bit",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "bgk",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1348",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Bumang",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "bvp",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1349",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kh\u00e1ng",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "kjm",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1350",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1347",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Lametic",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Con",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "cno",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1352",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Rmeet",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "lbn",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1353",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1351",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Waic",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Bulang",
"depth": 6,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Blang",
"depth": 7,
"iso_1_code": null,
"iso_3_code": "blr",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1356",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1355",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Lawa",
"depth": 6,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Lawa, Western",
"depth": 7,
"iso_1_code": null,
"iso_3_code": "lcp",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1358",
"native_tokenizers": [],
"scripts": [
"Thai"
]
},
{
"name": "Lawa, Eastern",
"depth": 7,
"iso_1_code": null,
"iso_3_code": "lwl",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1359",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1357",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Wa",
"depth": 6,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Wa, Parauk",
"depth": 7,
"iso_1_code": null,
"iso_3_code": "prk",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1361",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Awa",
"depth": 7,
"iso_1_code": null,
"iso_3_code": "vwa",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1362",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Wa, Vo",
"depth": 7,
"iso_1_code": null,
"iso_3_code": "wbm",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1363",
"native_tokenizers": [],
"scripts": [
"Latn"
]
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1360",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1354",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1336",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Western Palaungic",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Danau",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Danau",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "dnu",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1366",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1365",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Palaung",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Palaung, Ruching",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "pce",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1368",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Palaung, Shwe",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "pll",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1369",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Palaung, Rumai",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "rbb",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1370",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1367",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Riang",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Riang Lang",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "ril",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1372",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Riang Lai",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "yin",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1373",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1371",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1364",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1335",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1311",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Palyu",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Bugan",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "bbh",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1375",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Bolyu",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "ply",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1376",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1374",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Southern Monic",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Nyahkur",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "cbn",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1378",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1377",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Unclassified",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kemiehua",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "kfj",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1380",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kuanhua",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "xnh",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1381",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1379",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Viet-Muong",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Chut",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Arem",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "aem",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1384",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Maleng",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "pkt",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1385",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Chut",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "scb",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1386",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1383",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Cuoi",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Hung",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "hnu",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1388",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Tho",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "tou",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1389",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1387",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Muong",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Bo",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "bgl",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1391",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Muong",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "mtq",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1392",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Ngu\u00f4n",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "nuo",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1393",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1390",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Thavung",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Aheu",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "thm",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1395",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1394",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Vietnamese",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Vietnamese",
"depth": 4,
"iso_1_code": "vi",
"iso_3_code": "vie",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1397",
"native_tokenizers": [
"Latn"
],
"scripts": [
"Latn"
]
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1396",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1382",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {
"Khmr": {
"full_object": "KhmerTokenizer()",
"original_lang_name": "khmer",
"original_lang_code": "khm",
"script": "Khmr",
"class_name": "KhmerTokenizer"
},
"Latn": {
"full_object": "SpaCyTokenizer(\"vi\")",
"original_lang_name": "vietnamese",
"original_lang_code": "vie",
"script": "Latn",
"class_name": "SpaCyTokenizer"
}
},
"node_i": "1164",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Munda",
"depth": 1,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "North Munda",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kherwari",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Agariya",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "agi",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1401",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Bijori",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "bix",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1402",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kodaku",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "ksz",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1403",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Mundari",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Asuri",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "asr",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1405",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Birhor",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "biy",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1406",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Koda",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "cdz",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1407",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kol",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "ekl",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1408",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Ho",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "hoc",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1409",
"native_tokenizers": [],
"scripts": [
"Latn",
"Wara"
]
},
{
"name": "Korwa",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "kfp",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1410",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Mundari",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "unr",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1411",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Munda",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "unx",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1412",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1404",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Santali",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Mahali",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "mjx",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1414",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Santhali",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "sat",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1415",
"native_tokenizers": [],
"scripts": [
"Latn",
"Olck"
]
},
{
"name": "Turi",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "trd",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1416",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1413",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1400",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Korku",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Korku",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "kfq",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1418",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1417",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1399",
"native_tokenizers": [],
"scripts": []
},
{
"name": "South Munda",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kharia-Juang",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Juang",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "jun",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1421",
"native_tokenizers": [],
"scripts": [
"Orya"
]
},
{
"name": "Kharia",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "khr",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1422",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1420",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Koraput Munda",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Gutob-Remo-Geta\u2019",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Geta\u2019",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Gata\u2019",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "gaq",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1426",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1425",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Gutob-Remo",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Bondo",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "bfw",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1428",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Gadaba, Bodo",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "gbj",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1429",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1427",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1424",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Sora-Juray-Gorum",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Gorum",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Parenga",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "pcj",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1432",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1431",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Sora-Juray",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Juray",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "juy",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1434",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Sora",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "srb",
"children": [],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1435",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1433",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1430",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1423",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1419",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1398",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Austro-Asiatic",
"tokenizers": {},
"node_i": "1163",
"native_tokenizers": [],
"scripts": []
}