lang-word-tokenizers / data /Dravidian.json
guipenedo's picture
guipenedo HF staff
do not propagate to the root
49dc1e7 unverified
{
"name": "Dravidian",
"depth": 0,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Central",
"depth": 1,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kolami-Naiki",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kolami, Northwestern",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "kfb",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3604",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kolami, Southeastern",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "nit",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3605",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3603",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Parji-Gadaba",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Gadaba, Mudhili",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "gau",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3607",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Gadaba, Pottangi Ollar",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "gdb",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3608",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Duruwa",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "pci",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3609",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3606",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3602",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Northern",
"depth": 1,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Brahui",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "brh",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3611",
"native_tokenizers": [],
"scripts": [
"Arab"
]
},
{
"name": "Kumarbhag Paharia",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "kmj",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3612",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kurux",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "kru",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3613",
"native_tokenizers": [],
"scripts": [
"Deva"
]
},
{
"name": "Sauria Paharia",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "mjt",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3614",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kisan",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "xis",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3615",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3610",
"native_tokenizers": [],
"scripts": []
},
{
"name": "South-Central",
"depth": 1,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Gondi-Kui",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Gondi",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Maria, Dandami",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "daq",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3619",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Muria, Eastern",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "emu",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3620",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Gondi, Aheri",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "esg",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3621",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Muria, Far Western",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "fmu",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3622",
"native_tokenizers": [],
"scripts": [
"Deva"
]
},
{
"name": "Gondi, Northern",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "gno",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3623",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Khirwar",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "kwx",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3624",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Maria",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "mrr",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3625",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Muria, Western",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "mut",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3626",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Nagarchal",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "nbg",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3627",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Pardhan",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "pch",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3628",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Gondi, Adilabad",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "wsg",
"children": [],
"family": "Dravidian",
"tokenizers": {
"Telu": {
"full_object": "IndicNLPTokenizer(\"te\")",
"original_lang_name": "telugu",
"original_lang_code": "tel",
"script": "Telu",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3629",
"native_tokenizers": [],
"scripts": [
"Telu"
]
}
],
"family": "Dravidian",
"tokenizers": {
"Telu": {
"full_object": "IndicNLPTokenizer(\"te\")",
"original_lang_name": "telugu",
"original_lang_code": "tel",
"script": "Telu",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3618",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Konda-Kui",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Konda",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Konda-Dora",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "kfc",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3632",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Mukha-Dora",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "mmk",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3633",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3631",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Manda-Kui",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kui-Kuvi",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kui, Dawik",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "dwk",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3636",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Koya",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "kff",
"children": [],
"family": "Dravidian",
"tokenizers": {
"Telu": {
"full_object": "IndicNLPTokenizer(\"te\")",
"original_lang_name": "telugu",
"original_lang_code": "tel",
"script": "Telu",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3637",
"native_tokenizers": [],
"scripts": [
"Telu"
]
},
{
"name": "Kuvi",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "kxv",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3638",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kui",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "uki",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3639",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {
"Telu": {
"full_object": "IndicNLPTokenizer(\"te\")",
"original_lang_name": "telugu",
"original_lang_code": "tel",
"script": "Telu",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3635",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Manda-Pengo",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Manda",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "mha",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3641",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Pengo",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "peg",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3642",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3640",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {
"Telu": {
"full_object": "IndicNLPTokenizer(\"te\")",
"original_lang_name": "telugu",
"original_lang_code": "tel",
"script": "Telu",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3634",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {
"Telu": {
"full_object": "IndicNLPTokenizer(\"te\")",
"original_lang_name": "telugu",
"original_lang_code": "tel",
"script": "Telu",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3630",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {
"Telu": {
"full_object": "IndicNLPTokenizer(\"te\")",
"original_lang_name": "telugu",
"original_lang_code": "tel",
"script": "Telu",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3617",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Telugu",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Chenchu",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "cde",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3644",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Manna-Dora",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "mju",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3645",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Telugu",
"depth": 3,
"iso_1_code": "te",
"iso_3_code": "tel",
"children": [],
"family": "Dravidian",
"tokenizers": {
"Telu": {
"full_object": "IndicNLPTokenizer(\"te\")",
"original_lang_name": "telugu",
"original_lang_code": "tel",
"script": "Telu",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3646",
"native_tokenizers": [
"Telu"
],
"scripts": [
"Telu",
"Latn"
]
},
{
"name": "Waddar",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "wbq",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3647",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {
"Telu": {
"full_object": "IndicNLPTokenizer(\"te\")",
"original_lang_name": "telugu",
"original_lang_code": "tel",
"script": "Telu",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3643",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {
"Telu": {
"full_object": "IndicNLPTokenizer(\"te\")",
"original_lang_name": "telugu",
"original_lang_code": "tel",
"script": "Telu",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3616",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Southern",
"depth": 1,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kurichiya",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "kfh",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3649",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kurumba, Attapady",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "pkr",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3650",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Pathiya",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "pty",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3651",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Muduga",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "udg",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3652",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kumbaran",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "wkb",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3653",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kalanadi",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "wkl",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3654",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kunduvadi",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "wku",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3655",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Tamil-Kannada",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kannada",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Badaga",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "bfq",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3658",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Holiya",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "hoy",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3659",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kannada",
"depth": 4,
"iso_1_code": "kn",
"iso_3_code": "kan",
"children": [],
"family": "Dravidian",
"tokenizers": {
"Knda": {
"full_object": "IndicNLPTokenizer(\"kn\")",
"original_lang_name": "kannada",
"original_lang_code": "kan",
"script": "Knda",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3660",
"native_tokenizers": [
"Knda"
],
"scripts": [
"Latn",
"Knda"
]
},
{
"name": "Urali",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "url",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3661",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {
"Knda": {
"full_object": "IndicNLPTokenizer(\"kn\")",
"original_lang_name": "kannada",
"original_lang_code": "kan",
"script": "Knda",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3657",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Tamil-Kodagu",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kodagu",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kodava",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "kfa",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3664",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kurumba, Kannada",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "kfi",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3665",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kurumba, Mullu",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "kpb",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3666",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kurumba, Alu",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "xua",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3667",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kurumba, Jennu",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "xuj",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3668",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3663",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Tamil-Malayalam",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Mannan",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "mjv",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3670",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Malayalam",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Aranadan",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "aaf",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3672",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kadar",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "kej",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3673",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Malayalam",
"depth": 6,
"iso_1_code": "ml",
"iso_3_code": "mal",
"children": [],
"family": "Dravidian",
"tokenizers": {
"Mlym": {
"full_object": "IndicNLPTokenizer(\"ml\")",
"original_lang_name": "malayalam",
"original_lang_code": "mal",
"script": "Mlym",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3674",
"native_tokenizers": [
"Mlym"
],
"scripts": [
"Latn",
"Mlym"
]
},
{
"name": "Malapandaram",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "mjp",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3675",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Malaryan",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "mjq",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3676",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Malavedan",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "mjr",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3677",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Paliyan",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "pcf",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3678",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Paniya",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "pcg",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3679",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Ravula",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "yea",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3680",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {
"Mlym": {
"full_object": "IndicNLPTokenizer(\"ml\")",
"original_lang_name": "malayalam",
"original_lang_code": "mal",
"script": "Mlym",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3671",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Tamil",
"depth": 5,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Eravallan",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "era",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3682",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Irula",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "iru",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3683",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kaikadi",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "kep",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3684",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kanikkaran",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "kev",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3685",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Muthuvan",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "muv",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3686",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Sholaga",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "sle",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3687",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Tamil",
"depth": 6,
"iso_1_code": "ta",
"iso_3_code": "tam",
"children": [],
"family": "Dravidian",
"tokenizers": {
"Taml": {
"full_object": "IndicNLPTokenizer(\"ta\")",
"original_lang_name": "tamil",
"original_lang_code": "tam",
"script": "Taml",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3688",
"native_tokenizers": [
"Taml"
],
"scripts": [
"Taml",
"Latn"
]
},
{
"name": "Kurumba, Betta",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "xub",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3689",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Yerukula",
"depth": 6,
"iso_1_code": null,
"iso_3_code": "yeu",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3690",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {
"Taml": {
"full_object": "IndicNLPTokenizer(\"ta\")",
"original_lang_name": "tamil",
"original_lang_code": "tam",
"script": "Taml",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3681",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {
"Mlym": {
"full_object": "IndicNLPTokenizer(\"ml\")",
"original_lang_name": "malayalam",
"original_lang_code": "mal",
"script": "Mlym",
"class_name": "IndicNLPTokenizer"
},
"Taml": {
"full_object": "IndicNLPTokenizer(\"ta\")",
"original_lang_name": "tamil",
"original_lang_code": "tam",
"script": "Taml",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3669",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Toda-Kota",
"depth": 4,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Kota",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "kfe",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3692",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Toda",
"depth": 5,
"iso_1_code": null,
"iso_3_code": "tcx",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3693",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3691",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {
"Mlym": {
"full_object": "IndicNLPTokenizer(\"ml\")",
"original_lang_name": "malayalam",
"original_lang_code": "mal",
"script": "Mlym",
"class_name": "IndicNLPTokenizer"
},
"Taml": {
"full_object": "IndicNLPTokenizer(\"ta\")",
"original_lang_name": "tamil",
"original_lang_code": "tam",
"script": "Taml",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3662",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Unclassified",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Chetti, Wayanad",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "ctt",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3695",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3694",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {
"Knda": {
"full_object": "IndicNLPTokenizer(\"kn\")",
"original_lang_name": "kannada",
"original_lang_code": "kan",
"script": "Knda",
"class_name": "IndicNLPTokenizer"
},
"Mlym": {
"full_object": "IndicNLPTokenizer(\"ml\")",
"original_lang_name": "malayalam",
"original_lang_code": "mal",
"script": "Mlym",
"class_name": "IndicNLPTokenizer"
},
"Taml": {
"full_object": "IndicNLPTokenizer(\"ta\")",
"original_lang_name": "tamil",
"original_lang_code": "tam",
"script": "Taml",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3656",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Tulu",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Bellari",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "brw",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3697",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Kudiya",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "kfg",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3698",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Tulu",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "tcy",
"children": [],
"family": "Dravidian",
"tokenizers": {
"Knda": {
"full_object": "IndicNLPTokenizer(\"kn\")",
"original_lang_name": "kannada",
"original_lang_code": "kan",
"script": "Knda",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3699",
"native_tokenizers": [],
"scripts": [
"Knda"
]
},
{
"name": "Koraga",
"depth": 3,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Koraga, Korra",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "kfd",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3701",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Koraga, Mudu",
"depth": 4,
"iso_1_code": null,
"iso_3_code": "vmd",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3702",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3700",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {
"Knda": {
"full_object": "IndicNLPTokenizer(\"kn\")",
"original_lang_name": "kannada",
"original_lang_code": "kan",
"script": "Knda",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3696",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Unclassified",
"depth": 2,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Mala Malasar",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "ima",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3704",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Thachanadan",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "thn",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3705",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Ullatan",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "ull",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3706",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Malasar",
"depth": 3,
"iso_1_code": null,
"iso_3_code": "ymr",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3707",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3703",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {
"Knda": {
"full_object": "IndicNLPTokenizer(\"kn\")",
"original_lang_name": "kannada",
"original_lang_code": "kan",
"script": "Knda",
"class_name": "IndicNLPTokenizer"
},
"Mlym": {
"full_object": "IndicNLPTokenizer(\"ml\")",
"original_lang_name": "malayalam",
"original_lang_code": "mal",
"script": "Mlym",
"class_name": "IndicNLPTokenizer"
},
"Taml": {
"full_object": "IndicNLPTokenizer(\"ta\")",
"original_lang_name": "tamil",
"original_lang_code": "tam",
"script": "Taml",
"class_name": "IndicNLPTokenizer"
}
},
"node_i": "3648",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Unclassified",
"depth": 1,
"iso_1_code": null,
"iso_3_code": null,
"children": [
{
"name": "Allar",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "all",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3709",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Bharia",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "bha",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3710",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Malankuravan",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "mjo",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3711",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Pattapu",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "ptq",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3712",
"native_tokenizers": [],
"scripts": []
},
{
"name": "Vishavan",
"depth": 2,
"iso_1_code": null,
"iso_3_code": "vis",
"children": [],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3713",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3708",
"native_tokenizers": [],
"scripts": []
}
],
"family": "Dravidian",
"tokenizers": {},
"node_i": "3601",
"native_tokenizers": [],
"scripts": []
}