{ "name": "Creole", "depth": 0, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Afrikaans based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Flaaitaal", "depth": 2, "iso_1_code": null, "iso_3_code": "fly", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3483", "native_tokenizers": [], "scripts": [] }, { "name": "Oorlams", "depth": 2, "iso_1_code": null, "iso_3_code": "oor", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3484", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": {}, "node_i": "3482", "native_tokenizers": [], "scripts": [] }, { "name": "Arabic based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Nubi", "depth": 2, "iso_1_code": null, "iso_3_code": "kcn", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3486", "native_tokenizers": [], "scripts": [] }, { "name": "Arabic, Juba", "depth": 2, "iso_1_code": "ar", "iso_3_code": "pga", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3487", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": {}, "node_i": "3485", "native_tokenizers": [], "scripts": [] }, { "name": "Assamese based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Nagamese", "depth": 2, "iso_1_code": null, "iso_3_code": "nag", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3489", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": {}, "node_i": "3488", "native_tokenizers": [], "scripts": [] }, { "name": "Dutch based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Berbice Dutch Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "brc", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3491", "native_tokenizers": [], "scripts": [] }, { "name": "Negerhollands", "depth": 2, "iso_1_code": null, "iso_3_code": "dcr", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3492", "native_tokenizers": [], "scripts": [] }, { "name": "Javindo", "depth": 2, "iso_1_code": null, "iso_3_code": "jvd", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3493", "native_tokenizers": [], "scripts": [] }, { "name": "Petjo", "depth": 2, "iso_1_code": null, "iso_3_code": "pey", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3494", "native_tokenizers": [], "scripts": [] }, { "name": "Skepi Dutch Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "skw", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3495", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": {}, "node_i": "3490", "native_tokenizers": [], "scripts": [] }, { "name": "English based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Saramaccan", "depth": 2, "iso_1_code": null, "iso_3_code": "srm", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3497", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Atlantic", "depth": 2, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Eastern", "depth": 3, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Turks and Caicos English Creole", "depth": 4, "iso_1_code": null, "iso_3_code": "tch", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3500", "native_tokenizers": [], "scripts": [] }, { "name": "Northern", "depth": 4, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Afro-Seminole Creole", "depth": 5, "iso_1_code": null, "iso_3_code": "afs", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3502", "native_tokenizers": [], "scripts": [] }, { "name": "Bahamas English Creole", "depth": 5, "iso_1_code": null, "iso_3_code": "bah", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3503", "native_tokenizers": [], "scripts": [] }, { "name": "Sea Island English Creole", "depth": 5, "iso_1_code": null, "iso_3_code": "gul", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3504", "native_tokenizers": [], "scripts": [ "Latn" ] } ], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3501", "native_tokenizers": [], "scripts": [] }, { "name": "Southern", "depth": 4, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Leeward Caribbean English Creole", "depth": 5, "iso_1_code": null, "iso_3_code": "aig", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3506", "native_tokenizers": [], "scripts": [] }, { "name": "Bajan", "depth": 5, "iso_1_code": null, "iso_3_code": "bjs", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3507", "native_tokenizers": [], "scripts": [] }, { "name": "Grenadian English Creole", "depth": 5, "iso_1_code": null, "iso_3_code": "gcl", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3508", "native_tokenizers": [], "scripts": [] }, { "name": "Guyanese English Creole", "depth": 5, "iso_1_code": null, "iso_3_code": "gyn", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3509", "native_tokenizers": [], "scripts": [] }, { "name": "Vincentian English Creole", "depth": 5, "iso_1_code": null, "iso_3_code": "svc", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3510", "native_tokenizers": [], "scripts": [] }, { "name": "Tobagonian English Creole", "depth": 5, "iso_1_code": null, "iso_3_code": "tgh", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3511", "native_tokenizers": [], "scripts": [] }, { "name": "Trinidadian English Creole", "depth": 5, "iso_1_code": null, "iso_3_code": "trf", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3512", "native_tokenizers": [], "scripts": [] }, { "name": "Virgin Islands English Creole", "depth": 5, "iso_1_code": null, "iso_3_code": "vic", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3513", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": {}, "node_i": "3505", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3499", "native_tokenizers": [], "scripts": [] }, { "name": "Krio", "depth": 3, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Equatorial Guinean Pidgin", "depth": 4, "iso_1_code": null, "iso_3_code": "fpe", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3515", "native_tokenizers": [], "scripts": [] }, { "name": "Ghanaian Pidgin English", "depth": 4, "iso_1_code": null, "iso_3_code": "gpe", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3516", "native_tokenizers": [], "scripts": [] }, { "name": "Krio", "depth": 4, "iso_1_code": null, "iso_3_code": "kri", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3517", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Pidgin, Nigerian", "depth": 4, "iso_1_code": null, "iso_3_code": "pcm", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3518", "native_tokenizers": [ "Latn" ], "scripts": [ "Latn" ] }, { "name": "Pidgin, Cameroon", "depth": 4, "iso_1_code": null, "iso_3_code": "wes", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3519", "native_tokenizers": [], "scripts": [ "Latn" ] } ], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3514", "native_tokenizers": [], "scripts": [] }, { "name": "Suriname", "depth": 3, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Sranan Tongo", "depth": 4, "iso_1_code": null, "iso_3_code": "srn", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3521", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Ndyuka", "depth": 4, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Aukan", "depth": 5, "iso_1_code": null, "iso_3_code": "djk", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3523", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Kwinti", "depth": 5, "iso_1_code": null, "iso_3_code": "kww", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3524", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3522", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3520", "native_tokenizers": [], "scripts": [] }, { "name": "Western", "depth": 3, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Belize English Creole", "depth": 4, "iso_1_code": null, "iso_3_code": "bzj", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3526", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Nicaragua English Creole", "depth": 4, "iso_1_code": null, "iso_3_code": "bzk", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3527", "native_tokenizers": [], "scripts": [] }, { "name": "Islander English Creole", "depth": 4, "iso_1_code": null, "iso_3_code": "icr", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3528", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Jamaican English Creole", "depth": 4, "iso_1_code": null, "iso_3_code": "jam", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3529", "native_tokenizers": [], "scripts": [ "Latn" ] } ], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3525", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3498", "native_tokenizers": [], "scripts": [] }, { "name": "Pacific", "depth": 2, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Bislama", "depth": 3, "iso_1_code": "bi", "iso_3_code": "bis", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3531", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Hawaii Pidgin", "depth": 3, "iso_1_code": null, "iso_3_code": "hwc", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3532", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Ngatik Men\u2019s Creole", "depth": 3, "iso_1_code": null, "iso_3_code": "ngm", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3533", "native_tokenizers": [], "scripts": [] }, { "name": "Pitcairn-Norfolk", "depth": 3, "iso_1_code": null, "iso_3_code": "pih", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3534", "native_tokenizers": [], "scripts": [] }, { "name": "Pijin", "depth": 3, "iso_1_code": null, "iso_3_code": "pis", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3535", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Kriol", "depth": 3, "iso_1_code": null, "iso_3_code": "rop", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3536", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Torres Strait Creole", "depth": 3, "iso_1_code": null, "iso_3_code": "tcs", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3537", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Tok Pisin", "depth": 3, "iso_1_code": null, "iso_3_code": "tpi", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3538", "native_tokenizers": [], "scripts": [ "Latn" ] } ], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3530", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": { "Latn": { "full_object": "StanzaTokenizer(\"pcm\")", "original_lang_name": "nigerian_pidgin", "original_lang_code": "pcm", "script": "Latn", "class_name": "StanzaTokenizer" } }, "node_i": "3496", "native_tokenizers": [], "scripts": [] }, { "name": "French based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Lesser Antillean French Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "acf", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3540", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Tayo", "depth": 2, "iso_1_code": null, "iso_3_code": "cks", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3541", "native_tokenizers": [], "scripts": [] }, { "name": "Seychelles French Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "crs", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3542", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Guadeloupean French Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "gcf", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3543", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Guianese French Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "gcr", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3544", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Haitian Creole", "depth": 2, "iso_1_code": "ht", "iso_3_code": "hat", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3545", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Karipuna French Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "kmv", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3546", "native_tokenizers": [], "scripts": [] }, { "name": "Louisiana Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "lou", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3547", "native_tokenizers": [], "scripts": [] }, { "name": "Morisyen", "depth": 2, "iso_1_code": null, "iso_3_code": "mfe", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3548", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "R\u00e9union French Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "rcf", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3549", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "San Miguel French Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "scf", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3550", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": {}, "node_i": "3539", "native_tokenizers": [], "scripts": [] }, { "name": "German based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Unserdeutsch", "depth": 2, "iso_1_code": null, "iso_3_code": "uln", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3552", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": {}, "node_i": "3551", "native_tokenizers": [], "scripts": [] }, { "name": "Hindi based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Andaman Hindi Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "hca", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3554", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": {}, "node_i": "3553", "native_tokenizers": [], "scripts": [] }, { "name": "Iberian based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Papiamentu", "depth": 2, "iso_1_code": null, "iso_3_code": "pap", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3556", "native_tokenizers": [], "scripts": [ "Latn" ] } ], "family": "Creole", "tokenizers": {}, "node_i": "3555", "native_tokenizers": [], "scripts": [] }, { "name": "Japanese-based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Yilan Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "ycr", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3558", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": {}, "node_i": "3557", "native_tokenizers": [], "scripts": [] }, { "name": "Kongo based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Kituba", "depth": 2, "iso_1_code": null, "iso_3_code": "ktu", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3560", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Kituba", "depth": 2, "iso_1_code": null, "iso_3_code": "mkw", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3561", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": {}, "node_i": "3559", "native_tokenizers": [], "scripts": [] }, { "name": "Malay based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Malay, Ambonese", "depth": 2, "iso_1_code": null, "iso_3_code": "abs", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"ms\")", "original_lang_name": "malay", "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } }, "node_i": "3563", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Betawi", "depth": 2, "iso_1_code": null, "iso_3_code": "bew", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"ms\")", "original_lang_name": "malay", "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } }, "node_i": "3564", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Malay, Banda", "depth": 2, "iso_1_code": null, "iso_3_code": "bpq", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3565", "native_tokenizers": [], "scripts": [] }, { "name": "Malaccan Malay Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "ccm", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3566", "native_tokenizers": [], "scripts": [] }, { "name": "Malay, Cocos Islands", "depth": 2, "iso_1_code": "ms", "iso_3_code": "coa", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3567", "native_tokenizers": [], "scripts": [] }, { "name": "Malay, Larantuka", "depth": 2, "iso_1_code": null, "iso_3_code": "lrt", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3568", "native_tokenizers": [], "scripts": [] }, { "name": "Malay, North Moluccan", "depth": 2, "iso_1_code": "ms", "iso_3_code": "max", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"ms\")", "original_lang_name": "malay", "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } }, "node_i": "3569", "native_tokenizers": [ "Latn" ], "scripts": [ "Latn" ] }, { "name": "Malay, Baba", "depth": 2, "iso_1_code": null, "iso_3_code": "mbf", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"ms\")", "original_lang_name": "malay", "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } }, "node_i": "3570", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Malay, Balinese", "depth": 2, "iso_1_code": null, "iso_3_code": "mhp", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3571", "native_tokenizers": [], "scripts": [] }, { "name": "Malay, Kupang", "depth": 2, "iso_1_code": null, "iso_3_code": "mkn", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"ms\")", "original_lang_name": "malay", "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } }, "node_i": "3572", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Indonesian, Peranakan", "depth": 2, "iso_1_code": null, "iso_3_code": "pea", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3573", "native_tokenizers": [], "scripts": [] }, { "name": "Malay, Papuan", "depth": 2, "iso_1_code": null, "iso_3_code": "pmy", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3574", "native_tokenizers": [], "scripts": [] }, { "name": "Sri Lankan Malay Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "sci", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3575", "native_tokenizers": [], "scripts": [] }, { "name": "Malay, Manado", "depth": 2, "iso_1_code": "ms", "iso_3_code": "xmm", "children": [], "family": "Creole", "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"ms\")", "original_lang_name": "malay", "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } }, "node_i": "3576", "native_tokenizers": [ "Latn" ], "scripts": [ "Latn" ] } ], "family": "Creole", "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"ms\")", "original_lang_name": "malay", "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } }, "node_i": "3562", "native_tokenizers": [], "scripts": [] }, { "name": "Ngbandi based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Sango", "depth": 2, "iso_1_code": "sg", "iso_3_code": "sag", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3578", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Sango, Riverain", "depth": 2, "iso_1_code": null, "iso_3_code": "snj", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3579", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": {}, "node_i": "3577", "native_tokenizers": [], "scripts": [] }, { "name": "Portuguese based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Angolar", "depth": 2, "iso_1_code": null, "iso_3_code": "aoa", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3581", "native_tokenizers": [], "scripts": [] }, { "name": "Cafundo Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "ccd", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3582", "native_tokenizers": [], "scripts": [] }, { "name": "S\u00e3otomense", "depth": 2, "iso_1_code": null, "iso_3_code": "cri", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3583", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Fa d\u2019Ambu", "depth": 2, "iso_1_code": null, "iso_3_code": "fab", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3584", "native_tokenizers": [], "scripts": [] }, { "name": "Indo-Portuguese", "depth": 2, "iso_1_code": null, "iso_3_code": "idb", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3585", "native_tokenizers": [], "scripts": [] }, { "name": "Kabuverdianu", "depth": 2, "iso_1_code": null, "iso_3_code": "kea", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3586", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Malaccan Portuguese Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "mcm", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3587", "native_tokenizers": [], "scripts": [] }, { "name": "Macanese", "depth": 2, "iso_1_code": null, "iso_3_code": "mzs", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3588", "native_tokenizers": [], "scripts": [] }, { "name": "Guinea-Bissau Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "pov", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3589", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Principense", "depth": 2, "iso_1_code": null, "iso_3_code": "pre", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3590", "native_tokenizers": [], "scripts": [] }, { "name": "Ternate\u00f1o", "depth": 2, "iso_1_code": null, "iso_3_code": "tmg", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3591", "native_tokenizers": [], "scripts": [] }, { "name": "Pidgin, Timor", "depth": 2, "iso_1_code": null, "iso_3_code": "tvy", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3592", "native_tokenizers": [], "scripts": [] }, { "name": "Korlai Portuguese Creole", "depth": 2, "iso_1_code": null, "iso_3_code": "vkp", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3593", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": {}, "node_i": "3580", "native_tokenizers": [], "scripts": [] }, { "name": "Spanish based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Chavacano", "depth": 2, "iso_1_code": null, "iso_3_code": "cbk", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3595", "native_tokenizers": [], "scripts": [ "Latn" ] }, { "name": "Palenquero", "depth": 2, "iso_1_code": null, "iso_3_code": "pln", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3596", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": {}, "node_i": "3594", "native_tokenizers": [], "scripts": [] }, { "name": "Swahili based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Cutchi-Swahili", "depth": 2, "iso_1_code": null, "iso_3_code": "ccl", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3598", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": {}, "node_i": "3597", "native_tokenizers": [], "scripts": [] }, { "name": "Tetun based", "depth": 1, "iso_1_code": null, "iso_3_code": null, "children": [ { "name": "Tetun Dili", "depth": 2, "iso_1_code": null, "iso_3_code": "tdt", "children": [], "family": "Creole", "tokenizers": {}, "node_i": "3600", "native_tokenizers": [], "scripts": [ "Latn" ] } ], "family": "Creole", "tokenizers": {}, "node_i": "3599", "native_tokenizers": [], "scripts": [] } ], "family": "Creole", "tokenizers": {}, "node_i": "3481", "native_tokenizers": [], "scripts": [] }