diff --git "a/data/Austronesian.json" "b/data/Austronesian.json" --- "a/data/Austronesian.json" +++ "b/data/Austronesian.json" @@ -15,9 +15,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -35,9 +35,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -51,9 +51,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -100,9 +100,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -120,9 +120,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -136,9 +136,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -212,9 +212,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -250,9 +250,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -275,9 +275,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -301,9 +301,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -315,9 +315,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -343,9 +343,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -363,9 +363,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -379,9 +379,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -414,9 +414,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -447,9 +447,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -472,9 +472,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -492,9 +492,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -508,9 +508,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -531,9 +531,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -547,9 +547,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -561,9 +561,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -599,9 +599,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -619,9 +619,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -635,9 +635,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -683,9 +683,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -709,9 +709,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -723,9 +723,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -737,9 +737,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1116,9 +1116,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1152,9 +1152,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1166,9 +1166,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1180,9 +1180,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1194,9 +1194,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1249,9 +1249,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1263,9 +1263,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1277,9 +1277,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1330,9 +1330,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1350,9 +1350,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1406,9 +1406,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1429,9 +1429,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1455,9 +1455,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1469,9 +1469,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1502,9 +1502,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1547,9 +1547,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1567,9 +1567,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1583,9 +1583,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1597,9 +1597,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1611,9 +1611,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1685,9 +1685,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1711,9 +1711,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1786,9 +1786,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1800,9 +1800,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1897,9 +1897,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1925,9 +1925,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -1941,9 +1941,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -2004,9 +2004,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -2024,9 +2024,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -2044,9 +2044,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -2064,9 +2064,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -2080,9 +2080,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -2115,9 +2115,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -2479,9 +2479,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -2629,9 +2629,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -2775,9 +2775,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -3497,9 +3497,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -3523,9 +3523,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -3537,9 +3537,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -3551,9 +3551,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -3586,9 +3586,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -3600,9 +3600,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -3655,9 +3655,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -3669,9 +3669,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -3745,9 +3745,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -3813,9 +3813,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -3873,9 +3873,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -3919,9 +3919,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -3993,9 +3993,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4059,9 +4059,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4073,9 +4073,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4169,9 +4169,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4202,9 +4202,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4222,9 +4222,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4238,9 +4238,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4252,9 +4252,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4295,9 +4295,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4341,9 +4341,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4394,9 +4394,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4424,9 +4424,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4444,9 +4444,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4460,9 +4460,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4493,9 +4493,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4533,9 +4533,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4573,9 +4573,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4589,9 +4589,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4603,9 +4603,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4626,9 +4626,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4642,9 +4642,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4656,9 +4656,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4704,9 +4704,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4720,9 +4720,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4743,9 +4743,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4763,9 +4763,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4833,9 +4833,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4849,9 +4849,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4882,9 +4882,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -4968,9 +4968,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5012,9 +5012,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5028,9 +5028,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5051,9 +5051,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5067,9 +5067,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5081,9 +5081,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5095,9 +5095,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5118,9 +5118,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5138,9 +5138,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5154,9 +5154,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5168,9 +5168,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5182,9 +5182,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5210,9 +5210,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5226,9 +5226,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5271,9 +5271,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5285,9 +5285,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5308,9 +5308,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5344,9 +5344,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5377,9 +5377,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5393,9 +5393,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5416,9 +5416,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5432,9 +5432,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5455,9 +5455,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5471,9 +5471,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5509,9 +5509,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5635,9 +5635,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5678,9 +5678,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5694,9 +5694,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5708,9 +5708,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5722,9 +5722,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5757,9 +5757,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5841,9 +5841,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -5857,9 +5857,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -6045,9 +6045,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -6109,9 +6109,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -6165,9 +6165,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -6241,9 +6241,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -6255,9 +6255,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -6269,9 +6269,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -6810,9 +6810,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -6830,9 +6830,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -6910,9 +6910,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -6956,9 +6956,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7061,9 +7061,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7077,9 +7077,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7091,9 +7091,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7105,9 +7105,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7148,9 +7148,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7248,9 +7248,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7284,9 +7284,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7447,9 +7447,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7553,9 +7553,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7567,9 +7567,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7581,9 +7581,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7595,9 +7595,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7685,9 +7685,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7705,9 +7705,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7735,9 +7735,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7755,9 +7755,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7771,9 +7771,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7785,9 +7785,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7818,9 +7818,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7834,9 +7834,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7867,9 +7867,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7883,9 +7883,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -7948,9 +7948,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8002,9 +8002,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8022,9 +8022,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8042,9 +8042,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8072,9 +8072,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8092,9 +8092,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8112,9 +8112,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8142,9 +8142,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8158,9 +8158,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8191,9 +8191,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8227,9 +8227,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8241,9 +8241,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8264,9 +8264,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8284,9 +8284,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8324,9 +8324,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8340,9 +8340,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8354,9 +8354,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8368,9 +8368,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8382,9 +8382,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8405,9 +8405,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8431,9 +8431,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8499,9 +8499,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8515,9 +8515,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8611,9 +8611,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8680,9 +8680,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8700,9 +8700,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8766,9 +8766,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8809,9 +8809,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8825,9 +8825,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -8995,9 +8995,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9011,9 +9011,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9025,9 +9025,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9048,9 +9048,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9064,9 +9064,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9118,9 +9118,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9148,9 +9148,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9164,9 +9164,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9199,9 +9199,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9318,9 +9318,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9354,9 +9354,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9368,9 +9368,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9391,9 +9391,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9411,9 +9411,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9471,9 +9471,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9491,9 +9491,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9511,9 +9511,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9531,9 +9531,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9551,9 +9551,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9577,9 +9577,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9736,9 +9736,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9802,9 +9802,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9835,9 +9835,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9865,9 +9865,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9881,9 +9881,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -9895,9 +9895,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10047,9 +10047,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10114,9 +10114,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10169,9 +10169,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10192,9 +10192,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10212,9 +10212,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10238,9 +10238,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10292,9 +10292,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10312,9 +10312,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10397,9 +10397,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10423,9 +10423,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10437,9 +10437,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10460,9 +10460,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10486,9 +10486,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10500,9 +10500,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10523,9 +10523,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10539,9 +10539,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10553,9 +10553,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10567,9 +10567,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10600,9 +10600,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10646,9 +10646,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10679,9 +10679,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10695,9 +10695,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10709,9 +10709,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10818,9 +10818,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10834,9 +10834,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10857,9 +10857,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10873,9 +10873,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10887,9 +10887,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10901,9 +10901,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10945,9 +10945,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -10991,9 +10991,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11014,9 +11014,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11030,9 +11030,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11063,9 +11063,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11083,9 +11083,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11099,9 +11099,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11122,9 +11122,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11148,9 +11148,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11171,9 +11171,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11187,9 +11187,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11241,9 +11241,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11359,9 +11359,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11402,9 +11402,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11438,9 +11438,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11452,9 +11452,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11497,9 +11497,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11532,9 +11532,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11546,9 +11546,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11823,9 +11823,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11859,9 +11859,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11873,9 +11873,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11968,9 +11968,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -11982,9 +11982,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12041,9 +12041,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12057,9 +12057,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12085,9 +12085,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12105,9 +12105,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12135,9 +12135,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12155,9 +12155,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12175,9 +12175,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12195,9 +12195,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12211,9 +12211,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12264,9 +12264,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12284,9 +12284,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12304,9 +12304,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12334,9 +12334,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12360,9 +12360,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12374,9 +12374,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12397,9 +12397,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12437,9 +12437,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12457,9 +12457,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12477,9 +12477,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12497,9 +12497,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12513,9 +12513,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12536,9 +12536,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12566,9 +12566,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12622,9 +12622,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12666,9 +12666,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12686,9 +12686,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12702,9 +12702,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12716,9 +12716,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12739,9 +12739,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12759,9 +12759,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12789,9 +12789,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12809,9 +12809,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12829,9 +12829,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12875,9 +12875,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12889,9 +12889,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -12994,9 +12994,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13014,9 +13014,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13034,9 +13034,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13050,9 +13050,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13119,9 +13119,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13149,9 +13149,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13165,9 +13165,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13179,9 +13179,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13193,9 +13193,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13231,9 +13231,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13251,9 +13251,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13267,9 +13267,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13290,9 +13290,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13306,9 +13306,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13329,9 +13329,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13349,9 +13349,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13365,9 +13365,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13379,9 +13379,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13393,9 +13393,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13407,9 +13407,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13421,9 +13421,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13444,9 +13444,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13460,9 +13460,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13474,9 +13474,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13646,9 +13646,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13682,9 +13682,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -13905,9 +13905,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14021,9 +14021,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14066,9 +14066,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14122,9 +14122,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14136,9 +14136,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14150,9 +14150,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14164,9 +14164,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14290,9 +14290,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14310,9 +14310,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14326,9 +14326,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14361,9 +14361,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14406,9 +14406,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14434,9 +14434,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14454,9 +14454,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14470,9 +14470,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14543,9 +14543,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14573,9 +14573,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14589,9 +14589,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14603,9 +14603,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14682,9 +14682,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14752,9 +14752,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14782,9 +14782,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14857,9 +14857,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14877,9 +14877,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14897,9 +14897,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14917,9 +14917,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14937,9 +14937,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14953,9 +14953,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14976,9 +14976,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -14996,9 +14996,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15022,9 +15022,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15036,9 +15036,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15085,9 +15085,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15111,9 +15111,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15134,9 +15134,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15160,9 +15160,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15298,9 +15298,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15312,9 +15312,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15368,9 +15368,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15391,9 +15391,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15407,9 +15407,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15517,9 +15517,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15543,9 +15543,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15557,9 +15557,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15630,9 +15630,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15650,9 +15650,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15670,9 +15670,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15710,9 +15710,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15736,9 +15736,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15781,9 +15781,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15804,9 +15804,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15829,9 +15829,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15845,9 +15845,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15914,9 +15914,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15950,9 +15950,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15985,9 +15985,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -15999,9 +15999,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -16037,9 +16037,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -16063,9 +16063,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -16096,9 +16096,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -16112,9 +16112,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -16126,9 +16126,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -16140,9 +16140,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18475,9 +18475,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18496,9 +18496,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18532,9 +18532,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18555,9 +18555,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18575,9 +18575,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18591,9 +18591,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18634,9 +18634,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18650,9 +18650,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18749,9 +18749,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18765,9 +18765,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18821,9 +18821,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18844,9 +18844,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18860,9 +18860,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18874,9 +18874,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18937,9 +18937,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18963,9 +18963,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -18977,9 +18977,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -19010,9 +19010,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -19026,9 +19026,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -19059,9 +19059,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -19076,9 +19076,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -19161,9 +19161,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -19181,9 +19181,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -19269,9 +19269,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -19283,9 +19283,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -19316,9 +19316,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -19363,9 +19363,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -19409,9 +19409,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -19432,15 +19432,17 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } }, "node_i": "2880", - "native_tokenizers": [], + "native_tokenizers": [ + "Latn" + ], "scripts": [ "Latn", "Arab" @@ -19645,15 +19647,17 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } }, "node_i": "2899", - "native_tokenizers": [], + "native_tokenizers": [ + "Latn" + ], "scripts": [ "Latn", "Arab" @@ -19686,15 +19690,17 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } }, "node_i": "2902", - "native_tokenizers": [], + "native_tokenizers": [ + "Latn" + ], "scripts": [ "Latn" ] @@ -19736,15 +19742,17 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } }, "node_i": "2906", - "native_tokenizers": [], + "native_tokenizers": [ + "Latn" + ], "scripts": [ "Latn" ] @@ -19806,15 +19814,17 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } }, "node_i": "2912", - "native_tokenizers": [], + "native_tokenizers": [ + "Latn" + ], "scripts": [ "Latn", "Arab" @@ -19823,9 +19833,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -19837,9 +19847,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -19851,9 +19861,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -20270,9 +20280,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -20296,9 +20306,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -20310,9 +20320,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -20324,9 +20334,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -20338,9 +20348,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -20407,9 +20417,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -20453,9 +20463,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -20467,9 +20477,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -20755,9 +20765,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -20771,9 +20781,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -20816,9 +20826,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -20849,9 +20859,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -20865,9 +20875,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -20879,9 +20889,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -20914,9 +20924,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21070,9 +21080,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21135,9 +21145,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21171,9 +21181,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21194,9 +21204,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21210,9 +21220,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21224,9 +21234,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21259,9 +21269,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21358,9 +21368,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21374,9 +21384,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21512,9 +21522,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21619,9 +21629,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21633,9 +21643,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21656,9 +21666,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21676,9 +21686,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21702,9 +21712,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21751,9 +21761,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21767,9 +21777,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21948,9 +21958,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -21968,9 +21978,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22008,9 +22018,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22034,9 +22044,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22048,9 +22058,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22076,9 +22086,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22092,9 +22102,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22120,9 +22130,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22140,9 +22150,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22186,9 +22196,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22209,9 +22219,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22229,9 +22239,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22245,9 +22255,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22259,9 +22269,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22282,9 +22292,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22302,9 +22312,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22322,9 +22332,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22342,9 +22352,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22358,9 +22368,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22372,9 +22382,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22386,9 +22396,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22400,9 +22410,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22449,9 +22459,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22510,9 +22520,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22530,9 +22540,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22546,9 +22556,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22581,9 +22591,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22595,9 +22605,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22609,9 +22619,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22623,9 +22633,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22637,9 +22647,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22680,9 +22690,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22710,9 +22720,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22730,9 +22740,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22765,9 +22775,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22795,9 +22805,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22811,9 +22821,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22825,9 +22835,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22848,9 +22858,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22874,9 +22884,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22888,9 +22898,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22921,9 +22931,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22946,9 +22956,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -22966,9 +22976,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23002,9 +23012,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23016,9 +23026,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23030,9 +23040,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23044,9 +23054,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23087,9 +23097,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23127,9 +23137,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23147,9 +23157,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23173,9 +23183,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23196,9 +23206,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23212,9 +23222,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23235,9 +23245,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23255,9 +23265,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23281,9 +23291,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23295,9 +23305,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23318,9 +23328,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23344,9 +23354,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23358,9 +23368,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23381,9 +23391,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23397,9 +23407,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23420,9 +23430,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23436,9 +23446,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23474,9 +23484,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23500,9 +23510,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23545,9 +23555,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23573,9 +23583,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23630,9 +23640,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23704,9 +23714,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23730,9 +23740,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23810,9 +23820,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23846,9 +23856,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23869,9 +23879,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23909,9 +23919,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23935,9 +23945,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23968,9 +23978,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -23998,9 +24008,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -24024,9 +24034,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -24038,9 +24048,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -24114,9 +24124,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -24147,9 +24157,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -24163,9 +24173,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -24208,9 +24218,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -24272,9 +24282,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -24288,9 +24298,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -24332,9 +24342,9 @@ "children": [], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -24348,9 +24358,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" } @@ -24487,9 +24497,9 @@ ], "tokenizers": { "Latn": { - "full_object": "SpaCyTokenizer(\"id\")", - "original_lang_name": "indonesian", - "original_lang_code": "ind", + "full_object": "SpaCyTokenizer(\"ms\")", + "original_lang_name": "malay", + "original_lang_code": "msa", "script": "Latn", "class_name": "SpaCyTokenizer" }