{ "version": "1.0", "truncation": { "direction": "Right", "max_length": 64, "strategy": "LongestFirst", "stride": 0 }, "padding": { "strategy": "BatchLongest", "direction": "Right", "pad_to_multiple_of": null, "pad_id": 0, "pad_type_id": 0, "pad_token": "<|endoftext|>" }, "added_tokens": [ { "id": 0, "content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "Y", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 3, "content": "e", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 4, "content": "a", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 5, "content": "h", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 6, "content": ".", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 7, "content": "\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 8, "content": "C", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 9, "content": "o", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 10, "content": "m", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 11, "content": "p", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 12, "content": "u", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 13, "content": "n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 14, "content": "d", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 15, "content": "'", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 16, "content": "s", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 17, "content": " ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 18, "content": "t", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 19, "content": "i", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 20, "content": "g", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 21, "content": "l", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 22, "content": "k", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 23, "content": "x", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 24, "content": ",", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 25, "content": "r", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 26, "content": "N", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 27, "content": "w", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 28, "content": "v", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 29, "content": "T", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 30, "content": "f", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 31, "content": "A", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 32, "content": "b", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 33, "content": "E", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 34, "content": "c", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 35, "content": "y", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 36, "content": "O", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 37, "content": "S", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 38, "content": "j", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 39, "content": "I", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 40, "content": "R", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 41, "content": "?", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 42, "content": "D", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 43, "content": "-", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 44, "content": "q", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 45, "content": "W", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 46, "content": "Q", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 47, "content": "M", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 48, "content": "B", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 49, "content": "H", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 50, "content": "L", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 51, "content": "F", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 52, "content": "P", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 53, "content": "G", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 54, "content": "U", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 55, "content": "J", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 56, "content": "X", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 57, "content": ";", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 58, "content": "2", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 59, "content": "V", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 60, "content": "‘", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 61, "content": "’", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 62, "content": "!", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 63, "content": "é", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 64, "content": "/", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 65, "content": "1", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 66, "content": ":", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 67, "content": "K", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 68, "content": "z", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 69, "content": "Z", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 70, "content": "3", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 71, "content": "6", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 72, "content": "9", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 73, "content": "&", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 74, "content": "4", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 75, "content": "5", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 76, "content": "0", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 77, "content": "=", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 78, "content": "8", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 79, "content": "7", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 80, "content": "£", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 81, "content": "(", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 82, "content": ")", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 83, "content": "—", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 84, "content": "*", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 85, "content": "\"", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 86, "content": "%", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 87, "content": "“", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 88, "content": "”", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 89, "content": "_", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 90, "content": "+", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 91, "content": "$", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 92, "content": "^", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 93, "content": "ʊ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 94, "content": "č", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 95, "content": "#", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 96, "content": "æ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 97, "content": "ʌ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 98, "content": "ɷ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 99, "content": "ɩ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 100, "content": "ɛ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 101, "content": "ə", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 102, "content": "↫", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 103, "content": "à", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 104, "content": "|", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 105, "content": "[", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 106, "content": "]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 107, "content": "°", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 108, "content": "Ö", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 109, "content": "À", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 110, "content": "ç", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 111, "content": "è", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 112, "content": "É", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 113, "content": "â", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 114, "content": "Ø", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 115, "content": "ê", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 116, "content": "~", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 117, "content": "ë", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 118, "content": "―", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 119, "content": "ï", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 120, "content": "ô", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 121, "content": "ü", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 122, "content": "⁄", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 123, "content": "Â", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 124, "content": "á", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 125, "content": "`", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 126, "content": "Æ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 127, "content": "È", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 128, "content": "�", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 129, "content": "ñ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 130, "content": "í", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 131, "content": "ú", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 132, "content": "ā", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 133, "content": "ó", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 134, "content": "ĕ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 135, "content": "ō", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 136, "content": "ä", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 137, "content": "Ó", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 138, "content": "ē", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 139, "content": "′", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 140, "content": "ī", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 141, "content": "Á", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 142, "content": "û", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 143, "content": "ö", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 144, "content": "ń", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 145, "content": "ă", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 146, "content": "†", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 147, "content": "ĭ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 148, "content": "ū", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 149, "content": "Ú", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 150, "content": "î", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 151, "content": "Í", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 152, "content": "§", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 153, "content": "Ō", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 154, "content": "Ā", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 155, "content": "@", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 156, "content": "}", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 157, "content": "☛", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 158, "content": "☚", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 159, "content": "Ñ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 160, "content": "{", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 161, "content": "–", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 162, "content": "·", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 163, "content": "Ü", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 164, "content": "©", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 165, "content": "Ë", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 166, "content": "♪", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 167, "content": "ã", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 168, "content": "¡", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 169, "content": "¤", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 170, "content": "÷", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 171, "content": "Û", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 172, "content": "Î", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 173, "content": "\\", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 174, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 175, "content": "¶", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 176, "content": "Ä", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 177, "content": "ì", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 178, "content": "ò", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 179, "content": "¢", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 180, "content": "ù", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 181, "content": "Ê", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 182, "content": "±", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 183, "content": "Ð", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 184, "content": "Ô", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 185, "content": "¿", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 186, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 187, "content": "Ï", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 188, "content": "ý", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 189, "content": "Þ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 190, "content": "Ã", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 191, "content": "Ý", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 192, "content": "Ì", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 193, "content": "⬄", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 194, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 195, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 196, "content": "♫", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 197, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 198, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 199, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 200, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 201, "content": "œ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 202, "content": "Œ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 203, "content": "Ÿ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 204, "content": "ł", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 205, "content": "ż", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 206, "content": "š", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 207, "content": "Š", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 208, "content": "ð", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 209, "content": "®", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 210, "content": "¬", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 211, "content": "Å", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 212, "content": "¦", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 213, "content": "Ç", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 214, "content": "å", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 215, "content": "ˆ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 216, "content": "ş", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 217, "content": "ø", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 218, "content": "×", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 219, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 220, "content": "€", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 221, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 222, "content": "؟", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 223, "content": "ـ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 224, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 225, "content": "Ò", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 226, "content": "Ù", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 227, "content": "þ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 228, "content": "ž", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 229, "content": "„", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 230, "content": "ė", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 231, "content": "‚", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 232, "content": "ć", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 233, "content": "ś", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 234, "content": "ą", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 235, "content": "Č", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 236, "content": "ğ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 237, "content": "ș", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 238, "content": "Ś", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 239, "content": "ß", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 240, "content": "∂", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 241, "content": "ŭ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 242, "content": "ˈ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 243, "content": "ɫ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 244, "content": "ʋ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 245, "content": "ɐ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 246, "content": "ı", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 247, "content": "ň", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 248, "content": "đ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 249, "content": "ř", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 250, "content": "ě", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 251, "content": "₤", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 252, "content": "ǔ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 253, "content": "ễ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 254, "content": "ệ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 255, "content": "ề", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 256, "content": "ơ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 257, "content": "Ż", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 258, "content": "ő", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 259, "content": "−", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 260, "content": "→", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 261, "content": "ę", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 262, "content": "ɑ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 263, "content": "ː", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 264, "content": "ɔ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 265, "content": "ţ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 266, "content": "Ž", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 267, "content": "Ł", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 268, "content": "õ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 269, "content": "Đ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 270, "content": "ỗ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 271, "content": "ư", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 272, "content": "ờ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 273, "content": "≈", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 274, "content": "⊙", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 275, "content": "•", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 276, "content": "Ř", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 277, "content": "ů", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 278, "content": "ħ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 279, "content": "ṭ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 280, "content": "ġ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 281, "content": "ǁ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 282, "content": "Ľ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 283, "content": "İ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 284, "content": "ț", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 285, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 286, "content": "⟨", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 287, "content": "⟩", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 288, "content": "ộ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 289, "content": "ầ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 290, "content": "ữ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 291, "content": "ủ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 292, "content": "⋅", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 293, "content": "ʒ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 294, "content": "Ş", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 295, "content": "ɪ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 296, "content": "₹", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 297, "content": "☯", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 298, "content": "ļ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 299, "content": "♯", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 300, "content": "♭", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 301, "content": "ʃ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 302, "content": "ɨ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 303, "content": "Ș", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 304, "content": "ź", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 305, "content": "Ṃ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 306, "content": "ḷ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 307, "content": "ʑ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 308, "content": "ứ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 309, "content": "ạ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 310, "content": "ṅ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 311, "content": "་", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 312, "content": "ー", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 313, "content": "ŋ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 314, "content": "ʼ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 315, "content": "į", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 316, "content": "ņ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 317, "content": "ḍ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 318, "content": "ṇ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 319, "content": "ṣ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 320, "content": "၁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 321, "content": "၂", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 322, "content": "၃", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 323, "content": "၄", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 324, "content": "၅", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 325, "content": "၆", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 326, "content": "၇", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 327, "content": "၈", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 328, "content": "၉", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 329, "content": "၀", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 330, "content": "ĺ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 331, "content": "ķ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 332, "content": "Ḥ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 333, "content": "ʻ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 334, "content": "¥", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 335, "content": "ǒ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 336, "content": "ọ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 337, "content": "˥", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 338, "content": "˨", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 339, "content": "˩", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 340, "content": "ˌ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 341, "content": "۰", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 342, "content": "ʾ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 343, "content": "ḥ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 344, "content": "ʿ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 345, "content": "ď", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 346, "content": "Ṭ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 347, "content": "ả", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 348, "content": "█", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 349, "content": "Ĥ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 350, "content": "∴", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 351, "content": "ŏ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 352, "content": "ʁ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 353, "content": "ľ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 354, "content": "Õ", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false } ], "normalizer": null, "pre_tokenizer": null, "post_processor": null, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": {}, "merges": [] } }