|
{ |
|
"version": "1.0", |
|
"truncation": { |
|
"direction": "Right", |
|
"max_length": 64, |
|
"strategy": "LongestFirst", |
|
"stride": 0 |
|
}, |
|
"padding": { |
|
"strategy": "BatchLongest", |
|
"direction": "Right", |
|
"pad_to_multiple_of": null, |
|
"pad_id": 0, |
|
"pad_type_id": 0, |
|
"pad_token": "<|endoftext|>" |
|
}, |
|
"added_tokens": [ |
|
{ |
|
"id": 0, |
|
"content": "<|endoftext|>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 1, |
|
"content": "<pad>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 2, |
|
"content": "Y", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 3, |
|
"content": "e", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 4, |
|
"content": "a", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 5, |
|
"content": "h", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 6, |
|
"content": ".", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 7, |
|
"content": "\n", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 8, |
|
"content": "C", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 9, |
|
"content": "o", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 10, |
|
"content": "m", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 11, |
|
"content": "p", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 12, |
|
"content": "u", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 13, |
|
"content": "n", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 14, |
|
"content": "d", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 15, |
|
"content": "'", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 16, |
|
"content": "s", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 17, |
|
"content": " ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 18, |
|
"content": "t", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 19, |
|
"content": "i", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 20, |
|
"content": "g", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 21, |
|
"content": "l", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 22, |
|
"content": "k", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 23, |
|
"content": "x", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 24, |
|
"content": ",", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 25, |
|
"content": "r", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 26, |
|
"content": "N", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 27, |
|
"content": "w", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 28, |
|
"content": "v", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 29, |
|
"content": "T", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 30, |
|
"content": "f", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 31, |
|
"content": "A", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 32, |
|
"content": "b", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 33, |
|
"content": "E", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 34, |
|
"content": "c", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 35, |
|
"content": "y", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 36, |
|
"content": "O", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 37, |
|
"content": "S", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 38, |
|
"content": "j", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 39, |
|
"content": "I", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 40, |
|
"content": "R", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 41, |
|
"content": "?", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 42, |
|
"content": "D", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 43, |
|
"content": "-", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 44, |
|
"content": "q", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 45, |
|
"content": "W", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 46, |
|
"content": "Q", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 47, |
|
"content": "M", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 48, |
|
"content": "B", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 49, |
|
"content": "H", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 50, |
|
"content": "L", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 51, |
|
"content": "F", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 52, |
|
"content": "P", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 53, |
|
"content": "G", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 54, |
|
"content": "U", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 55, |
|
"content": "J", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 56, |
|
"content": "X", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 57, |
|
"content": ";", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 58, |
|
"content": "2", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 59, |
|
"content": "V", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 60, |
|
"content": "‘", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 61, |
|
"content": "’", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 62, |
|
"content": "!", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 63, |
|
"content": "é", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 64, |
|
"content": "/", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 65, |
|
"content": "1", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 66, |
|
"content": ":", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 67, |
|
"content": "K", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 68, |
|
"content": "z", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 69, |
|
"content": "Z", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 70, |
|
"content": "3", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 71, |
|
"content": "6", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 72, |
|
"content": "9", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 73, |
|
"content": "&", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 74, |
|
"content": "4", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 75, |
|
"content": "5", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 76, |
|
"content": "0", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 77, |
|
"content": "=", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 78, |
|
"content": "8", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 79, |
|
"content": "7", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 80, |
|
"content": "£", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 81, |
|
"content": "(", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 82, |
|
"content": ")", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 83, |
|
"content": "—", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 84, |
|
"content": "*", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 85, |
|
"content": "\"", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 86, |
|
"content": "%", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 87, |
|
"content": "“", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 88, |
|
"content": "”", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 89, |
|
"content": "_", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 90, |
|
"content": "+", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 91, |
|
"content": "$", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 92, |
|
"content": "^", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 93, |
|
"content": "ʊ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 94, |
|
"content": "č", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 95, |
|
"content": "#", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 96, |
|
"content": "æ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 97, |
|
"content": "ʌ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 98, |
|
"content": "ɷ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 99, |
|
"content": "ɩ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 100, |
|
"content": "ɛ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 101, |
|
"content": "ə", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 102, |
|
"content": "↫", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 103, |
|
"content": "à", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 104, |
|
"content": "|", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 105, |
|
"content": "[", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 106, |
|
"content": "]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 107, |
|
"content": "°", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 108, |
|
"content": "Ö", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 109, |
|
"content": "À", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 110, |
|
"content": "ç", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 111, |
|
"content": "è", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 112, |
|
"content": "É", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 113, |
|
"content": "â", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 114, |
|
"content": "Ø", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 115, |
|
"content": "ê", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 116, |
|
"content": "~", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 117, |
|
"content": "ë", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 118, |
|
"content": "―", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 119, |
|
"content": "ï", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 120, |
|
"content": "ô", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 121, |
|
"content": "ü", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 122, |
|
"content": "⁄", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 123, |
|
"content": "Â", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 124, |
|
"content": "á", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 125, |
|
"content": "`", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 126, |
|
"content": "Æ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 127, |
|
"content": "È", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 128, |
|
"content": "�", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 129, |
|
"content": "ñ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 130, |
|
"content": "í", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 131, |
|
"content": "ú", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 132, |
|
"content": "ā", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 133, |
|
"content": "ó", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 134, |
|
"content": "ĕ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 135, |
|
"content": "ō", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 136, |
|
"content": "ä", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 137, |
|
"content": "Ó", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 138, |
|
"content": "ē", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 139, |
|
"content": "′", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 140, |
|
"content": "ī", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 141, |
|
"content": "Á", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 142, |
|
"content": "û", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 143, |
|
"content": "ö", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 144, |
|
"content": "ń", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 145, |
|
"content": "ă", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 146, |
|
"content": "†", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 147, |
|
"content": "ĭ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 148, |
|
"content": "ū", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 149, |
|
"content": "Ú", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 150, |
|
"content": "î", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 151, |
|
"content": "Í", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 152, |
|
"content": "§", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 153, |
|
"content": "Ō", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 154, |
|
"content": "Ā", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 155, |
|
"content": "@", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 156, |
|
"content": "}", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 157, |
|
"content": "☛", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 158, |
|
"content": "☚", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 159, |
|
"content": "Ñ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 160, |
|
"content": "{", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 161, |
|
"content": "–", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 162, |
|
"content": "·", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 163, |
|
"content": "Ü", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 164, |
|
"content": "©", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 165, |
|
"content": "Ë", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 166, |
|
"content": "♪", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 167, |
|
"content": "ã", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 168, |
|
"content": "¡", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 169, |
|
"content": "¤", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 170, |
|
"content": "÷", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 171, |
|
"content": "Û", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 172, |
|
"content": "Î", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 173, |
|
"content": "\\", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 174, |
|
"content": "", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 175, |
|
"content": "¶", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 176, |
|
"content": "Ä", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 177, |
|
"content": "ì", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 178, |
|
"content": "ò", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 179, |
|
"content": "¢", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 180, |
|
"content": "ù", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 181, |
|
"content": "Ê", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 182, |
|
"content": "±", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 183, |
|
"content": "Ð", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 184, |
|
"content": "Ô", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 185, |
|
"content": "¿", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 186, |
|
"content": "", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 187, |
|
"content": "Ï", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 188, |
|
"content": "ý", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 189, |
|
"content": "Þ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 190, |
|
"content": "Ã", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 191, |
|
"content": "Ý", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 192, |
|
"content": "Ì", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 193, |
|
"content": "⬄", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 194, |
|
"content": "", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 195, |
|
"content": "", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 196, |
|
"content": "♫", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 197, |
|
"content": "", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 198, |
|
"content": "", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 199, |
|
"content": "", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 200, |
|
"content": "", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 201, |
|
"content": "œ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 202, |
|
"content": "Œ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 203, |
|
"content": "Ÿ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 204, |
|
"content": "ł", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 205, |
|
"content": "ż", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 206, |
|
"content": "š", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 207, |
|
"content": "Š", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 208, |
|
"content": "ð", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 209, |
|
"content": "®", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 210, |
|
"content": "¬", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 211, |
|
"content": "Å", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 212, |
|
"content": "¦", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 213, |
|
"content": "Ç", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 214, |
|
"content": "å", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 215, |
|
"content": "ˆ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 216, |
|
"content": "ş", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 217, |
|
"content": "ø", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 218, |
|
"content": "×", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 219, |
|
"content": "", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 220, |
|
"content": "€", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 221, |
|
"content": "", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 222, |
|
"content": "؟", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 223, |
|
"content": "ـ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 224, |
|
"content": "", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 225, |
|
"content": "Ò", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 226, |
|
"content": "Ù", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 227, |
|
"content": "þ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 228, |
|
"content": "ž", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 229, |
|
"content": "„", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 230, |
|
"content": "ė", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 231, |
|
"content": "‚", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 232, |
|
"content": "ć", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 233, |
|
"content": "ś", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 234, |
|
"content": "ą", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 235, |
|
"content": "Č", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 236, |
|
"content": "ğ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 237, |
|
"content": "ș", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 238, |
|
"content": "Ś", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 239, |
|
"content": "ß", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 240, |
|
"content": "∂", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 241, |
|
"content": "ŭ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 242, |
|
"content": "ˈ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 243, |
|
"content": "ɫ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 244, |
|
"content": "ʋ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 245, |
|
"content": "ɐ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 246, |
|
"content": "ı", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 247, |
|
"content": "ň", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 248, |
|
"content": "đ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 249, |
|
"content": "ř", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 250, |
|
"content": "ě", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 251, |
|
"content": "₤", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 252, |
|
"content": "ǔ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 253, |
|
"content": "ễ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 254, |
|
"content": "ệ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 255, |
|
"content": "ề", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 256, |
|
"content": "ơ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 257, |
|
"content": "Ż", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 258, |
|
"content": "ő", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 259, |
|
"content": "−", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 260, |
|
"content": "→", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 261, |
|
"content": "ę", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 262, |
|
"content": "ɑ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 263, |
|
"content": "ː", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 264, |
|
"content": "ɔ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 265, |
|
"content": "ţ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 266, |
|
"content": "Ž", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 267, |
|
"content": "Ł", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 268, |
|
"content": "õ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 269, |
|
"content": "Đ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 270, |
|
"content": "ỗ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 271, |
|
"content": "ư", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 272, |
|
"content": "ờ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 273, |
|
"content": "≈", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 274, |
|
"content": "⊙", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 275, |
|
"content": "•", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 276, |
|
"content": "Ř", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 277, |
|
"content": "ů", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 278, |
|
"content": "ħ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 279, |
|
"content": "ṭ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 280, |
|
"content": "ġ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 281, |
|
"content": "ǁ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 282, |
|
"content": "Ľ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 283, |
|
"content": "İ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 284, |
|
"content": "ț", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 285, |
|
"content": "", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 286, |
|
"content": "⟨", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 287, |
|
"content": "⟩", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 288, |
|
"content": "ộ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 289, |
|
"content": "ầ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 290, |
|
"content": "ữ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 291, |
|
"content": "ủ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 292, |
|
"content": "⋅", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 293, |
|
"content": "ʒ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 294, |
|
"content": "Ş", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 295, |
|
"content": "ɪ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 296, |
|
"content": "₹", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 297, |
|
"content": "☯", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 298, |
|
"content": "ļ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 299, |
|
"content": "♯", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 300, |
|
"content": "♭", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 301, |
|
"content": "ʃ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 302, |
|
"content": "ɨ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 303, |
|
"content": "Ș", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 304, |
|
"content": "ź", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 305, |
|
"content": "Ṃ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 306, |
|
"content": "ḷ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 307, |
|
"content": "ʑ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 308, |
|
"content": "ứ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 309, |
|
"content": "ạ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 310, |
|
"content": "ṅ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 311, |
|
"content": "་", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 312, |
|
"content": "ー", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 313, |
|
"content": "ŋ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 314, |
|
"content": "ʼ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 315, |
|
"content": "į", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 316, |
|
"content": "ņ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 317, |
|
"content": "ḍ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 318, |
|
"content": "ṇ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 319, |
|
"content": "ṣ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 320, |
|
"content": "၁", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 321, |
|
"content": "၂", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 322, |
|
"content": "၃", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 323, |
|
"content": "၄", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 324, |
|
"content": "၅", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 325, |
|
"content": "၆", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 326, |
|
"content": "၇", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 327, |
|
"content": "၈", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 328, |
|
"content": "၉", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 329, |
|
"content": "၀", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 330, |
|
"content": "ĺ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 331, |
|
"content": "ķ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 332, |
|
"content": "Ḥ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 333, |
|
"content": "ʻ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 334, |
|
"content": "¥", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 335, |
|
"content": "ǒ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 336, |
|
"content": "ọ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 337, |
|
"content": "˥", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 338, |
|
"content": "˨", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 339, |
|
"content": "˩", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 340, |
|
"content": "ˌ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 341, |
|
"content": "۰", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 342, |
|
"content": "ʾ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 343, |
|
"content": "ḥ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 344, |
|
"content": "ʿ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 345, |
|
"content": "ď", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 346, |
|
"content": "Ṭ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 347, |
|
"content": "ả", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 348, |
|
"content": "█", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 349, |
|
"content": "Ĥ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 350, |
|
"content": "∴", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 351, |
|
"content": "ŏ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 352, |
|
"content": "ʁ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 353, |
|
"content": "ľ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
}, |
|
{ |
|
"id": 354, |
|
"content": "Õ", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": false |
|
} |
|
], |
|
"normalizer": null, |
|
"pre_tokenizer": null, |
|
"post_processor": null, |
|
"decoder": null, |
|
"model": { |
|
"type": "BPE", |
|
"dropout": null, |
|
"unk_token": null, |
|
"continuing_subword_prefix": null, |
|
"end_of_word_suffix": null, |
|
"fuse_unk": false, |
|
"byte_fallback": false, |
|
"ignore_merges": false, |
|
"vocab": {}, |
|
"merges": [] |
|
} |
|
} |