alps / unitable /vocab /vocab_html.json
yumikimi381's picture
Upload folder using huggingface_hub
daf0288 verified
{
"version": "1.0",
"truncation": null,
"padding": {
"strategy": "BatchLongest",
"direction": "Right",
"pad_to_multiple_of": null,
"pad_id": 2,
"pad_type_id": 0,
"pad_token": "<pad>"
},
"added_tokens": [
{
"id": 0,
"content": "<sos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<eos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "<empty>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 5,
"content": "<sep>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 6,
"content": "[table]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 7,
"content": "[html]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 8,
"content": "[cell]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 9,
"content": "[bbox]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 10,
"content": "[cell+bbox]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 11,
"content": "<td></td>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 12,
"content": "<td>[]</td>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 13,
"content": "<td",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 14,
"content": "></td>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 15,
"content": ">[]</td>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 16,
"content": "<tr>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 17,
"content": "</tr>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 18,
"content": "<tbody>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 19,
"content": "</tbody>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 20,
"content": "<thead>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 21,
"content": "</thead>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 22,
"content": " rowspan=\"2\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 23,
"content": " rowspan=\"3\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 24,
"content": " rowspan=\"4\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 25,
"content": " rowspan=\"5\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 26,
"content": " rowspan=\"6\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 27,
"content": " rowspan=\"7\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 28,
"content": " rowspan=\"8\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 29,
"content": " rowspan=\"9\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 30,
"content": " rowspan=\"10\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 31,
"content": " rowspan=\"11\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 32,
"content": " rowspan=\"12\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 33,
"content": " rowspan=\"13\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 34,
"content": " rowspan=\"14\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 35,
"content": " rowspan=\"15\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 36,
"content": " rowspan=\"16\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 37,
"content": " rowspan=\"17\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 38,
"content": " rowspan=\"18\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 39,
"content": " rowspan=\"19\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 40,
"content": " colspan=\"2\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 41,
"content": " colspan=\"3\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 42,
"content": " colspan=\"4\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 43,
"content": " colspan=\"5\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 44,
"content": " colspan=\"6\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 45,
"content": " colspan=\"7\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 46,
"content": " colspan=\"8\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 47,
"content": " colspan=\"9\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 48,
"content": " colspan=\"10\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 49,
"content": " colspan=\"11\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 50,
"content": " colspan=\"12\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 51,
"content": " colspan=\"13\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 52,
"content": " colspan=\"14\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 53,
"content": " colspan=\"15\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 54,
"content": " colspan=\"16\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 55,
"content": " colspan=\"17\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 56,
"content": " colspan=\"18\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 57,
"content": " colspan=\"19\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 58,
"content": " colspan=\"25\"",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Sequence",
"normalizers": [
{
"type": "NFD"
},
{
"type": "Lowercase"
},
{
"type": "StripAccents"
},
{
"type": "Strip",
"strip_left": true,
"strip_right": true
}
]
},
"pre_tokenizer": {
"type": "Whitespace"
},
"post_processor": null,
"decoder": {
"type": "WordPiece",
"prefix": "##",
"cleanup": true
},
"model": {
"type": "WordPiece",
"unk_token": "<unk>",
"continuing_subword_prefix": "##",
"max_input_chars_per_word": 100,
"vocab": {
"<sos>": 0,
"<eos>": 1,
"<pad>": 2,
"<unk>": 3,
"<empty>": 4,
"<sep>": 5,
"[table]": 6,
"[html]": 7,
"[cell]": 8,
"[bbox]": 9,
"[cell+bbox]": 10,
"<td></td>": 11,
"<td>[]</td>": 12,
"<td": 13,
"></td>": 14,
">[]</td>": 15,
"<tr>": 16,
"</tr>": 17,
"<tbody>": 18,
"</tbody>": 19,
"<thead>": 20,
"</thead>": 21,
" rowspan=\"2\"": 22,
" rowspan=\"3\"": 23,
" rowspan=\"4\"": 24,
" rowspan=\"5\"": 25,
" rowspan=\"6\"": 26,
" rowspan=\"7\"": 27,
" rowspan=\"8\"": 28,
" rowspan=\"9\"": 29,
" rowspan=\"10\"": 30,
" rowspan=\"11\"": 31,
" rowspan=\"12\"": 32,
" rowspan=\"13\"": 33,
" rowspan=\"14\"": 34,
" rowspan=\"15\"": 35,
" rowspan=\"16\"": 36,
" rowspan=\"17\"": 37,
" rowspan=\"18\"": 38,
" rowspan=\"19\"": 39,
" colspan=\"2\"": 40,
" colspan=\"3\"": 41,
" colspan=\"4\"": 42,
" colspan=\"5\"": 43,
" colspan=\"6\"": 44,
" colspan=\"7\"": 45,
" colspan=\"8\"": 46,
" colspan=\"9\"": 47,
" colspan=\"10\"": 48,
" colspan=\"11\"": 49,
" colspan=\"12\"": 50,
" colspan=\"13\"": 51,
" colspan=\"14\"": 52,
" colspan=\"15\"": 53,
" colspan=\"16\"": 54,
" colspan=\"17\"": 55,
" colspan=\"18\"": 56,
" colspan=\"19\"": 57,
" colspan=\"25\"": 58
}
}
}