{ "version": "1.0", "truncation": null, "padding": { "strategy": "BatchLongest", "direction": "Right", "pad_to_multiple_of": null, "pad_id": 0, "pad_type_id": 0, "pad_token": "" }, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "NFD" }, "pre_tokenizer": { "type": "Sequence", "pretokenizers": [ { "type": "Whitespace" }, { "type": "Split", "pattern": { "Regex": "[\\[\\]\\(\\)\\.]" }, "behavior": "MergedWithPrevious", "invert": false }, { "type": "Split", "pattern": { "Regex": "Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|H" }, "behavior": "Isolated", "invert": false }, { "type": "Split", "pattern": { "Regex": "=|#|-|\\+|\\\\|\\/|:|~|@|\\?|>|\\*|\\$|\\%[0-9]{2}|[0-9]" }, "behavior": "MergedWithPrevious", "invert": false } ] }, "post_processor": { "type": "TemplateProcessing", "single": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "", "type_id": 1 } } ], "special_tokens": { "": { "id": "", "ids": [ 3 ], "tokens": [ "" ] }, "": { "id": "", "ids": [ 2 ], "tokens": [ "" ] } } }, "decoder": null, "model": { "type": "WordLevel", "vocab": { "": 0, "": 1, "": 2, "": 3, "C": 4, "(": 5, ")": 6, "H": 7, "O": 8, "1": 9, "c": 10, "N": 11, "@": 12, "=": 13, "[": 14, "]": 15, "n": 16, "2": 17, "3": 18, "S": 19, "F": 20, "o": 21, "#": 22, "s": 23, "*": 24, "Cl": 25, "-": 26, "/": 27, "4": 28, "5": 29, "Br": 30, "\\": 31, "6": 32, "+": 33, "8": 34, "I": 35, "9": 36, "0": 37, "7": 38, "B": 39, "i": 40, "P": 41 }, "unk_token": "" } }