Transformers
mohammad-khodadad committed on
Commit
c136633
·
verified ·
1 Parent(s): fb7a84e

Upload tokenizer

Browse files
Files changed (3) hide show
  1. tokenizer.json +8 -53
  2. tokenizer_config.json +0 -40
  3. vocab.txt +6 -6
tokenizer.json CHANGED
@@ -47,51 +47,6 @@
47
  "rstrip": false,
48
  "normalized": false,
49
  "special": true
50
- },
51
- {
52
- "id": 30470,
53
- "content": "iminomethyl",
54
- "single_word": false,
55
- "lstrip": false,
56
- "rstrip": false,
57
- "normalized": true,
58
- "special": false
59
- },
60
- {
61
- "id": 30471,
62
- "content": "trihydroxypropyl",
63
- "single_word": false,
64
- "lstrip": false,
65
- "rstrip": false,
66
- "normalized": true,
67
- "special": false
68
- },
69
- {
70
- "id": 30472,
71
- "content": "propane",
72
- "single_word": false,
73
- "lstrip": false,
74
- "rstrip": false,
75
- "normalized": true,
76
- "special": false
77
- },
78
- {
79
- "id": 30473,
80
- "content": "aminopurin",
81
- "single_word": false,
82
- "lstrip": false,
83
- "rstrip": false,
84
- "normalized": true,
85
- "special": false
86
- },
87
- {
88
- "id": 30474,
89
- "content": "##chromene",
90
- "single_word": false,
91
- "lstrip": false,
92
- "rstrip": false,
93
- "normalized": true,
94
- "special": false
95
  }
96
  ],
97
  "normalizer": {
@@ -696,8 +651,8 @@
696
  "chl": 503,
697
  "cycl": 504,
698
  "dro": 505,
699
- "##tit": 506,
700
- "##opyran": 507,
701
  "dif": 508,
702
  "##amin": 509,
703
  "##anim": 510,
@@ -921,8 +876,8 @@
921
  "##quinoline": 728,
922
  "12s": 729,
923
  "17r": 730,
924
- "##uino": 731,
925
- "##xal": 732,
926
  "##uinoxal": 733,
927
  "thiophene": 734,
928
  "methylprop": 735,
@@ -1185,10 +1140,10 @@
1185
  "##azocin": 992,
1186
  "##benzoic": 993,
1187
  "##roprop": 994,
1188
- "cyclohexane": 995,
1189
- "diazabicyclo": 996,
1190
- "dioxopyrimidin": 997,
1191
- "carbaldehyde": 998,
1192
  "!": 999,
1193
  "\"": 1000,
1194
  "#": 1001,
 
47
  "rstrip": false,
48
  "normalized": false,
49
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
  ],
52
  "normalizer": {
 
651
  "chl": 503,
652
  "cycl": 504,
653
  "dro": 505,
654
+ "##opyran": 506,
655
+ "##tit": 507,
656
  "dif": 508,
657
  "##amin": 509,
658
  "##anim": 510,
 
876
  "##quinoline": 728,
877
  "12s": 729,
878
  "17r": 730,
879
+ "##xal": 731,
880
+ "##uino": 732,
881
  "##uinoxal": 733,
882
  "thiophene": 734,
883
  "methylprop": 735,
 
1140
  "##azocin": 992,
1141
  "##benzoic": 993,
1142
  "##roprop": 994,
1143
+ "[unused990]": 995,
1144
+ "[unused991]": 996,
1145
+ "[unused992]": 997,
1146
+ "[unused993]": 998,
1147
  "!": 999,
1148
  "\"": 1000,
1149
  "#": 1001,
tokenizer_config.json CHANGED
@@ -39,46 +39,6 @@
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
- },
43
- "30470": {
44
- "content": "iminomethyl",
45
- "lstrip": false,
46
- "normalized": true,
47
- "rstrip": false,
48
- "single_word": false,
49
- "special": false
50
- },
51
- "30471": {
52
- "content": "trihydroxypropyl",
53
- "lstrip": false,
54
- "normalized": true,
55
- "rstrip": false,
56
- "single_word": false,
57
- "special": false
58
- },
59
- "30472": {
60
- "content": "propane",
61
- "lstrip": false,
62
- "normalized": true,
63
- "rstrip": false,
64
- "single_word": false,
65
- "special": false
66
- },
67
- "30473": {
68
- "content": "aminopurin",
69
- "lstrip": false,
70
- "normalized": true,
71
- "rstrip": false,
72
- "single_word": false,
73
- "special": false
74
- },
75
- "30474": {
76
- "content": "##chromene",
77
- "lstrip": false,
78
- "normalized": true,
79
- "rstrip": false,
80
- "single_word": false,
81
- "special": false
82
  }
83
  },
84
  "clean_up_tokenization_spaces": true,
 
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  }
43
  },
44
  "clean_up_tokenization_spaces": true,
vocab.txt CHANGED
@@ -504,8 +504,8 @@ argon
504
  chl
505
  cycl
506
  dro
507
- ##tit
508
  ##opyran
 
509
  dif
510
  ##amin
511
  ##anim
@@ -729,8 +729,8 @@ methanone
729
  ##quinoline
730
  12s
731
  17r
732
- ##uino
733
  ##xal
 
734
  ##uinoxal
735
  thiophene
736
  methylprop
@@ -993,10 +993,10 @@ oxir
993
  ##azocin
994
  ##benzoic
995
  ##roprop
996
- cyclohexane
997
- diazabicyclo
998
- dioxopyrimidin
999
- carbaldehyde
1000
  !
1001
  "
1002
  #
 
504
  chl
505
  cycl
506
  dro
 
507
  ##opyran
508
+ ##tit
509
  dif
510
  ##amin
511
  ##anim
 
729
  ##quinoline
730
  12s
731
  17r
 
732
  ##xal
733
+ ##uino
734
  ##uinoxal
735
  thiophene
736
  methylprop
 
993
  ##azocin
994
  ##benzoic
995
  ##roprop
996
+ [unused990]
997
+ [unused991]
998
+ [unused992]
999
+ [unused993]
1000
  !
1001
  "
1002
  #