Upload tokenizer
Browse files- tokenizer.json +8 -53
- tokenizer_config.json +0 -40
- vocab.txt +6 -6
tokenizer.json
CHANGED
@@ -47,51 +47,6 @@
|
|
47 |
"rstrip": false,
|
48 |
"normalized": false,
|
49 |
"special": true
|
50 |
-
},
|
51 |
-
{
|
52 |
-
"id": 30470,
|
53 |
-
"content": "iminomethyl",
|
54 |
-
"single_word": false,
|
55 |
-
"lstrip": false,
|
56 |
-
"rstrip": false,
|
57 |
-
"normalized": true,
|
58 |
-
"special": false
|
59 |
-
},
|
60 |
-
{
|
61 |
-
"id": 30471,
|
62 |
-
"content": "trihydroxypropyl",
|
63 |
-
"single_word": false,
|
64 |
-
"lstrip": false,
|
65 |
-
"rstrip": false,
|
66 |
-
"normalized": true,
|
67 |
-
"special": false
|
68 |
-
},
|
69 |
-
{
|
70 |
-
"id": 30472,
|
71 |
-
"content": "propane",
|
72 |
-
"single_word": false,
|
73 |
-
"lstrip": false,
|
74 |
-
"rstrip": false,
|
75 |
-
"normalized": true,
|
76 |
-
"special": false
|
77 |
-
},
|
78 |
-
{
|
79 |
-
"id": 30473,
|
80 |
-
"content": "aminopurin",
|
81 |
-
"single_word": false,
|
82 |
-
"lstrip": false,
|
83 |
-
"rstrip": false,
|
84 |
-
"normalized": true,
|
85 |
-
"special": false
|
86 |
-
},
|
87 |
-
{
|
88 |
-
"id": 30474,
|
89 |
-
"content": "##chromene",
|
90 |
-
"single_word": false,
|
91 |
-
"lstrip": false,
|
92 |
-
"rstrip": false,
|
93 |
-
"normalized": true,
|
94 |
-
"special": false
|
95 |
}
|
96 |
],
|
97 |
"normalizer": {
|
@@ -696,8 +651,8 @@
|
|
696 |
"chl": 503,
|
697 |
"cycl": 504,
|
698 |
"dro": 505,
|
699 |
-
"##
|
700 |
-
"##
|
701 |
"dif": 508,
|
702 |
"##amin": 509,
|
703 |
"##anim": 510,
|
@@ -921,8 +876,8 @@
|
|
921 |
"##quinoline": 728,
|
922 |
"12s": 729,
|
923 |
"17r": 730,
|
924 |
-
"##
|
925 |
-
"##
|
926 |
"##uinoxal": 733,
|
927 |
"thiophene": 734,
|
928 |
"methylprop": 735,
|
@@ -1185,10 +1140,10 @@
|
|
1185 |
"##azocin": 992,
|
1186 |
"##benzoic": 993,
|
1187 |
"##roprop": 994,
|
1188 |
-
"
|
1189 |
-
"
|
1190 |
-
"
|
1191 |
-
"
|
1192 |
"!": 999,
|
1193 |
"\"": 1000,
|
1194 |
"#": 1001,
|
|
|
47 |
"rstrip": false,
|
48 |
"normalized": false,
|
49 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
}
|
51 |
],
|
52 |
"normalizer": {
|
|
|
651 |
"chl": 503,
|
652 |
"cycl": 504,
|
653 |
"dro": 505,
|
654 |
+
"##opyran": 506,
|
655 |
+
"##tit": 507,
|
656 |
"dif": 508,
|
657 |
"##amin": 509,
|
658 |
"##anim": 510,
|
|
|
876 |
"##quinoline": 728,
|
877 |
"12s": 729,
|
878 |
"17r": 730,
|
879 |
+
"##xal": 731,
|
880 |
+
"##uino": 732,
|
881 |
"##uinoxal": 733,
|
882 |
"thiophene": 734,
|
883 |
"methylprop": 735,
|
|
|
1140 |
"##azocin": 992,
|
1141 |
"##benzoic": 993,
|
1142 |
"##roprop": 994,
|
1143 |
+
"[unused990]": 995,
|
1144 |
+
"[unused991]": 996,
|
1145 |
+
"[unused992]": 997,
|
1146 |
+
"[unused993]": 998,
|
1147 |
"!": 999,
|
1148 |
"\"": 1000,
|
1149 |
"#": 1001,
|
tokenizer_config.json
CHANGED
@@ -39,46 +39,6 @@
|
|
39 |
"rstrip": false,
|
40 |
"single_word": false,
|
41 |
"special": true
|
42 |
-
},
|
43 |
-
"30470": {
|
44 |
-
"content": "iminomethyl",
|
45 |
-
"lstrip": false,
|
46 |
-
"normalized": true,
|
47 |
-
"rstrip": false,
|
48 |
-
"single_word": false,
|
49 |
-
"special": false
|
50 |
-
},
|
51 |
-
"30471": {
|
52 |
-
"content": "trihydroxypropyl",
|
53 |
-
"lstrip": false,
|
54 |
-
"normalized": true,
|
55 |
-
"rstrip": false,
|
56 |
-
"single_word": false,
|
57 |
-
"special": false
|
58 |
-
},
|
59 |
-
"30472": {
|
60 |
-
"content": "propane",
|
61 |
-
"lstrip": false,
|
62 |
-
"normalized": true,
|
63 |
-
"rstrip": false,
|
64 |
-
"single_word": false,
|
65 |
-
"special": false
|
66 |
-
},
|
67 |
-
"30473": {
|
68 |
-
"content": "aminopurin",
|
69 |
-
"lstrip": false,
|
70 |
-
"normalized": true,
|
71 |
-
"rstrip": false,
|
72 |
-
"single_word": false,
|
73 |
-
"special": false
|
74 |
-
},
|
75 |
-
"30474": {
|
76 |
-
"content": "##chromene",
|
77 |
-
"lstrip": false,
|
78 |
-
"normalized": true,
|
79 |
-
"rstrip": false,
|
80 |
-
"single_word": false,
|
81 |
-
"special": false
|
82 |
}
|
83 |
},
|
84 |
"clean_up_tokenization_spaces": true,
|
|
|
39 |
"rstrip": false,
|
40 |
"single_word": false,
|
41 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
}
|
43 |
},
|
44 |
"clean_up_tokenization_spaces": true,
|
vocab.txt
CHANGED
@@ -504,8 +504,8 @@ argon
|
|
504 |
chl
|
505 |
cycl
|
506 |
dro
|
507 |
-
##tit
|
508 |
##opyran
|
|
|
509 |
dif
|
510 |
##amin
|
511 |
##anim
|
@@ -729,8 +729,8 @@ methanone
|
|
729 |
##quinoline
|
730 |
12s
|
731 |
17r
|
732 |
-
##uino
|
733 |
##xal
|
|
|
734 |
##uinoxal
|
735 |
thiophene
|
736 |
methylprop
|
@@ -993,10 +993,10 @@ oxir
|
|
993 |
##azocin
|
994 |
##benzoic
|
995 |
##roprop
|
996 |
-
|
997 |
-
|
998 |
-
|
999 |
-
|
1000 |
!
|
1001 |
"
|
1002 |
#
|
|
|
504 |
chl
|
505 |
cycl
|
506 |
dro
|
|
|
507 |
##opyran
|
508 |
+
##tit
|
509 |
dif
|
510 |
##amin
|
511 |
##anim
|
|
|
729 |
##quinoline
|
730 |
12s
|
731 |
17r
|
|
|
732 |
##xal
|
733 |
+
##uino
|
734 |
##uinoxal
|
735 |
thiophene
|
736 |
methylprop
|
|
|
993 |
##azocin
|
994 |
##benzoic
|
995 |
##roprop
|
996 |
+
[unused990]
|
997 |
+
[unused991]
|
998 |
+
[unused992]
|
999 |
+
[unused993]
|
1000 |
!
|
1001 |
"
|
1002 |
#
|