Upload 5 files
Browse files- added_tokens.json +14 -1
- special_tokens_map.json +2 -9
- spiece.model +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +15 -33
added_tokens.json
CHANGED
@@ -1,3 +1,16 @@
|
|
1 |
{
|
2 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
}
|
|
|
1 |
{
|
2 |
+
"amazeballs": 32104,
|
3 |
+
"bromance": 32112,
|
4 |
+
"crowdfunding": 32111,
|
5 |
+
"cryptocurrency": 32105,
|
6 |
+
"environment": 32103,
|
7 |
+
"facepalm": 32110,
|
8 |
+
"hangry": 32113,
|
9 |
+
"happy": 32100,
|
10 |
+
"intelligent": 32101,
|
11 |
+
"photobomb": 32109,
|
12 |
+
"response": 32102,
|
13 |
+
"upcycle": 32108,
|
14 |
+
"vlog": 32107,
|
15 |
+
"webinar": 32106
|
16 |
}
|
special_tokens_map.json
CHANGED
@@ -101,13 +101,6 @@
|
|
101 |
"<extra_id_98>",
|
102 |
"<extra_id_99>"
|
103 |
],
|
104 |
-
"bos_token": {
|
105 |
-
"content": "</s>",
|
106 |
-
"lstrip": false,
|
107 |
-
"normalized": false,
|
108 |
-
"rstrip": false,
|
109 |
-
"single_word": false
|
110 |
-
},
|
111 |
"eos_token": {
|
112 |
"content": "</s>",
|
113 |
"lstrip": false,
|
@@ -116,14 +109,14 @@
|
|
116 |
"single_word": false
|
117 |
},
|
118 |
"pad_token": {
|
119 |
-
"content": "
|
120 |
"lstrip": false,
|
121 |
"normalized": false,
|
122 |
"rstrip": false,
|
123 |
"single_word": false
|
124 |
},
|
125 |
"unk_token": {
|
126 |
-
"content": "
|
127 |
"lstrip": false,
|
128 |
"normalized": false,
|
129 |
"rstrip": false,
|
|
|
101 |
"<extra_id_98>",
|
102 |
"<extra_id_99>"
|
103 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
"eos_token": {
|
105 |
"content": "</s>",
|
106 |
"lstrip": false,
|
|
|
109 |
"single_word": false
|
110 |
},
|
111 |
"pad_token": {
|
112 |
+
"content": "<pad>",
|
113 |
"lstrip": false,
|
114 |
"normalized": false,
|
115 |
"rstrip": false,
|
116 |
"single_word": false
|
117 |
},
|
118 |
"unk_token": {
|
119 |
+
"content": "<unk>",
|
120 |
"lstrip": false,
|
121 |
"normalized": false,
|
122 |
"rstrip": false,
|
spiece.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
|
3 |
+
size 791656
|
tokenizer.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
CHANGED
@@ -833,22 +833,6 @@
|
|
833 |
"special": true
|
834 |
},
|
835 |
"32100": {
|
836 |
-
"content": "[PAD]",
|
837 |
-
"lstrip": false,
|
838 |
-
"normalized": false,
|
839 |
-
"rstrip": false,
|
840 |
-
"single_word": false,
|
841 |
-
"special": true
|
842 |
-
},
|
843 |
-
"32101": {
|
844 |
-
"content": "hello_enthusiastic_morning",
|
845 |
-
"lstrip": false,
|
846 |
-
"normalized": true,
|
847 |
-
"rstrip": false,
|
848 |
-
"single_word": false,
|
849 |
-
"special": false
|
850 |
-
},
|
851 |
-
"32102": {
|
852 |
"content": "happy",
|
853 |
"lstrip": false,
|
854 |
"normalized": true,
|
@@ -856,7 +840,7 @@
|
|
856 |
"single_word": false,
|
857 |
"special": false
|
858 |
},
|
859 |
-
"
|
860 |
"content": "intelligent",
|
861 |
"lstrip": false,
|
862 |
"normalized": true,
|
@@ -864,7 +848,7 @@
|
|
864 |
"single_word": false,
|
865 |
"special": false
|
866 |
},
|
867 |
-
"
|
868 |
"content": "response",
|
869 |
"lstrip": false,
|
870 |
"normalized": true,
|
@@ -872,7 +856,7 @@
|
|
872 |
"single_word": false,
|
873 |
"special": false
|
874 |
},
|
875 |
-
"
|
876 |
"content": "environment",
|
877 |
"lstrip": false,
|
878 |
"normalized": true,
|
@@ -880,7 +864,7 @@
|
|
880 |
"single_word": false,
|
881 |
"special": false
|
882 |
},
|
883 |
-
"
|
884 |
"content": "amazeballs",
|
885 |
"lstrip": false,
|
886 |
"normalized": true,
|
@@ -888,7 +872,7 @@
|
|
888 |
"single_word": false,
|
889 |
"special": false
|
890 |
},
|
891 |
-
"
|
892 |
"content": "cryptocurrency",
|
893 |
"lstrip": false,
|
894 |
"normalized": true,
|
@@ -896,7 +880,7 @@
|
|
896 |
"single_word": false,
|
897 |
"special": false
|
898 |
},
|
899 |
-
"
|
900 |
"content": "webinar",
|
901 |
"lstrip": false,
|
902 |
"normalized": true,
|
@@ -904,7 +888,7 @@
|
|
904 |
"single_word": false,
|
905 |
"special": false
|
906 |
},
|
907 |
-
"
|
908 |
"content": "vlog",
|
909 |
"lstrip": false,
|
910 |
"normalized": true,
|
@@ -912,7 +896,7 @@
|
|
912 |
"single_word": false,
|
913 |
"special": false
|
914 |
},
|
915 |
-
"
|
916 |
"content": "upcycle",
|
917 |
"lstrip": false,
|
918 |
"normalized": true,
|
@@ -920,7 +904,7 @@
|
|
920 |
"single_word": false,
|
921 |
"special": false
|
922 |
},
|
923 |
-
"
|
924 |
"content": "photobomb",
|
925 |
"lstrip": false,
|
926 |
"normalized": true,
|
@@ -928,7 +912,7 @@
|
|
928 |
"single_word": false,
|
929 |
"special": false
|
930 |
},
|
931 |
-
"
|
932 |
"content": "facepalm",
|
933 |
"lstrip": false,
|
934 |
"normalized": true,
|
@@ -936,7 +920,7 @@
|
|
936 |
"single_word": false,
|
937 |
"special": false
|
938 |
},
|
939 |
-
"
|
940 |
"content": "crowdfunding",
|
941 |
"lstrip": false,
|
942 |
"normalized": true,
|
@@ -944,7 +928,7 @@
|
|
944 |
"single_word": false,
|
945 |
"special": false
|
946 |
},
|
947 |
-
"
|
948 |
"content": "bromance",
|
949 |
"lstrip": false,
|
950 |
"normalized": true,
|
@@ -952,7 +936,7 @@
|
|
952 |
"single_word": false,
|
953 |
"special": false
|
954 |
},
|
955 |
-
"
|
956 |
"content": "hangry",
|
957 |
"lstrip": false,
|
958 |
"normalized": true,
|
@@ -1063,13 +1047,11 @@
|
|
1063 |
"<extra_id_98>",
|
1064 |
"<extra_id_99>"
|
1065 |
],
|
1066 |
-
"bos_token": "</s>",
|
1067 |
"clean_up_tokenization_spaces": true,
|
1068 |
"eos_token": "</s>",
|
1069 |
"extra_ids": 100,
|
1070 |
"model_max_length": 512,
|
1071 |
-
"pad_token": "
|
1072 |
-
"sp_model_kwargs": {},
|
1073 |
"tokenizer_class": "T5Tokenizer",
|
1074 |
-
"unk_token": "
|
1075 |
}
|
|
|
833 |
"special": true
|
834 |
},
|
835 |
"32100": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
836 |
"content": "happy",
|
837 |
"lstrip": false,
|
838 |
"normalized": true,
|
|
|
840 |
"single_word": false,
|
841 |
"special": false
|
842 |
},
|
843 |
+
"32101": {
|
844 |
"content": "intelligent",
|
845 |
"lstrip": false,
|
846 |
"normalized": true,
|
|
|
848 |
"single_word": false,
|
849 |
"special": false
|
850 |
},
|
851 |
+
"32102": {
|
852 |
"content": "response",
|
853 |
"lstrip": false,
|
854 |
"normalized": true,
|
|
|
856 |
"single_word": false,
|
857 |
"special": false
|
858 |
},
|
859 |
+
"32103": {
|
860 |
"content": "environment",
|
861 |
"lstrip": false,
|
862 |
"normalized": true,
|
|
|
864 |
"single_word": false,
|
865 |
"special": false
|
866 |
},
|
867 |
+
"32104": {
|
868 |
"content": "amazeballs",
|
869 |
"lstrip": false,
|
870 |
"normalized": true,
|
|
|
872 |
"single_word": false,
|
873 |
"special": false
|
874 |
},
|
875 |
+
"32105": {
|
876 |
"content": "cryptocurrency",
|
877 |
"lstrip": false,
|
878 |
"normalized": true,
|
|
|
880 |
"single_word": false,
|
881 |
"special": false
|
882 |
},
|
883 |
+
"32106": {
|
884 |
"content": "webinar",
|
885 |
"lstrip": false,
|
886 |
"normalized": true,
|
|
|
888 |
"single_word": false,
|
889 |
"special": false
|
890 |
},
|
891 |
+
"32107": {
|
892 |
"content": "vlog",
|
893 |
"lstrip": false,
|
894 |
"normalized": true,
|
|
|
896 |
"single_word": false,
|
897 |
"special": false
|
898 |
},
|
899 |
+
"32108": {
|
900 |
"content": "upcycle",
|
901 |
"lstrip": false,
|
902 |
"normalized": true,
|
|
|
904 |
"single_word": false,
|
905 |
"special": false
|
906 |
},
|
907 |
+
"32109": {
|
908 |
"content": "photobomb",
|
909 |
"lstrip": false,
|
910 |
"normalized": true,
|
|
|
912 |
"single_word": false,
|
913 |
"special": false
|
914 |
},
|
915 |
+
"32110": {
|
916 |
"content": "facepalm",
|
917 |
"lstrip": false,
|
918 |
"normalized": true,
|
|
|
920 |
"single_word": false,
|
921 |
"special": false
|
922 |
},
|
923 |
+
"32111": {
|
924 |
"content": "crowdfunding",
|
925 |
"lstrip": false,
|
926 |
"normalized": true,
|
|
|
928 |
"single_word": false,
|
929 |
"special": false
|
930 |
},
|
931 |
+
"32112": {
|
932 |
"content": "bromance",
|
933 |
"lstrip": false,
|
934 |
"normalized": true,
|
|
|
936 |
"single_word": false,
|
937 |
"special": false
|
938 |
},
|
939 |
+
"32113": {
|
940 |
"content": "hangry",
|
941 |
"lstrip": false,
|
942 |
"normalized": true,
|
|
|
1047 |
"<extra_id_98>",
|
1048 |
"<extra_id_99>"
|
1049 |
],
|
|
|
1050 |
"clean_up_tokenization_spaces": true,
|
1051 |
"eos_token": "</s>",
|
1052 |
"extra_ids": 100,
|
1053 |
"model_max_length": 512,
|
1054 |
+
"pad_token": "<pad>",
|
|
|
1055 |
"tokenizer_class": "T5Tokenizer",
|
1056 |
+
"unk_token": "<unk>"
|
1057 |
}
|