Upload 5 files
Browse files
- .gitattributes +1 -0
- optimizer_state.pt +3 -0
- special_tokens_map.json +1 -7
- tokenizer.json +23 -2
- tokenizer_config.json +40 -10
- vocab.json +0 -0
.gitattributes
CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
Phillnet.png filter=lfs diff=lfs merge=lfs -text
|
37 |
Model_Overview.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
Phillnet.png filter=lfs diff=lfs merge=lfs -text
|
37 |
Model_Overview.png filter=lfs diff=lfs merge=lfs -text
|
38 |
+
vocab.json filter=lfs diff=lfs merge=lfs -text
|
optimizer_state.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:baeabefd42201c6174aa5e7478f82e9aaec22e292c465e8dd44a7a8238f9ada8
|
3 |
+
size 2838829242
|
special_tokens_map.json
CHANGED
@@ -108,13 +108,7 @@
|
|
108 |
"rstrip": false,
|
109 |
"single_word": false
|
110 |
},
|
111 |
-
"pad_token": {
|
112 |
-
"content": "<pad>",
|
113 |
-
"lstrip": false,
|
114 |
-
"normalized": false,
|
115 |
-
"rstrip": false,
|
116 |
-
"single_word": false
|
117 |
-
},
|
118 |
"unk_token": {
|
119 |
"content": "<unk>",
|
120 |
"lstrip": false,
|
|
|
108 |
"rstrip": false,
|
109 |
"single_word": false
|
110 |
},
|
111 |
+
"pad_token": "</s>",
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
"unk_token": {
|
113 |
"content": "<unk>",
|
114 |
"lstrip": false,
|
tokenizer.json
CHANGED
@@ -1,7 +1,19 @@
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
-
"truncation": null,
|
4 |
-
"padding": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
"added_tokens": [
|
6 |
{
|
7 |
"id": 0,
|
@@ -1064,6 +1076,15 @@
|
|
1064 |
"rstrip": false,
|
1065 |
"normalized": true,
|
1066 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1067 |
}
|
1068 |
],
|
1069 |
"normalizer": {
|
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
+
"truncation": {
|
4 |
+
"direction": "Right",
|
5 |
+
"max_length": 512,
|
6 |
+
"strategy": "LongestFirst",
|
7 |
+
"stride": 0
|
8 |
+
},
|
9 |
+
"padding": {
|
10 |
+
"strategy": "BatchLongest",
|
11 |
+
"direction": "Right",
|
12 |
+
"pad_to_multiple_of": null,
|
13 |
+
"pad_id": 0,
|
14 |
+
"pad_type_id": 0,
|
15 |
+
"pad_token": "<pad>"
|
16 |
+
},
|
17 |
"added_tokens": [
|
18 |
{
|
19 |
"id": 0,
|
|
|
1076 |
"rstrip": false,
|
1077 |
"normalized": true,
|
1078 |
"special": false
|
1079 |
+
},
|
1080 |
+
{
|
1081 |
+
"id": 32114,
|
1082 |
+
"content": "[PAD]",
|
1083 |
+
"single_word": false,
|
1084 |
+
"lstrip": false,
|
1085 |
+
"rstrip": false,
|
1086 |
+
"normalized": false,
|
1087 |
+
"special": true
|
1088 |
}
|
1089 |
],
|
1090 |
"normalizer": {
|
tokenizer_config.json
CHANGED
@@ -1,4 +1,10 @@
|
|
1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
"added_tokens_decoder": {
|
3 |
"0": {
|
4 |
"content": "<pad>",
|
@@ -943,6 +949,14 @@
|
|
943 |
"rstrip": false,
|
944 |
"single_word": false,
|
945 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
946 |
}
|
947 |
},
|
948 |
"additional_special_tokens": [
|
@@ -1047,24 +1061,40 @@
|
|
1047 |
"<extra_id_98>",
|
1048 |
"<extra_id_99>"
|
1049 |
],
|
|
|
1050 |
"clean_up_tokenization_spaces": true,
|
|
|
1051 |
"eos_token": "</s>",
|
1052 |
"extra_ids": 100,
|
1053 |
"max_length": 1024,
|
1054 |
"model_max_length": 1024,
|
1055 |
-
"pad_token": "<pad>",
|
1056 |
-
"pad_token_type_id": 0,
|
1057 |
-
"padding_side": "right",
|
1058 |
-
"tokenizer_class": "T5Tokenizer",
|
1059 |
-
"unk_token": "<unk>",
|
1060 |
-
"enable_token_classification": true,
|
1061 |
"normalization_rules": {
|
1062 |
"enable": true,
|
1063 |
"lowercase": true,
|
1064 |
-
"
|
1065 |
"pre_tokenization": {
|
1066 |
"punctuation_split": true,
|
1067 |
"split_digits": true
|
1068 |
-
}
|
1069 |
-
|
1070 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
{
|
2 |
+
"adaptive_tokenization": {
|
3 |
+
"enable": true,
|
4 |
+
"max_tokens": 100000,
|
5 |
+
"min_frequency": 3,
|
6 |
+
"retain_variant_forms": true
|
7 |
+
},
|
8 |
"added_tokens_decoder": {
|
9 |
"0": {
|
10 |
"content": "<pad>",
|
|
|
949 |
"rstrip": false,
|
950 |
"single_word": false,
|
951 |
"special": false
|
952 |
+
},
|
953 |
+
"32114": {
|
954 |
+
"content": "[PAD]",
|
955 |
+
"lstrip": false,
|
956 |
+
"normalized": false,
|
957 |
+
"rstrip": false,
|
958 |
+
"single_word": false,
|
959 |
+
"special": true
|
960 |
}
|
961 |
},
|
962 |
"additional_special_tokens": [
|
|
|
1061 |
"<extra_id_98>",
|
1062 |
"<extra_id_99>"
|
1063 |
],
|
1064 |
+
"bpe_dropout": 0.1,
|
1065 |
"clean_up_tokenization_spaces": true,
|
1066 |
+
"enable_token_classification": true,
|
1067 |
"eos_token": "</s>",
|
1068 |
"extra_ids": 100,
|
1069 |
"max_length": 1024,
|
1070 |
"model_max_length": 1024,
|
|
|
|
|
|
|
|
|
|
|
|
|
1071 |
"normalization_rules": {
|
1072 |
"enable": true,
|
1073 |
"lowercase": true,
|
1074 |
+
"nmt_normalization": true,
|
1075 |
"pre_tokenization": {
|
1076 |
"punctuation_split": true,
|
1077 |
"split_digits": true
|
1078 |
+
},
|
1079 |
+
"strip_accents": true
|
1080 |
+
},
|
1081 |
+
"pad_to_multiple_of": null,
|
1082 |
+
"pad_token": "</s>",
|
1083 |
+
"pad_token_type_id": 0,
|
1084 |
+
"padding_side": "right",
|
1085 |
+
"spe_tokenization": {
|
1086 |
+
"coverage": 0.9995,
|
1087 |
+
"enable": true,
|
1088 |
+
"nbest_size": 64
|
1089 |
+
},
|
1090 |
+
"stride": 0,
|
1091 |
+
"subword_regularization": {
|
1092 |
+
"alpha": 0.1,
|
1093 |
+
"dropout": 0.1,
|
1094 |
+
"enable": true
|
1095 |
+
},
|
1096 |
+
"tokenizer_class": "T5Tokenizer",
|
1097 |
+
"truncation_side": "right",
|
1098 |
+
"truncation_strategy": "longest_first",
|
1099 |
+
"unk_token": "<unk>"
|
1100 |
+
}
|
vocab.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|