Curt-Park
committed on
Commit
·
47ff573
1
Parent(s):
966f777
Add exception for token overflow
Browse files- .gitignore +3 -0
- model_repository/postprocessing/1/__pycache__/model.cpython-38.pyc +0 -0
- model_repository/postprocessing/1/gpt2-vocab.json +0 -0
- model_repository/postprocessing/1/utils/__pycache__/__init__.cpython-38.pyc +0 -0
- model_repository/postprocessing/1/utils/__pycache__/gpt_token_encoder.cpython-38.pyc +0 -0
- model_repository/postprocessing/1/utils/gpt_token_encoder.py +3 -1
- model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/6636bda4a1fd7a63653dffb22683b8162c8de956 +0 -0
- model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/69e076d9141589e88803bb7fffc48167082fc2e7 +0 -1
- model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/817762d631ad6f9c799f6b9dc713c46420e65546 +0 -1
- model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/84ef7fb594b5c0979e48bdeddb60a0adef33df0b +0 -0
- model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/b4f914dd9ab5282b31b0aafc5f6569151f910856 +0 -1
- model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/e7c373e4b1189082ddcda33aaf2839bb1c04bb81 +0 -0
- model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/refs/main +0 -1
- model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/added_tokens.json +0 -1
- model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/merges.txt +0 -1
- model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/special_tokens_map.json +0 -1
- model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/tokenizer.json +0 -1
- model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/tokenizer_config.json +0 -1
- model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/vocab.json +0 -1
- model_repository/preprocessing/1/__pycache__/model.cpython-38.pyc +0 -0
- model_repository/preprocessing/1/__pycache__/word_list.cpython-38.pyc +0 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
__pycache__
|
2 |
+
*.pyc
|
3 |
+
.cache
|
model_repository/postprocessing/1/__pycache__/model.cpython-38.pyc
DELETED
Binary file (4.05 kB)
|
|
model_repository/postprocessing/1/gpt2-vocab.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
model_repository/postprocessing/1/utils/__pycache__/__init__.cpython-38.pyc
DELETED
Binary file (133 Bytes)
|
|
model_repository/postprocessing/1/utils/__pycache__/gpt_token_encoder.cpython-38.pyc
DELETED
Binary file (4.99 kB)
|
|
model_repository/postprocessing/1/utils/gpt_token_encoder.py
CHANGED
@@ -151,7 +151,9 @@ class Encoder:
|
|
151 |
return bpe_tokens
|
152 |
|
153 |
def decode(self, tokens):
|
154 |
-
text = "".join(
|
|
|
|
|
155 |
text = bytearray([self.byte_decoder[c] for c in text]).decode(
|
156 |
"utf-8", errors=self.errors
|
157 |
)
|
|
|
151 |
return bpe_tokens
|
152 |
|
153 |
def decode(self, tokens):
|
154 |
+
text = "".join(
|
155 |
+
[self.decoder[min(token, 50256)] for token in tokens]
|
156 |
+
)
|
157 |
text = bytearray([self.byte_decoder[c] for c in text]).decode(
|
158 |
"utf-8", errors=self.errors
|
159 |
)
|
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/6636bda4a1fd7a63653dffb22683b8162c8de956
DELETED
The diff for this file is too large to render.
See raw diff
|
|
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/69e076d9141589e88803bb7fffc48167082fc2e7
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{" ": 50285, " ": 50271, " ": 50260, " ": 50261, " ": 50272, "\t\t\t\t": 50292, "\t\t\t\t\t\t\t": 50289, " ": 50273, " ": 50284, " ": 50283, " ": 50263, " ": 50258, " ": 50269, " ": 50257, " ": 50265, " ": 50275, " ": 50267, " ": 50270, " ": 50278, " ": 50286, " ": 50276, " ": 50259, "\t\t\t\t\t\t": 50290, " ": 50268, " ": 50279, "\t\t\t\t\t\t\t\t\t": 50287, "\t\t\t": 50293, " ": 50264, " ": 50266, " ": 50277, "\t\t\t\t\t": 50291, "\t\t": 50294, " ": 50281, " ": 50274, "\t\t\t\t\t\t\t\t": 50288, " ": 50282, " ": 50262, " ": 50280}
|
|
|
|
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/817762d631ad6f9c799f6b9dc713c46420e65546
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}
|
|
|
|
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/84ef7fb594b5c0979e48bdeddb60a0adef33df0b
DELETED
The diff for this file is too large to render.
See raw diff
|
|
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/b4f914dd9ab5282b31b0aafc5f6569151f910856
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "CodeGenTokenizer"}
|
|
|
|
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/e7c373e4b1189082ddcda33aaf2839bb1c04bb81
DELETED
The diff for this file is too large to render.
See raw diff
|
|
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/refs/main
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
40b7a3b6e99e73bdb497a14b740e7167b3413c74
|
|
|
|
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/added_tokens.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../blobs/69e076d9141589e88803bb7fffc48167082fc2e7
|
|
|
|
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/merges.txt
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../blobs/6636bda4a1fd7a63653dffb22683b8162c8de956
|
|
|
|
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/special_tokens_map.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../blobs/817762d631ad6f9c799f6b9dc713c46420e65546
|
|
|
|
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/tokenizer.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../blobs/e7c373e4b1189082ddcda33aaf2839bb1c04bb81
|
|
|
|
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/tokenizer_config.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../blobs/b4f914dd9ab5282b31b0aafc5f6569151f910856
|
|
|
|
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/vocab.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
../../blobs/84ef7fb594b5c0979e48bdeddb60a0adef33df0b
|
|
|
|
model_repository/preprocessing/1/__pycache__/model.cpython-38.pyc
DELETED
Binary file (6.52 kB)
|
|
model_repository/preprocessing/1/__pycache__/word_list.cpython-38.pyc
DELETED
Binary file (1.32 kB)
|
|