Curt-Park committed on
Commit
47ff573
·
1 Parent(s): 966f777

Add exception for token overflow

Browse files
Files changed (21) hide show
  1. .gitignore +3 -0
  2. model_repository/postprocessing/1/__pycache__/model.cpython-38.pyc +0 -0
  3. model_repository/postprocessing/1/gpt2-vocab.json +0 -0
  4. model_repository/postprocessing/1/utils/__pycache__/__init__.cpython-38.pyc +0 -0
  5. model_repository/postprocessing/1/utils/__pycache__/gpt_token_encoder.cpython-38.pyc +0 -0
  6. model_repository/postprocessing/1/utils/gpt_token_encoder.py +3 -1
  7. model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/6636bda4a1fd7a63653dffb22683b8162c8de956 +0 -0
  8. model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/69e076d9141589e88803bb7fffc48167082fc2e7 +0 -1
  9. model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/817762d631ad6f9c799f6b9dc713c46420e65546 +0 -1
  10. model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/84ef7fb594b5c0979e48bdeddb60a0adef33df0b +0 -0
  11. model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/b4f914dd9ab5282b31b0aafc5f6569151f910856 +0 -1
  12. model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/e7c373e4b1189082ddcda33aaf2839bb1c04bb81 +0 -0
  13. model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/refs/main +0 -1
  14. model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/added_tokens.json +0 -1
  15. model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/merges.txt +0 -1
  16. model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/special_tokens_map.json +0 -1
  17. model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/tokenizer.json +0 -1
  18. model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/tokenizer_config.json +0 -1
  19. model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/vocab.json +0 -1
  20. model_repository/preprocessing/1/__pycache__/model.cpython-38.pyc +0 -0
  21. model_repository/preprocessing/1/__pycache__/word_list.cpython-38.pyc +0 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ __pycache__
2
+ *.pyc
3
+ .cache
model_repository/postprocessing/1/__pycache__/model.cpython-38.pyc DELETED
Binary file (4.05 kB)
 
model_repository/postprocessing/1/gpt2-vocab.json CHANGED
The diff for this file is too large to render. See raw diff
 
model_repository/postprocessing/1/utils/__pycache__/__init__.cpython-38.pyc DELETED
Binary file (133 Bytes)
 
model_repository/postprocessing/1/utils/__pycache__/gpt_token_encoder.cpython-38.pyc DELETED
Binary file (4.99 kB)
 
model_repository/postprocessing/1/utils/gpt_token_encoder.py CHANGED
@@ -151,7 +151,9 @@ class Encoder:
151
  return bpe_tokens
152
 
153
  def decode(self, tokens):
154
- text = "".join([self.decoder[token] for token in tokens])
 
 
155
  text = bytearray([self.byte_decoder[c] for c in text]).decode(
156
  "utf-8", errors=self.errors
157
  )
 
151
  return bpe_tokens
152
 
153
  def decode(self, tokens):
154
+ text = "".join(
155
+ [self.decoder[min(token, 50256)] for token in tokens]
156
+ )
157
  text = bytearray([self.byte_decoder[c] for c in text]).decode(
158
  "utf-8", errors=self.errors
159
  )
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/6636bda4a1fd7a63653dffb22683b8162c8de956 DELETED
The diff for this file is too large to render. See raw diff
 
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/69e076d9141589e88803bb7fffc48167082fc2e7 DELETED
@@ -1 +0,0 @@
1
- {" ": 50285, " ": 50271, " ": 50260, " ": 50261, " ": 50272, "\t\t\t\t": 50292, "\t\t\t\t\t\t\t": 50289, " ": 50273, " ": 50284, " ": 50283, " ": 50263, " ": 50258, " ": 50269, " ": 50257, " ": 50265, " ": 50275, " ": 50267, " ": 50270, " ": 50278, " ": 50286, " ": 50276, " ": 50259, "\t\t\t\t\t\t": 50290, " ": 50268, " ": 50279, "\t\t\t\t\t\t\t\t\t": 50287, "\t\t\t": 50293, " ": 50264, " ": 50266, " ": 50277, "\t\t\t\t\t": 50291, "\t\t": 50294, " ": 50281, " ": 50274, "\t\t\t\t\t\t\t\t": 50288, " ": 50282, " ": 50262, " ": 50280}
 
 
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/817762d631ad6f9c799f6b9dc713c46420e65546 DELETED
@@ -1 +0,0 @@
1
- {"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}
 
 
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/84ef7fb594b5c0979e48bdeddb60a0adef33df0b DELETED
The diff for this file is too large to render. See raw diff
 
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/b4f914dd9ab5282b31b0aafc5f6569151f910856 DELETED
@@ -1 +0,0 @@
1
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "CodeGenTokenizer"}
 
 
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/blobs/e7c373e4b1189082ddcda33aaf2839bb1c04bb81 DELETED
The diff for this file is too large to render. See raw diff
 
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/refs/main DELETED
@@ -1 +0,0 @@
1
- 40b7a3b6e99e73bdb497a14b740e7167b3413c74
 
 
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/added_tokens.json DELETED
@@ -1 +0,0 @@
1
- ../../blobs/69e076d9141589e88803bb7fffc48167082fc2e7
 
 
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/merges.txt DELETED
@@ -1 +0,0 @@
1
- ../../blobs/6636bda4a1fd7a63653dffb22683b8162c8de956
 
 
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/special_tokens_map.json DELETED
@@ -1 +0,0 @@
1
- ../../blobs/817762d631ad6f9c799f6b9dc713c46420e65546
 
 
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/tokenizer.json DELETED
@@ -1 +0,0 @@
1
- ../../blobs/e7c373e4b1189082ddcda33aaf2839bb1c04bb81
 
 
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/tokenizer_config.json DELETED
@@ -1 +0,0 @@
1
- ../../blobs/b4f914dd9ab5282b31b0aafc5f6569151f910856
 
 
model_repository/preprocessing/1/.cache/models--Salesforce--codegen-350M-mono/snapshots/40b7a3b6e99e73bdb497a14b740e7167b3413c74/vocab.json DELETED
@@ -1 +0,0 @@
1
- ../../blobs/84ef7fb594b5c0979e48bdeddb60a0adef33df0b
 
 
model_repository/preprocessing/1/__pycache__/model.cpython-38.pyc DELETED
Binary file (6.52 kB)
 
model_repository/preprocessing/1/__pycache__/word_list.cpython-38.pyc DELETED
Binary file (1.32 kB)