Curt-Park
committed on
Commit
·
d58794f
1
Parent(s):
47ff573
Fix type issue
Browse files
model_repository/codegen-350M-mono-gptj/1/config.ini
CHANGED
@@ -8,4 +8,4 @@ rotary_embedding = 32
|
|
8 |
vocab_size = 51200
|
9 |
start_id = 1
|
10 |
end_id = 2
|
11 |
-
weight_data_type =
|
|
|
8 |
vocab_size = 51200
|
9 |
start_id = 1
|
10 |
end_id = 2
|
11 |
+
weight_data_type = fp32
|
model_repository/postprocessing/1/utils/gpt_token_encoder.py
CHANGED
@@ -152,7 +152,7 @@ class Encoder:
|
|
152 |
|
153 |
def decode(self, tokens):
|
154 |
text = "".join(
|
155 |
-
[self.decoder[
|
156 |
)
|
157 |
text = bytearray([self.byte_decoder[c] for c in text]).decode(
|
158 |
"utf-8", errors=self.errors
|
|
|
152 |
|
153 |
def decode(self, tokens):
|
154 |
text = "".join(
|
155 |
+
[self.decoder[token] for token in tokens]
|
156 |
)
|
157 |
text = bytearray([self.byte_decoder[c] for c in text]).decode(
|
158 |
"utf-8", errors=self.errors
|
model_repository/preprocessing/1/gpt2-merges.txt
DELETED
The diff for this file is too large to render.
See raw diff
|
|
model_repository/preprocessing/1/gpt2-vocab.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
model_repository/preprocessing/1/word_list.py
CHANGED
@@ -13,8 +13,6 @@
|
|
13 |
# limitations under the License.
|
14 |
|
15 |
import csv
|
16 |
-
import os
|
17 |
-
import sys
|
18 |
from pathlib import Path
|
19 |
|
20 |
import numpy as np
|
@@ -24,7 +22,7 @@ from transformers import AutoTokenizer
|
|
24 |
def to_word_list_format(word_dict):
|
25 |
cache_dir = Path(__file__).parent / ".cache"
|
26 |
tokenizer = AutoTokenizer.from_pretrained(
|
27 |
-
"
|
28 |
)
|
29 |
|
30 |
flat_ids = []
|
|
|
13 |
# limitations under the License.
|
14 |
|
15 |
import csv
|
|
|
|
|
16 |
from pathlib import Path
|
17 |
|
18 |
import numpy as np
|
|
|
22 |
def to_word_list_format(word_dict):
|
23 |
cache_dir = Path(__file__).parent / ".cache"
|
24 |
tokenizer = AutoTokenizer.from_pretrained(
|
25 |
+
"Salesforce/codegen-350M-mono", cache_dir=cache_dir
|
26 |
)
|
27 |
|
28 |
flat_ids = []
|