Curt-Park committed
Commit d58794f · 1 Parent(s): 47ff573

Fix type issue

model_repository/codegen-350M-mono-gptj/1/config.ini CHANGED
@@ -8,4 +8,4 @@ rotary_embedding = 32
 vocab_size = 51200
 start_id = 1
 end_id = 2
-weight_data_type = fp16
+weight_data_type = fp32
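
The fp16 → fp32 flip is the "type issue" from the commit message: FasterTransformer stores weights as raw binary dumps with no embedded dtype, so weight_data_type in config.ini must match how the weights were actually exported. A minimal sanity-check sketch; the [gptj] section name and the model.wte.bin file name are assumptions, not taken from this repo:

# Sketch only: verify the dtype declared in config.ini against a weight
# file. A raw weight dump carries no dtype of its own, so reading it with
# the wrong dtype silently misinterprets every value.
import configparser
from pathlib import Path

import numpy as np

model_dir = Path("model_repository/codegen-350M-mono-gptj/1")

config = configparser.ConfigParser()
config.read(model_dir / "config.ini")
dtype = np.float32 if config["gptj"]["weight_data_type"] == "fp32" else np.float16  # section name assumed

weights = np.fromfile(model_dir / "model.wte.bin", dtype=dtype)  # hypothetical file name
print(weights.dtype, weights.size)
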
model_repository/postprocessing/1/utils/gpt_token_encoder.py CHANGED
@@ -152,7 +152,7 @@ class Encoder:
 
     def decode(self, tokens):
         text = "".join(
-            [self.decoder[min(token, 50256)] for token in tokens]
+            [self.decoder[token] for token in tokens]
         )
         text = bytearray([self.byte_decoder[c] for c in text]).decode(
             "utf-8", errors=self.errors
model_repository/preprocessing/1/gpt2-merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
model_repository/preprocessing/1/gpt2-vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
model_repository/preprocessing/1/word_list.py CHANGED
@@ -13,8 +13,6 @@
 # limitations under the License.
 
 import csv
-import os
-import sys
 from pathlib import Path
 
 import numpy as np
@@ -24,7 +22,7 @@ from transformers import AutoTokenizer
 def to_word_list_format(word_dict):
     cache_dir = Path(__file__).parent / ".cache"
     tokenizer = AutoTokenizer.from_pretrained(
-        "EleutherAI/gpt-j-6B", cache_dir=cache_dir
+        "Salesforce/codegen-350M-mono", cache_dir=cache_dir
     )
 
     flat_ids = []
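
The tokenizer swap matters because to_word_list_format turns words into token ids, and ids are only meaningful under the tokenizer that matches the model's vocabulary. A hedged sketch comparing the two (it downloads both tokenizers; the sample string is arbitrary):

# Word lists built with the old tokenizer may map to ids that mean
# something else (or nothing) in CodeGen's 51200-token vocabulary, so the
# ids must come from the model's own tokenizer. Both model names appear
# in the diff above.
from transformers import AutoTokenizer

gptj_tok = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
codegen_tok = AutoTokenizer.from_pretrained("Salesforce/codegen-350M-mono")

text = "    return None"  # whitespace handling is where code tokenizers often diverge
print(gptj_tok.encode(text))     # ids under the previously used tokenizer
print(codegen_tok.encode(text))  # ids under the tokenizer the model was trained with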