Curt-Park committed
Commit d58794f · 1 Parent(s): 47ff573

Fix type issue

model_repository/codegen-350M-mono-gptj/1/config.ini CHANGED
@@ -8,4 +8,4 @@ rotary_embedding = 32
 vocab_size = 51200
 start_id = 1
 end_id = 2
-weight_data_type = fp16
+weight_data_type = fp32
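
The fp16 → fp32 flip is the "type issue" from the commit message: FasterTransformer stores weights as raw binary dumps with no embedded dtype, so weight_data_type in config.ini must match how the weights were actually exported. A minimal sanity-check sketch; the [gptj] section name and the model.wte.bin file name are assumptions, not taken from this repo:

# Sketch only: verify the dtype declared in config.ini against a weight
# file. A raw weight dump carries no dtype of its own, so reading it with
# the wrong dtype silently misinterprets every value.
import configparser
from pathlib import Path

import numpy as np

model_dir = Path("model_repository/codegen-350M-mono-gptj/1")

config = configparser.ConfigParser()
config.read(model_dir / "config.ini")
dtype = np.float32 if config["gptj"]["weight_data_type"] == "fp32" else np.float16  # section name assumed

weights = np.fromfile(model_dir / "model.wte.bin", dtype=dtype)  # hypothetical file name
print(weights.dtype, weights.size)
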
model_repository/postprocessing/1/utils/gpt_token_encoder.py CHANGED
@@ -152,7 +152,7 @@ class Encoder:
 
     def decode(self, tokens):
         text = "".join(
-            [self.decoder[min(token, 50256)] for token in tokens]
+            [self.decoder[token] for token in tokens]
         )
         text = bytearray([self.byte_decoder[c] for c in text]).decode(
             "utf-8", errors=self.errors
model_repository/preprocessing/1/gpt2-merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
model_repository/preprocessing/1/gpt2-vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
model_repository/preprocessing/1/word_list.py CHANGED
@@ -13,8 +13,6 @@
 # limitations under the License.
 
 import csv
-import os
-import sys
 from pathlib import Path
 
 import numpy as np
@@ -24,7 +22,7 @@ from transformers import AutoTokenizer
 def to_word_list_format(word_dict):
     cache_dir = Path(__file__).parent / ".cache"
     tokenizer = AutoTokenizer.from_pretrained(
-        "EleutherAI/gpt-j-6B", cache_dir=cache_dir
+        "Salesforce/codegen-350M-mono", cache_dir=cache_dir
     )
 
     flat_ids = []
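
The tokenizer swap matters because to_word_list_format turns words into token ids, and ids are only meaningful under the tokenizer that matches the model's vocabulary. A hedged sketch comparing the two (it downloads both tokenizers; the sample string is arbitrary):

# Word lists built with the old tokenizer may map to ids that mean
# something else (or nothing) in CodeGen's 51200-token vocabulary, so the
# ids must come from the model's own tokenizer. Both model names appear
# in the diff above.
from transformers import AutoTokenizer

gptj_tok = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
codegen_tok = AutoTokenizer.from_pretrained("Salesforce/codegen-350M-mono")

text = "    return None"  # whitespace handling is where code tokenizers often diverge
print(gptj_tok.encode(text))     # ids under the previously used tokenizer
print(codegen_tok.encode(text))  # ids under the tokenizer the model was trained with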