Qwen

yangapku committed
Commit
e7abfa5
1 Parent(s): 39e911d

update tokenization.py

Files changed (1)
  1. tokenization_qwen.py +2 -1
tokenization_qwen.py CHANGED
@@ -42,6 +42,7 @@ SPECIAL_TOKENS = tuple(
         start=SPECIAL_START_ID,
     )
 )
+SPECIAL_TOKENS_SET = set(t for i, t in SPECIAL_TOKENS)


 def _load_tiktoken_bpe(tiktoken_bpe_file: str) -> Dict[bytes, int]:
@@ -160,7 +161,7 @@ class QWenTokenizer(PreTrainedTokenizer):
             raise ValueError("Adding regular tokens is not supported")
         for token in new_tokens:
             surface_form = token.content if isinstance(token, AddedToken) else token
-            if surface_form not in SPECIAL_TOKENS:
+            if surface_form not in SPECIAL_TOKENS_SET:
                 raise ValueError("Adding unknown special tokens is not supported")
         return 0
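
For context, a minimal sketch of why the membership check had to change. The surrounding code builds SPECIAL_TOKENS with enumerate(..., start=SPECIAL_START_ID), so it holds (id, surface_form) pairs rather than bare strings; the token strings and start id below are illustrative placeholders, not necessarily the repository's actual values.

# Minimal sketch, assuming SPECIAL_TOKENS holds (id, surface_form) pairs
# produced by enumerate(..., start=SPECIAL_START_ID). The token strings and
# the start id are illustrative placeholders, not the repo's actual values.
SPECIAL_START_ID = 151643  # hypothetical
SPECIAL_TOKENS = tuple(
    enumerate(("<|endoftext|>", "<|im_start|>", "<|im_end|>"), start=SPECIAL_START_ID)
)
# SPECIAL_TOKENS == ((151643, '<|endoftext|>'), (151644, '<|im_start|>'), ...)

# Old check: a plain string never equals an (id, token) tuple, so every
# surface form looks unknown and the ValueError fires unconditionally.
print("<|im_start|>" in SPECIAL_TOKENS)        # False

# New check: compare against the set of surface forms instead.
SPECIAL_TOKENS_SET = set(t for i, t in SPECIAL_TOKENS)
print("<|im_start|>" in SPECIAL_TOKENS_SET)    # True

With SPECIAL_TOKENS_SET built once at module level, _add_tokens can keep accepting already-registered special tokens while still rejecting genuinely unknown ones.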