5w4n committed on
Commit
52f4fdb
1 Parent(s): 75515fa

Add simbolo tokenizers

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -39,6 +39,7 @@ tokenizer_names_to_test = [
39
  "google/flan-t5-base",
40
  "facebook/mbart-large-50",
41
  "EleutherAI/gpt-neox-20b",
 
42
  ]
43
 
44
  with st.sidebar:
@@ -70,8 +71,9 @@ with st.sidebar:
70
  links = [
71
  (
72
  f"[{tokenizer_name}](https://huggingface.co/{tokenizer_name})"
73
- if tokenizer_name != "openai/gpt4"
74
- else f"[{tokenizer_name}](https://github.com/openai/tiktoken)"
 
75
  )
76
  for tokenizer_name in selected_tokenizers
77
  ]
 
39
  "google/flan-t5-base",
40
  "facebook/mbart-large-50",
41
  "EleutherAI/gpt-neox-20b",
42
+ "simbolo-ai/multilingual-partial-syllable-tokenizer",
43
  ]
44
 
45
  with st.sidebar:
 
71
  links = [
72
  (
73
  f"[{tokenizer_name}](https://huggingface.co/{tokenizer_name})"
74
+ if tokenizer_name
75
+ not in ["openai/gpt4", "simbolo-ai/multilingual-partial-syllable-tokenizer"]
76
+ else f"[{tokenizer_name}](https://github.com/{tokenizer_name.split('/')[0]}/{tokenizer_name.split('/')[1] if tokenizer_name != 'openai/gpt4' else 'tiktoken'})"
77
  )
78
  for tokenizer_name in selected_tokenizers
79
  ]