import spacy
from spacy.tokenizer import Tokenizer


@spacy.registry.tokenizers("custom_tokenizer")
def create_custom_tokenizer():
    def create_tokenizer(nlp):
        # Extend the default infix patterns so tokens are also split on
        # slashes, hyphens, commas, and colons appearing inside a token.
        infixes = nlp.Defaults.infixes + [
            r"/",
            r"-",
            r",",
            r":",
        ]
        # Additionally split a leading hyphen off the front of a token.
        prefixes = nlp.Defaults.prefixes + [
            r"-",
        ]
        prefix_regex = spacy.util.compile_prefix_regex(prefixes)
        infix_regex = spacy.util.compile_infix_regex(infixes)
        # Keep the default suffix splitting and tokenizer exceptions;
        # a bare Tokenizer(...) call would silently drop both, so trailing
        # punctuation and contractions would no longer be handled.
        suffix_regex = spacy.util.compile_suffix_regex(nlp.Defaults.suffixes)
        return Tokenizer(
            nlp.vocab,
            rules=nlp.Defaults.tokenizer_exceptions,
            prefix_search=prefix_regex.search,
            suffix_search=suffix_regex.search,
            infix_finditer=infix_regex.finditer,
        )

    return create_tokenizer
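A minimal sketch of how the registered factory might be wired up programmatically; the sample text is illustrative only. Because the factory is registered under the name "custom_tokenizer", it can also be referenced from the [nlp.tokenizer] block of a spaCy training config via @tokenizers = "custom_tokenizer".

import spacy

# Attach the custom tokenizer to a blank pipeline: the outer factory
# returns create_tokenizer, which takes the nlp object as its argument.
nlp = spacy.blank("en")
nlp.tokenizer = create_custom_tokenizer()(nlp)

# The added infix rules split on "/", "-", "," and ":" inside tokens.
doc = nlp("a/b-test run on 2021-06-01,2021-06-02: done")
print([t.text for t in doc])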