Update modeling_chatglm.py
Browse files - modeling_chatglm.py +1 -1
modeling_chatglm.py
CHANGED
@@ -1071,7 +1071,7 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
|
|
1071 |
def text_split_by_punctuation(original_text, return_dict=False):
|
1072 |
# text = re.sub(r'([a-z])\.([A-Z])', r'\1. \2', original_text) # separate period without space
|
1073 |
text = original_text
|
1074 |
-
custom_sent_tokenizer = PunktSentenceTokenizer(
|
1075 |
punctuations = r"([。;!?])" # For Chinese support
|
1076 |
|
1077 |
separated = custom_sent_tokenizer.tokenize(text)
|
|
|
1071 |
def text_split_by_punctuation(original_text, return_dict=False):
|
1072 |
# text = re.sub(r'([a-z])\.([A-Z])', r'\1. \2', original_text) # separate period without space
|
1073 |
text = original_text
|
1074 |
+
custom_sent_tokenizer = PunktSentenceTokenizer()
|
1075 |
punctuations = r"([。;!?])" # For Chinese support
|
1076 |
|
1077 |
separated = custom_sent_tokenizer.tokenize(text)
|