File size: 628 Bytes
d093ea4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# -*- coding: utf-8 -*-
import re
from pysbd.punctuation_replacer import replace_punctuation


class ExclamationWords(object):
    """
    Protects exclamation points that belong to a word or name
    (for example ``Yahoo!``) rather than ending a sentence, by
    replacing them inside every listed word found in the text.
    """
    # Words and names in which the exclamation point is part of the spelling.
    EXCLAMATION_WORDS = [
        "!Xũ",
        "!Kung",
        "ǃʼOǃKung",
        "!Xuun",
        "!Kung-Ekoka",
        "ǃHu",
        "ǃKhung",
        "ǃKu",
        "ǃung",
        "ǃXo",
        "ǃXû",
        "ǃXung",
        "ǃXũ",
        "!Xun",
        "Yahoo!",
        "Y!J",
        "Yum!",
    ]
    # One alternation over all words, each escaped so it matches literally.
    # Alternatives are tried in list order, left to right.
    EXCLAMATION_REGEX = "|".join(map(re.escape, EXCLAMATION_WORDS))

    @classmethod
    def apply_rules(cls, text):
        """
        Return ``text`` with each occurrence of a listed word replaced by
        the result of calling ``replace_punctuation`` on its match object.
        """
        # The pattern is read from ExclamationWords itself, not from ``cls``,
        # so a subclass overriding EXCLAMATION_REGEX is not picked up here.
        pattern = ExclamationWords.EXCLAMATION_REGEX
        return re.sub(pattern, replace_punctuation, text)