Spaces:
Running
Running
# -*- coding: utf-8 -*- | |
import re | |
from pysbd.abbreviation_replacer import AbbreviationReplacer | |
from pysbd.lang.common import Common, Standard | |
from pysbd.utils import Rule | |
class Persian(Common, Standard):
    """Persian (Farsi) language profile.

    Declares the ISO code, the punctuation set, the sentence-boundary regex
    and two substitution rules for Persian text, plus a nested
    ``AbbreviationReplacer`` that overrides how the period following an
    abbreviation is masked.
    """

    iso_code = 'fa'

    # Includes the Arabic question mark '؟' alongside the Latin marks.
    Punctuations = ['?', '!', ':', '.', '؟']

    # Lazily matches up to the first terminator in [: . ! ? ؟], or otherwise
    # up to the end of the string/line.
    SENTENCE_BOUNDARY_REGEX = r'.*?[:\.!\?؟]|.*?\Z|.*?$'

    # Colon sitting directly between two digits (e.g. "12:30") -> '♭'.
    # Rubular: http://rubular.com/r/RX5HpdDIyv
    ReplaceColonBetweenNumbersRule = Rule(r'(?<=\d):(?=\d)', '♭')

    # Persian comma '،' that is followed by whitespace, a run of
    # non-whitespace and another '،' -> '♬'.
    # Rubular: http://rubular.com/r/kPRgApNHUg
    ReplaceNonSentenceBoundaryCommaRule = Rule(r'،(?=\s\S+،)', '♬')

    class AbbreviationReplacer(AbbreviationReplacer):
        """Abbreviation replacer specialised for Persian.

        The base name in the class statement resolves to the module-level
        ``AbbreviationReplacer`` import, because this nested name is not yet
        bound while the bases are evaluated.
        """

        # No sentence starters are declared for Persian.
        SENTENCE_STARTERS = []

        def __init__(self, text, lang):
            """Forward ``text`` and ``lang`` unchanged to the base class."""
            super().__init__(text, lang)

        def scan_for_replacements(self, txt, am, index, character_array):
            """Replace every period that directly follows ``am`` with '∯'.

            Args:
                txt: Text to process.
                am: Abbreviation text; it is matched literally.
                index: Unused here; kept for signature compatibility.
                character_array: Unused here; kept for signature
                    compatibility.

            Returns:
                ``txt`` with each '.' immediately preceded by ``am``
                replaced by '∯'.
            """
            # re.escape keeps regex metacharacters inside the abbreviation
            # (such as '.') from acting as wildcards, and guarantees the
            # look-behind stays fixed-width so it always compiles.
            pattern = r'(?<={0})\.'.format(re.escape(am))
            return re.sub(pattern, '∯', txt)