Spaces:
Running
Running
import re
# Maps a regex matching a Portuguese contraction to its expanded form.
#
# Every pattern is wrapped in the same guards:
#   (?<![\w.])  - not preceded by a word character or a period
#   (?![$\w])   - not followed by a word character or a literal "$"
# An optional "(s)?" group captures the plural suffix, which the replacement
# re-appends through "\g<1>".
#
# Patterns are written in lowercase; insertion order is preserved, so entries
# are visited in the order listed here when the mapping is iterated.
contractions = {
    r"(?<![\w.])no(s)?(?![$\w])": r"em o\g<1>",
    r"(?<![\w.])na(s)?(?![$\w])": r"em a\g<1>",
    r"(?<![\w.])da(s)?(?![$\w])": r"de a\g<1>",
    r"(?<![\w.])do(s)?(?![$\w])": r"de o\g<1>",
    r"(?<![\w.])ao(s)?(?![$\w])": r"a o\g<1>",
    r"(?<![\w.])à(s)?(?![$\w])": r"a a\g<1>",
    r"(?<![\w.])pela(s)?(?![$\w])": r"por a\g<1>",
    r"(?<![\w.])pelo(s)?(?![$\w])": r"por o\g<1>",
    r"(?<![\w.])nesta(s)?(?![$\w])": r"em esta\g<1>",
    r"(?<![\w.])neste(s)?(?![$\w])": r"em este\g<1>",
    r"(?<![\w.])nessa(s)?(?![$\w])": r"em essa\g<1>",
    r"(?<![\w.])nesse(s)?(?![$\w])": r"em esse\g<1>",
    r"(?<![\w.])num(?![$\w])": r"em um",
    r"(?<![\w.])nuns(?![$\w])": r"em uns",
    r"(?<![\w.])numa(s)?(?![$\w])": r"em uma\g<1>",
    r"(?<![\w.])nisso(?![$\w])": r"em isso",
    r"(?<![\w.])naquele(s)?(?![$\w])": r"em aquele\g<1>",
    r"(?<![\w.])naquela(s)?(?![$\w])": r"em aquela\g<1>",
    r"(?<![\w.])naquilo(?![$\w])": r"em aquilo",
    r"(?<![\w.])duma(s)?(?![$\w])": r"de uma\g<1>",
    r"(?<![\w.])daqui(?![$\w])": r"de aqui",
    r"(?<![\w.])dali(?![$\w])": r"de ali",
    r"(?<![\w.])daquele(s)?(?![$\w])": r"de aquele\g<1>",
    r"(?<![\w.])daquela(s)?(?![$\w])": r"de aquela\g<1>",
    r"(?<![\w.])deste(s)?(?![$\w])": r"de este\g<1>",
    r"(?<![\w.])desta(s)?(?![$\w])": r"de esta\g<1>",
    r"(?<![\w.])desse(s)?(?![$\w])": r"de esse\g<1>",
    r"(?<![\w.])dessa(s)?(?![$\w])": r"de essa\g<1>",
    r"(?<![\w.])daí(?![$\w])": r"de aí",
    r"(?<![\w.])dum(?![$\w])": r"de um",
    r"(?<![\w.])donde(?![$\w])": r"de onde",
    r"(?<![\w.])disto(?![$\w])": r"de isto",
    r"(?<![\w.])disso(?![$\w])": r"de isso",
    r"(?<![\w.])daquilo(?![$\w])": r"de aquilo",
    r"(?<![\w.])dela(s)?(?![$\w])": r"de ela\g<1>",
    r"(?<![\w.])dele(s)?(?![$\w])": r"de ele\g<1>",
    r"(?<![\w.])nisto(?![$\w])": r"em isto",
    r"(?<![\w.])nele(s)?(?![$\w])": r"em ele\g<1>",
    r"(?<![\w.])nela(s)?(?![$\w])": r"em ela\g<1>",
    # The optional apostrophe also lets these two match plain "dele"/"dela",
    # which the earlier entries cover too; what they add is the "d'ele" /
    # "d'ela" spelling.
    r"(?<![\w.])d'?ele(s)?(?![$\w])": r"de ele\g<1>",
    r"(?<![\w.])d'?ela(s)?(?![$\w])": r"de ela\g<1>",
    r"(?<![\w.])noutro(s)?(?![$\w])": r"em outro\g<1>",
    r"(?<![\w.])aonde(?![$\w])": r"a onde",
    r"(?<![\w.])àquela(s)?(?![$\w])": r"a aquela\g<1>",
    r"(?<![\w.])àquele(s)?(?![$\w])": r"a aquele\g<1>",
    # Fixed: the expansion was previously misspelled as "a aquelo".
    r"(?<![\w.])àquilo(?![$\w])": r"a aquilo",
    # A second, identical "contigo" entry used to follow "comigo"; a repeated
    # key in a dict literal is dead, so it has been dropped.
    r"(?<![\w.])contigo(?![$\w])": r"com ti",
    r"(?<![\w.])né(?![$\w])": r"não é",
    r"(?<![\w.])comigo(?![$\w])": r"com mim",
    r"(?<![\w.])conosco(?![$\w])": r"com nós",
    r"(?<![\w.])consigo(?![$\w])": r"com si",
    r"(?<![\w.])pra(?![$\w])": r"para a",
    r"(?<![\w.])pro(?![$\w])": r"para o",
}
def replace_keep_case(word: str, replacement: str, text: str) -> str:
    """
    Replace every match of a pattern while mirroring the match's letter case.

    The pattern is matched case-insensitively. Each match is replaced by the
    expanded ``replacement`` template, re-cased according to the matched text:

    * all lowercase  -> replacement lowercased
    * title case     -> replacement capitalized (first letter upper, rest lower)
    * all uppercase  -> replacement uppercased
    * anything else  -> replacement used as written

    The checks run in that order, so a match that is a single uppercase
    letter (both title case and upper case) is treated as title case.

    Parameters
    ----------
    word: str
        Regular expression pattern to search for.
    replacement: str
        Replacement template; may contain group references such as
        ``\\g<1>``, which are expanded against each match.
    text: str
        Text to be processed.

    Returns
    -------
    str:
        Processed string.
    """

    def _match_case(match):
        matched = match.group()
        # Resolve group references (e.g. the optional plural "s") first, so
        # the case transformation applies to the final replacement text.
        expanded = match.expand(replacement)
        if matched.islower():
            return expanded.lower()
        if matched.istitle():
            return expanded.capitalize()
        if matched.isupper():
            return expanded.upper()
        # Mixed case (e.g. "nO"): no clear convention to mirror.
        return expanded

    return re.sub(word, _match_case, text, flags=re.I)
def expand_contractions(text: str) -> str:
    """
    Replace contractions with their expanded base form.

    Every pattern in the module-level ``contractions`` mapping is applied to
    the text in turn, in the mapping's insertion order, using
    ``replace_keep_case`` so the letter case of each match is carried over to
    its expansion.

    The substitution is purely lexical: any occurrence of a listed word form
    is expanded, whatever its grammatical role in the sentence.

    Parameters
    ----------
    text: str
        Text that may contain contractions.

    Returns
    -------
    str:
        Text with expanded contractions.
    """
    for pattern, expansion in contractions.items():
        text = replace_keep_case(pattern, expansion, text)
    return text