Spaces:
Running
on
Zero
Running
on
Zero
# Symbol inventory used to turn text into integer indices.
# `_pad` is listed first, so it receives index 0.
_pad = "$"
_punctuation = '-´;:,.!?¡¿—…"«»“” ()†/='
_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
_letters_ipa = "éýíó'̯'͡ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲ'̩'ᵻ"

# Export all symbols:
symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)
letters = list(_letters) + list(_letters_ipa)

# Map every symbol to its position in `symbols`.
# NOTE: a symbol that occurs more than once in `symbols` (the apostrophe is
# repeated inside `_letters_ipa`) keeps the index of its LAST occurrence, so
# `len(dicts)` can be smaller than `len(symbols)` and some indices are never
# produced. Do not deduplicate the strings above: that would shift every later
# index, which consumers of these indices presumably rely on -- TODO confirm.
dicts = {symbol: index for index, symbol in enumerate(symbols)}
class TextCleaner:
    """Convert text into a list of integer symbol indices.

    Each character is looked up in ``word_index_dictionary`` (the module-level
    ``dicts`` mapping). Characters missing from the mapping are reported on
    stdout and left out of the result.
    """

    def __init__(self, dummy=None):
        """Bind the module-level symbol table to this instance.

        Args:
            dummy: Unused; kept so existing callers that pass an argument
                continue to work.
        """
        self.word_index_dictionary = dicts
        # Prints the number of distinct symbols in the table on construction.
        print(len(dicts))

    def __call__(self, text):
        """Return the indices of the characters of ``text``.

        Args:
            text: String to convert; it is iterated character by character.

        Returns:
            A list of ints, one per character found in
            ``word_index_dictionary``, in input order. Unknown characters are
            skipped, so the list may be shorter than ``text``.
        """
        indexes = []
        for char in text:
            try:
                index = self.word_index_dictionary[char]
            except KeyError:
                # Best-effort: drop the unknown symbol, but name it so the
                # message is actionable (the full text alone does not show
                # which character was rejected).
                print(f"TextCleaner: skipping unknown symbol {char!r} in text: {text}")
            else:
                indexes.append(index)
        return indexes