Spaces:
Sleeping
Sleeping
File size: 745 Bytes
a608bb4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
import re
class TextAnalyzer:
    """Hold a collection of sentences, normalised on construction.

    Every sentence has the Ethiopic comma, the ASCII comma and round
    parentheses removed, runs of whitespace collapsed to a single space,
    and leading/trailing whitespace stripped.
    """

    # Compiled once at class-definition time so the patterns are not
    # re-resolved for every sentence.
    _PUNCTUATION = re.compile(r'[፣,()]')
    _WHITESPACE = re.compile(r'\s+')

    def __init__(self, sentences):
        """Store *sentences* and clean them immediately.

        Args:
            sentences: An iterable of sentence strings. A single bare string
                is treated as one sentence instead of being iterated
                character by character.
        """
        if isinstance(sentences, str):
            sentences = [sentences]
        self.sentences = sentences
        self.clean_sentences()

    def get_tokens(self):
        """Return all whitespace-separated words of all sentences, in order."""
        return [
            word
            for sentence in self.sentences
            for word in sentence.split()
        ]

    def get_sentences(self):
        """Return the list of cleaned sentences held by this instance."""
        return self.sentences

    def clean_sentences(self):
        """Replace ``self.sentences`` with a new list of cleaned sentences.

        A sentence that is empty after cleaning is kept as an empty string,
        so the number of sentences is preserved.
        """
        self.sentences = [
            # Punctuation goes first so that whitespace left around a removed
            # character is collapsed by the second substitution.
            self._WHITESPACE.sub(
                ' ', self._PUNCTUATION.sub('', sentence)
            ).strip()
            for sentence in self.sentences
        ]
|