import re


class TextAnalyzer:
    def __init__(self, sentences):
        self.sentences = sentences
        self.clean_sentences()

    def get_tokens(self):
        # Split each cleaned sentence on whitespace and return a flat list of tokens
        words = [word for sentence in self.sentences for word in sentence.split()]
        return words

    def get_sentences(self):
        return self.sentences

    def clean_sentences(self):
        cleaned_sentences = []
        for sentence in self.sentences:
            # Remove specific punctuation marks (Ethiopic comma '፣', comma, parentheses)
            sentence = re.sub(r'[፣,()]', '', sentence)
            # Collapse repeated whitespace and trim leading/trailing spaces
            sentence = re.sub(r'\s+', ' ', sentence).strip()
            cleaned_sentences.append(sentence)
        self.sentences = cleaned_sentences
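

# Minimal usage sketch (an assumption for illustration, not part of the original code;
# the sample sentences are hypothetical, chosen only to exercise cleaning and tokenization):
if __name__ == "__main__":
    analyzer = TextAnalyzer([
        "ሰላም፣ እንዴት ነህ?",                      # Ethiopic comma '፣' is stripped by clean_sentences()
        "This (example)   has extra   spaces,",  # parentheses, comma, and extra spaces are removed
    ])
    print(analyzer.get_sentences())  # cleaned sentences, e.g. ['ሰላም እንዴት ነህ?', 'This example has extra spaces']
    print(analyzer.get_tokens())     # flat list of whitespace-separated tokens from all sentences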