Spaces:
Running
Running
import re | |
class TextAnalyzer:
    """Hold a collection of sentences and expose them in cleaned form.

    On construction every sentence has the punctuation marks ``፣``, ``,``,
    ``(`` and ``)`` removed, runs of whitespace collapsed to a single space,
    and leading/trailing whitespace stripped.
    """

    # Compiled once at class creation rather than looked up for every sentence.
    _PUNCTUATION = re.compile(r'[፣,()]')
    _WHITESPACE = re.compile(r'\s+')

    def __init__(self, sentences):
        """Store *sentences* and clean them immediately.

        Args:
            sentences: An iterable of strings, one per sentence. A bare
                string is treated as one sentence; iterating it directly
                would yield one "sentence" per character.
        """
        if isinstance(sentences, str):
            sentences = [sentences]
        self.sentences = sentences
        self.clean_sentences()

    def get_tokens(self):
        """Return the whitespace-separated words of all sentences, in order."""
        return [word for sentence in self.sentences for word in sentence.split()]

    def get_sentences(self):
        """Return the current list of sentences."""
        return self.sentences

    def clean_sentences(self):
        """Replace ``self.sentences`` with a new list of cleaned sentences.

        Punctuation is deleted (not replaced by a space), so ``"a,b"``
        becomes ``"ab"``. The object passed to the constructor is never
        mutated; a fresh list is bound to ``self.sentences``.
        """
        self.sentences = [
            self._WHITESPACE.sub(' ', self._PUNCTUATION.sub('', sentence)).strip()
            for sentence in self.sentences
        ]