Nigerian_languages / app /utils /text_processing.py
Gabriel Okiri
Initial commit
4bb9d41
raw
history blame contribute delete
267 Bytes
import re
from typing import List
def clean_text(text: str) -> str:
    """Normalize the whitespace in *text*.

    Every run of one or more whitespace characters (spaces, tabs,
    newlines, ...) is replaced by a single space, and whitespace at the
    start and end of the result is removed.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string; empty if *text* contains only whitespace.
    """
    # Collapse first, then trim: after the substitution at most one space
    # can remain at either end, which strip() removes.
    return re.sub(r'\s+', ' ', text).strip()
def split_into_sentences(text: str) -> List[str]:
    """Split *text* into sentence fragments on ``.``, ``!`` and ``?``.

    A run of one or more of these terminator characters acts as a single
    separator. The terminators themselves are not kept in the output, and
    each fragment has surrounding whitespace removed. Fragments that are
    empty after trimming are dropped.

    Note that the split is purely character-based: a terminator inside a
    token (for example the dot in ``"3.14"``) also separates fragments.

    Args:
        text: The text to split.

    Returns:
        The non-empty, trimmed fragments in their original order.
    """
    sentences = []
    for fragment in re.split(r'[.!?]+', text):
        trimmed = fragment.strip()
        # Skip the empty pieces produced by leading/trailing terminators
        # or by whitespace-only gaps between them.
        if trimmed:
            sentences.append(trimmed)
    return sentences