Nigerian_languages / app /utils /data_processing.py
Gabriel Okiri
Initial commit
4bb9d41
raw
history blame contribute delete
394 Bytes
import pandas as pd
from typing import List, Dict
import os
def load_language_data(data_dir: str, language: str) -> List[str]:
    """Read the raw text lines stored for one language.

    The file is looked up at ``<data_dir>/<language lowercased>/texts.txt``
    and decoded as UTF-8.

    Args:
        data_dir: Directory that contains one sub-directory per language.
        language: Language name; it is lowercased before building the path.

    Returns:
        The file's lines exactly as ``readlines()`` yields them, i.e. each
        entry keeps its trailing line ending when one is present.

    Errors raised by ``open`` (for example when the file does not exist)
    are not handled here and propagate to the caller.
    """
    relative_path = f"{language.lower()}/texts.txt"
    source_path = os.path.join(data_dir, relative_path)
    with open(source_path, 'r', encoding='utf-8') as handle:
        lines = handle.readlines()
    return lines
def preprocess_text(text: str) -> str:
    """Normalize the whitespace of *text*.

    Leading and trailing whitespace is removed, and every internal run of
    whitespace (spaces, tabs, newlines, ...) is collapsed to one space.

    Args:
        text: The string to clean up.

    Returns:
        The whitespace-normalized string; empty if *text* contains only
        whitespace.
    """
    # split() with no separator breaks on any whitespace run and never
    # yields empty pieces, so re-joining with one space collapses the runs.
    words = text.strip().split()
    return ' '.join(words)