import os
from typing import List, Dict

import pandas as pd


def load_language_data(data_dir: str, language: str) -> List[str]:
    """Read the raw text lines stored for one language.

    The file is looked up at ``<data_dir>/<language lowercased>/texts.txt``
    and decoded as UTF-8.

    Args:
        data_dir: Directory that contains one sub-directory per language.
        language: Language name; it is lower-cased before being used as the
            sub-directory name.

    Returns:
        Every line of the file, in order. Line terminators are kept, exactly
        as ``file.readlines()`` returns them.

    Raises:
        OSError: If the file cannot be opened (for example
            ``FileNotFoundError`` when it does not exist).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    filepath = os.path.join(data_dir, f"{language.lower()}/texts.txt")
    with open(filepath, 'r', encoding='utf-8') as f:
        return f.readlines()


def preprocess_text(text: str) -> str:
    """Return *text* with its whitespace normalized.

    Leading and trailing whitespace is removed and every internal run of
    whitespace (spaces, tabs, newlines) is collapsed to a single space.

    Args:
        text: The string to clean up.

    Returns:
        The normalized string; an empty or whitespace-only input yields ``""``.
    """
    # Argument-less split() already drops leading/trailing whitespace, so a
    # separate strip() beforehand is unnecessary.
    return ' '.join(text.split())