Spaces:
Sleeping
Sleeping
import pandas as pd | |
from typing import List, Dict | |
import os | |
def load_language_data(data_dir: str, language: str) -> List[str]:
    """Read every line of the text corpus stored for one language.

    The corpus is read from ``<data_dir>/<language lowercased>/texts.txt``
    and decoded as UTF-8.

    Args:
        data_dir: Directory that contains one sub-directory per language.
        language: Language name; it is lowercased to form the
            sub-directory name.

    Returns:
        The file's lines in order. Each line keeps its trailing newline
        (if it had one), exactly as ``file.readlines()`` returns them.

    Raises:
        FileNotFoundError: If the corpus file does not exist.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    filepath = os.path.join(data_dir, f"{language.lower()}/texts.txt")
    with open(filepath, 'r', encoding='utf-8') as f:
        return f.readlines()
def preprocess_text(text: str) -> str:
    """Normalize the whitespace in *text*.

    Leading and trailing whitespace is removed, and every internal run of
    whitespace (spaces, tabs, newlines, ...) is collapsed to a single space.

    Args:
        text: The raw string to clean.

    Returns:
        The whitespace-normalized string; an empty string when *text* is
        empty or contains only whitespace.
    """
    # str.split() with no separator already drops leading/trailing
    # whitespace, so a separate strip() beforehand is unnecessary.
    return ' '.join(text.split())