Spaces:
Sleeping
Sleeping
File size: 394 Bytes
4bb9d41 |
1 2 3 4 5 6 7 8 9 10 11 12 13 |
import pandas as pd
from typing import List, Dict
import os
def load_language_data(data_dir: str, language: str) -> List[str]:
    """Read the raw text lines stored for one language.

    The file is looked up at ``<data_dir>/<language lowercased>/texts.txt``
    and decoded as UTF-8.

    Args:
        data_dir: Directory that contains one sub-directory per language.
        language: Language name; it is lowercased before being used as the
            sub-directory name.

    Returns:
        Every line of the file, in order. Each line keeps its trailing
        newline character (if it had one).

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Pass the components separately so os.path.join picks the separator
    # instead of relying on a hard-coded "/" inside the last component.
    filepath = os.path.join(data_dir, language.lower(), "texts.txt")
    with open(filepath, 'r', encoding='utf-8') as f:
        return f.readlines()
def preprocess_text(text: str) -> str:
    """Normalize the whitespace in *text*.

    Leading and trailing whitespace is removed and every internal run of
    whitespace (spaces, tabs, newlines, ...) is collapsed to a single space.

    Args:
        text: The string to clean up.

    Returns:
        The whitespace-normalized string; empty if *text* is empty or
        contains only whitespace.
    """
    # str.split() with no arguments already drops leading/trailing
    # whitespace, so a separate strip() beforehand is unnecessary.
    return ' '.join(text.split())