tgritsaev's picture
Upload 198 files
affcd23 verified
raw
history blame contribute delete
578 Bytes
import re
from typing import List, Union
import numpy as np
from torch import Tensor
class BaseTextEncoder:
    """Interface for text encoders.

    ``encode``, ``decode``, ``__len__`` and ``__getitem__`` all raise
    ``NotImplementedError`` here; concrete encoders are expected to override
    them. Only ``normalize_text`` has a working implementation.
    """

    def encode(self, text) -> Tensor:
        """Convert *text* into a ``Tensor``.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def decode(self, vector: Union[Tensor, np.ndarray, List[int]]):
        """Convert *vector* back into text.

        NOTE(review): presumably the inverse of ``encode``; the return type
        is not fixed by this base class -- confirm against subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def __len__(self) -> int:
        """Return the size of the encoder.

        NOTE(review): presumably the vocabulary size -- confirm against
        subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def __getitem__(self, item: int) -> str:
        """Return the string associated with integer index *item*.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    @staticmethod
    def normalize_text(text: str) -> str:
        """Lowercase *text* and drop everything except ``a``-``z`` and spaces.

        Characters are removed, not replaced: digits, punctuation, tabs,
        newlines and non-ASCII letters all vanish, so words separated only by
        such characters are joined together (``"a\\tb"`` becomes ``"ab"``).
        Runs of spaces are kept as-is.
        """
        text = text.lower()
        # Lowercasing happens first so that the a-z class also keeps letters
        # that were originally uppercase.
        text = re.sub(r"[^a-z ]", "", text)
        return text