from gensim.models import KeyedVectors
from typing import List, Dict


class PreTrainedPipeline:
    """Callable wrapper that answers nearest-neighbour queries over word vectors.

    The vectors are downloaded from the Hugging Face Hub when the pipeline
    is constructed and kept on ``self.model`` for the pipeline's lifetime.
    """

    def __init__(self, path=""):
        """Download the vectors file and load it into memory.

        Args:
            path: Accepted but not read anywhere in this method; the vectors
                are always fetched from the hub repository named below.
        """
        # Imported lazily so the hub client is only needed at construction time.
        from huggingface_hub import hf_hub_download

        vectors_file = hf_hub_download(
            repo_id="lang-uk/word2vec-uk",
            filename="ubercorpus.cased.tokenized.300d",
        )
        # binary=False: the file is parsed as the text word2vec format.
        self.model = KeyedVectors.load_word2vec_format(vectors_file, binary=False)

    def __call__(self, inputs: str) -> List[Dict]:
        """Look up the 30 entries most similar to the given text.

        Args:
            inputs: The query string; surrounding whitespace is stripped
                before the lookup.

        Returns:
            A single-element list holding one dict whose ``"generated_text"``
            value is a newline-separated listing, one ``<key>\\t<value>``
            line per pair returned by ``self.model.most_similar``.

        Note:
            No exception is caught here: anything ``most_similar`` raises for
            the query propagates to the caller unchanged.
        """
        query = inputs.strip()
        neighbours = self.model.most_similar(query, topn=30)
        rows = [f"{k}\t{v}" for k, v in neighbours]
        return [{"generated_text": "\n".join(rows)}]