from transformers import (
    AutoModelForMaskedLM,
    AutoTokenizer,
    PreTrainedModel,
    PreTrainedTokenizerBase,
)
import os

# Disable tokenizer parallelism to avoid fork-related deadlocks in the
# Hugging Face fast tokenizers.
os.environ["TOKENIZERS_PARALLELISM"] = "false"


class LanguageModel:
    """Wraps a masked language model and its tokenizer from the Hugging Face Hub."""

    def __init__(self, model_name: str) -> None:
        # Download (or load from the local cache) the tokenizer and the
        # masked-LM weights for the given checkpoint name.
        print("Downloading language model...")
        self.__tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.__model = AutoModelForMaskedLM.from_pretrained(model_name)

    def initTokenizer(self) -> PreTrainedTokenizerBase:
        # Return the tokenizer loaded in the constructor.
        return self.__tokenizer

    def initModel(self) -> PreTrainedModel:
        # Return the masked-LM model loaded in the constructor.
        return self.__model
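

# Minimal usage sketch (assumption: any masked-LM checkpoint on the Hub can be
# passed as model_name; "distilbert-base-uncased" is only an illustrative choice).
if __name__ == "__main__":
    lm = LanguageModel("distilbert-base-uncased")
    tokenizer = lm.initTokenizer()
    model = lm.initModel()
    # Print the concrete classes resolved by the Auto* factories.
    print(type(tokenizer).__name__, type(model).__name__)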