"""Thin wrapper around Hugging Face ``transformers`` pipelines for text generation."""

import logging
import os

import torch
import transformers
from huggingface_hub import login
from transformers import AutoTokenizer

# Authenticate against the Hugging Face Hub at import time using HF_TOKEN.
# NOTE(review): when HF_TOKEN is unset this passes token=None to `login`;
# confirm that is the intended behavior for non-interactive runs.
login(token=os.getenv('HF_TOKEN'))


class Model(torch.nn.Module):
    """Load a Hugging Face pipeline for one model and generate text with it.

    The class counts how many instances have been constructed in
    ``number_of_models``.
    """

    # Number of Model instances constructed so far (incremented by `update`).
    number_of_models = 0

    # Model identifiers listed by the original author; not enforced here.
    __model_list__ = [
        "lmsys/vicuna-7b-v1.5",
        "google-t5/t5-large",
        "mistralai/Mistral-7B-Instruct-v0.1",
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
    ]

    # Models that are driven through the "summarization" pipeline; every
    # other model uses "text-generation".
    _SUMMARIZATION_MODELS = frozenset({"google-t5/t5-large"})

    def __init__(self, model_name="lmsys/vicuna-7b-v1.5") -> None:
        """Load the tokenizer and pipeline for *model_name*.

        Args:
            model_name: Hugging Face model identifier to load.
        """
        super().__init__()
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.name = model_name
        logging.info('start loading model %s', self.name)
        self.pipeline = transformers.pipeline(
            self._task(),
            model=model_name,
            tokenizer=self.tokenizer,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        logging.info('Loaded model %s', self.name)
        self.update()

    @classmethod
    def update(cls):
        """Increment the class-level count of constructed models."""
        cls.number_of_models += 1

    def _task(self):
        """Return the pipeline task name used for ``self.name``."""
        if self.name in self._SUMMARIZATION_MODELS:
            return "summarization"
        return "text-generation"

    def return_mode_name(self):
        """Return the model identifier (name kept as-is for existing callers)."""
        return self.name

    def return_model_name(self):
        """Return the model identifier (correctly spelled alias)."""
        return self.name

    def return_tokenizer(self):
        """Return the tokenizer loaded for this model."""
        return self.tokenizer

    def return_model(self):
        """Return the underlying ``transformers`` pipeline object."""
        return self.pipeline

    def gen(self, content, temp=0.1, max_length=500):
        """Run the pipeline on *content* and return the produced text.

        Args:
            content: Input passed straight to the pipeline.
            temp: Sampling temperature. A value ``<= 0`` selects greedy
                decoding instead of sampling. ``None`` is forwarded
                unchanged with sampling enabled.
            max_length: Forwarded to the pipeline as ``max_new_tokens``.

        Returns:
            ``summary_text`` of the last returned sequence for summarization
            models, otherwise its ``generated_text``.
        """
        summarizing = self._task() == "summarization"

        # transformers' temperature validation requires a strictly positive
        # float, so coerce numeric inputs (e.g. the int 1) up front.
        if temp is not None:
            temp = float(temp)

        generation_kwargs = {
            "max_new_tokens": max_length,
            "num_return_sequences": 1,
            "eos_token_id": self.tokenizer.eos_token_id,
        }
        if temp is not None and temp <= 0:
            # Sampling with a non-positive temperature is rejected by
            # transformers; greedy decoding is the equivalent request.
            generation_kwargs["do_sample"] = False
        else:
            generation_kwargs["do_sample"] = True
            generation_kwargs["temperature"] = temp

        if not summarizing:
            # Only the text-generation call asked for the prompt to be
            # stripped from the output.
            generation_kwargs["return_full_text"] = False

        sequences = self.pipeline(content, **generation_kwargs)
        output_key = "summary_text" if summarizing else "generated_text"
        return sequences[-1][output_key]