from transformers import AutoTokenizer
import transformers
import torch


class Model:
    """Thin wrapper around a Hugging Face text-generation pipeline."""

    def __init__(self, model="lmsys/vicuna-7b-v1.5") -> None:
        self.tokenizer = AutoTokenizer.from_pretrained(model)
        self.pipeline = transformers.pipeline(
            "text-generation",
            model=model,
            tokenizer=self.tokenizer,
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
            device_map="auto",  # spread the weights across available devices
        )

    def gen(self, content, temp=0.1, max_length=500):
        """Generate a completion for `content`, returning only the new text."""
        sequences = self.pipeline(
            content,
            max_new_tokens=max_length,
            do_sample=True,
            temperature=temp,
            num_return_sequences=1,
            eos_token_id=self.tokenizer.eos_token_id,
            return_full_text=False,  # strip the prompt from the output
        )
        # num_return_sequences=1 guarantees exactly one sequence.
        return sequences[0]['generated_text']
        # To join several sampled sequences instead:
        # '\n'.join(seq['generated_text'] for seq in sequences)
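

# A minimal usage sketch, assuming the vicuna-7b-v1.5 weights can be fetched
# from the Hugging Face Hub (or are cached locally) and that enough GPU/CPU
# memory is available. The prompt below is purely illustrative.
if __name__ == "__main__":
    m = Model()
    print(m.gen("Explain what a tokenizer does in one sentence.", temp=0.2))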