import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


class Model:
    def __init__(self, model_url) -> None:
        self.model_url = model_url
        self.tokenizer = None
        self.model = None
        self.device = "cpu"
        self.dir_name = None

    def download_model(self) -> bool:
        """Download the model and tokenizer from the Hub and save them locally."""
        self.dir_name = "model"
        if not os.path.exists(self.dir_name) or not os.listdir(self.dir_name):
            # exist_ok avoids a FileExistsError when the directory exists but is empty.
            os.makedirs(self.dir_name, exist_ok=True)
            tokenizer = AutoTokenizer.from_pretrained(self.model_url)
            model = AutoModelForCausalLM.from_pretrained(self.model_url)
            model.save_pretrained(self.dir_name)
            tokenizer.save_pretrained(self.dir_name)
            print(f"Model saved in the '{self.dir_name}' directory.")
            return True
        print("Model is already downloaded and ready to use.")
        return False

    def load_local_model(self) -> None:
        """Load the model and tokenizer from the local directory."""
        if self.dir_name is None:
            self.dir_name = "model"
        tokenizer = AutoTokenizer.from_pretrained(self.dir_name)
        model = AutoModelForCausalLM.from_pretrained(self.dir_name)
        if self.device == "cuda" and torch.cuda.is_available():
            model.to("cuda")
        self.model = model
        self.tokenizer = tokenizer

    def inference(self, prompt_list) -> list:
        """Generate a response for each prompt in prompt_list."""
        if self.model is None or self.tokenizer is None:
            print("Model was not able to run inference; make sure you've loaded the model.")
            return []
        self.model.eval()
        model_inferences = []
        for prompt in prompt_list:
            # Keep the inputs on the same device as the model to avoid mismatches.
            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
            with torch.no_grad():
                outputs = self.model.generate(
                    input_ids=inputs["input_ids"], max_new_tokens=512
                )
            response = self.tokenizer.batch_decode(
                outputs.detach().cpu().numpy(), skip_special_tokens=True
            )[0]
            model_inferences.append(response)
        return model_inferences

    def set_cuda(self) -> None:
        self.device = "cuda"

    def set_cpu(self) -> None:
        self.device = "cpu"
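

# Example usage (a minimal sketch): the Hub checkpoint id "gpt2" and the prompt
# below are arbitrary placeholders, not values prescribed by this module;
# substitute the causal LM and prompts you actually intend to serve.
if __name__ == "__main__":
    model = Model("gpt2")
    model.download_model()    # fetches and caches the weights on the first run
    model.set_cuda()          # optional; the device defaults to CPU
    model.load_local_model()  # loads tokenizer + weights from the "model" directory
    answers = model.inference(["Hello, how are you?"])
    print(answers[0])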