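"""Minimal wrapper around a Hugging Face causal LM: download the weights once,
cache them in a local directory, and run batch inference on CPU or GPU."""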
import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
class Model:
    def __init__(self, model_url) -> None:
        self.model_url = model_url  # Hugging Face model id (or URL) to download from
        self.tokenizer = None
        self.model = None
        self.device = "cpu"  # inference device; switch with set_cuda() / set_cpu()
        self.dir_name = None  # local cache directory, set by download_model()
    def download_model(self) -> bool:
        """Download model and tokenizer into a local directory.

        Returns True if a fresh download happened, False if the cache was reused.
        """
        self.dir_name = "model"
        if not os.path.exists(self.dir_name) or not os.listdir(self.dir_name):
            # exist_ok=True: the directory may already exist but be empty
            os.makedirs(self.dir_name, exist_ok=True)
            tokenizer = AutoTokenizer.from_pretrained(self.model_url)
            model = AutoModelForCausalLM.from_pretrained(self.model_url)
            model.save_pretrained(self.dir_name)
            tokenizer.save_pretrained(self.dir_name)
            print(f"Model saved in '{self.dir_name}' directory.")
            return True
        else:
            print("Model is already downloaded and ready to use.")
            return False
    def load_local_model(self) -> None:
        if self.dir_name is None:
            self.dir_name = "model"  # default cache dir if download_model() was not called
        tokenizer = AutoTokenizer.from_pretrained(self.dir_name)
        model = AutoModelForCausalLM.from_pretrained(self.dir_name)
        # Fall back to CPU if CUDA was requested but is unavailable, so the
        # model and the inputs built in inference() end up on the same device.
        if self.device == "cuda" and not torch.cuda.is_available():
            self.device = "cpu"
        model.to(self.device)
        self.model = model
        self.tokenizer = tokenizer
    def inference(self, prompt_list) -> list:
        if self.model is not None and self.tokenizer is not None:
            self.model.eval()
            model_inferences = []
            for prompt in prompt_list:
                inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
                with torch.no_grad():
                    outputs = self.model.generate(
                        input_ids=inputs["input_ids"],
                        attention_mask=inputs["attention_mask"],
                        max_new_tokens=512,
                    )
                response = self.tokenizer.batch_decode(outputs.cpu(), skip_special_tokens=True)[0]
                model_inferences.append(response)
            return model_inferences
        else:
            print("Could not run inference; make sure you've loaded the model.")
            return []
    def set_cuda(self) -> None:
        self.device = "cuda"

    def set_cpu(self) -> None:
        self.device = "cpu"
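
# Usage sketch (not from the original file): "gpt2" is an illustrative model id;
# substitute any causal LM hosted on the Hugging Face Hub.
if __name__ == "__main__":
    model = Model("gpt2")
    model.download_model()    # fetches weights on the first run, reuses the cache afterwards
    model.set_cuda()          # request GPU; load_local_model() falls back to CPU if unavailable
    model.load_local_model()
    print(model.inference(["Once upon a time"])[0])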