from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Checkpoint loaded by create_model() when the caller does not pick one.
DEFAULT_MODEL_NAME = 'stabilityai/stablelm-zephyr-3b'

# Preferred torch device for code that places tensors manually; note that
# create_model() itself relies on device_map="auto" for weight placement.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


@torch.no_grad()  # gradient tracking is disabled while the loader runs
def create_model(model_name: str = DEFAULT_MODEL_NAME):
    """Load a causal language model and its matching tokenizer.

    The same identifier is used for both the tokenizer and the model so
    the two can never drift apart.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model. Defaults to
            ``'stabilityai/stablelm-zephyr-3b'``.

    Returns:
        A ``(model, tokenizer)`` tuple, in that order.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        # NOTE(review): trust_remote_code=True allows Python code shipped
        # with the checkpoint repository to run locally; only use it with
        # sources you trust (consider pinning a revision).
        trust_remote_code=True,
        # Let the library decide where to place the weights instead of
        # moving the model to `device` by hand.
        device_map="auto",
    )
    return model, tokenizer