|
import gc |
|
import numpy as np |
|
import os |
|
import subprocess |
|
import torch |
|
import shutil |
|
import json |
|
import time |
|
import transformers |
|
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel, PreTrainedTokenizer, PreTrainedTokenizerFast, GenerationConfig, AutoTokenizer, LlamaForCausalLM |
|
from tkinter.filedialog import askdirectory, askopenfilename |
|
from colorama import init, Fore, Style |
|
|
|
from peft import PeftModel |
|
|
|
import torch.nn as nn |
|
|
|
# --- Configuration -----------------------------------------------------------
# Base model checkpoint directory (HF format) to load with
# AutoModelForCausalLM below.
model_path1 = '/home/kasm-user/Desktop/google-gemma-2-9b'

# LoRA adapter directory to merge into the base model via PeftModel.
lora_path = '/home/kasm-user/Desktop/google-gemma-2-9b/lora/testinglora'

# Output directory for the merged model + tokenizer (save_pretrained target).
save_path = '/home/kasm-user/Desktop/google-gemma-2-9b/lora/testinglora/out'


# Passed to save_pretrained(); each safetensors shard is capped at ~4 GB.
max_shard_size = "4000MB"


print("Starting script, please wait...")
|
|
|
class NoInit:
    """Context manager that skips random weight initialization.

    While active, the in-place initializers ``torch.nn.init.kaiming_uniform_``,
    ``uniform_`` and ``normal_`` are replaced with a no-op, and
    ``transformers.modeling_utils._init_weights`` is set to ``False``, so that
    instantiating a model whose weights will immediately be overwritten by a
    checkpoint does not waste time randomly initializing them.  All patched
    state is restored on exit.

    Fixes over the previous version:
      * the no-op now returns the tensor argument, matching the contract of
        the real ``torch.nn.init.*_`` functions (they return the tensor they
        initialized), so callers that use the return value still work;
      * ``__exit__`` restores the *saved* value of ``_init_weights`` instead
        of unconditionally forcing it to ``True``.
    """

    def __enter__(self):
        def noop(tensor=None, *args, **kwargs):
            # The real in-place initializers return the tensor they modify;
            # preserve that contract instead of returning None.
            return tensor

        # Save the originals so __exit__ can restore them exactly.
        self.funcs = (
            torch.nn.init.kaiming_uniform_,
            torch.nn.init.uniform_,
            torch.nn.init.normal_,
        )
        torch.nn.init.kaiming_uniform_ = noop
        torch.nn.init.uniform_ = noop
        torch.nn.init.normal_ = noop

        # Remember the previous flag rather than assuming it was True, so a
        # non-default configuration (or nesting) is restored correctly.
        self._saved_init_weights = transformers.modeling_utils._init_weights
        transformers.modeling_utils._init_weights = False
        return self

    def __exit__(self, *args):
        (
            torch.nn.init.kaiming_uniform_,
            torch.nn.init.uniform_,
            torch.nn.init.normal_,
        ) = self.funcs
        transformers.modeling_utils._init_weights = self._saved_init_weights
|
|
|
# Merge flow: load base model (fp32, CPU) -> apply LoRA -> merge_and_unload
# -> cast to bf16 -> save sharded safetensors + tokenizer.
# no_grad: inference only, no autograd graph needed while merging weights.
with torch.no_grad():

    # NoInit skips random weight init; the checkpoint overwrites weights anyway.
    with NoInit():
        # Load and merge in full precision to avoid rounding during the merge.
        torch.set_default_dtype(torch.float32)

        # Everything runs on CPU — merging does not need a GPU, only RAM.
        device = torch.device("cpu")

        print(device)


        print("Loading Model 1 (" + model_path1 + ")...")

        model1 = AutoModelForCausalLM.from_pretrained(model_path1, torch_dtype=torch.float32)

        model1 = model1.to(device)

        model1.eval()

        print("Model 1 Loaded. Dtype: " + str(model1.dtype))


        print("Loading LoRa (" + lora_path + ")...")

        # Wrap the base model with the LoRA adapter, then fold the adapter
        # deltas into the base weights and drop the PEFT wrapper.
        model1 = PeftModel.from_pretrained(model1, lora_path, torch_dtype=torch.float32)

        model1 = model1.merge_and_unload()


        # Tokenizer is unchanged by the merge; re-saved so the output dir is
        # a self-contained checkpoint.
        tokenizer = AutoTokenizer.from_pretrained(model_path1)


        print("Saving merged model (" + save_path + ")...")

        # nn.Module.to(dtype=...) converts parameters in place, so not
        # reassigning the result here is fine.
        model1.to(dtype=torch.bfloat16)

        # NOTE(review): `progressbar` is not a documented save_pretrained
        # parameter — it is likely swallowed by **kwargs; confirm it has any
        # effect with the installed transformers version.
        model1.save_pretrained(save_path, max_shard_size=max_shard_size, safe_serialization=True, progressbar=True)

        tokenizer.save_pretrained(save_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|