# Merge sharded PyTorch checkpoints into a single fp16 Hugging Face model.
# Provenance: HaileyStorm — "Upload 2 files" (commit 7b54808, verified)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
def main() -> None:
    """Merge sharded .pt checkpoint files into one state dict, load it into
    the original model architecture, convert to fp16, and save the model and
    tokenizer in safetensors format.

    Side effects: reads ./llama3-5b/model-0000*.pt and the original model
    directory; writes the merged model + tokenizer to ./llama3-5b/hf/.
    """
    print("Loading checkpoint...")
    # Generate the shard paths instead of hard-coding each one.
    num_shards = 3
    checkpoint_paths = [
        f'./llama3-5b/model-{i:05d}-of-{num_shards:05d}.pt'
        for i in range(1, num_shards + 1)
    ]

    # Load each shard on CPU and merge into a single state dictionary.
    # weights_only=True restricts torch.load to tensor data, avoiding the
    # arbitrary-code-execution risk of full pickle deserialization.
    merged_state_dict = {}
    for checkpoint_path in checkpoint_paths:
        checkpoint = torch.load(
            checkpoint_path, map_location='cpu', weights_only=True
        )
        merged_state_dict.update(checkpoint)

    print("Loading original model...")
    # The original model supplies the architecture/config; the merged
    # state dict supplies the weights.
    original_model_name = "../../slice_with_mergekit/merged/"
    model = AutoModelForCausalLM.from_pretrained(
        original_model_name, state_dict=merged_state_dict
    )

    print("Converting to fp16...")
    # Convert all model parameters to float16 in place.
    model.half()

    print("Saving model...")
    # safe_serialization=True writes .safetensors instead of pickle .bin.
    output_dir = './llama3-5b/hf/'
    model.save_pretrained(output_dir, safe_serialization=True)

    print("Saving tokenizer...")
    # Ship the tokenizer alongside the model so the output dir is complete.
    tokenizer = AutoTokenizer.from_pretrained(original_model_name)
    tokenizer.save_pretrained(output_dir)

    print(f"Merged model saved to {output_dir}")


if __name__ == "__main__":
    # Guard so importing this module does not trigger the merge.
    main()