# Source: Hugging Face Hub upload by user "rokeya71" (commit 4e8c1b1, "Upload 10 files").
"""Export the ibm-granite/granite-embedding-30m-english model to ONNX.

Downloads the model from the Hugging Face Hub, saves its tokenizer and
config for later standalone use, and traces the model into an ONNX graph
with dynamic batch and sequence-length axes.
"""
import torch
from transformers import AutoTokenizer, AutoModel, AutoConfig
import os

# Hub identifier of the embedding model and local output locations.
model_name = "ibm-granite/granite-embedding-30m-english"
onnx_model_path = "./granite_embedding_model.onnx"
tokenizer_path = "./tokenizer"
config_path = "./config"


def export_to_onnx(
    name: str = model_name,
    onnx_path: str = onnx_model_path,
    tok_path: str = tokenizer_path,
    cfg_path: str = config_path,
    opset_version: int = 14,
) -> None:
    """Download *name* from the Hub and export it to ONNX at *onnx_path*.

    Also saves the tokenizer to *tok_path* and the model config to
    *cfg_path* so inference code can reload them without the original
    checkpoint.

    Args:
        name: Hugging Face Hub model identifier.
        onnx_path: Destination file for the exported ONNX graph.
        tok_path: Directory to save the tokenizer into.
        cfg_path: Directory to save the model config into.
        opset_version: ONNX opset to target for the export.
    """
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModel.from_pretrained(name)
    config = AutoConfig.from_pretrained(name)

    # Persist tokenizer and config alongside the ONNX graph for later use.
    tokenizer.save_pretrained(tok_path)
    config.save_pretrained(cfg_path)

    # Evaluation mode disables dropout etc. so the traced graph is deterministic.
    model.eval()

    # A short dummy sentence gives the tracer concrete example tensors;
    # the dynamic_axes below keep batch/sequence sizes flexible at runtime.
    dummy_input = tokenizer("This is a test sentence.", return_tensors="pt")
    input_ids = dummy_input["input_ids"]
    attention_mask = dummy_input["attention_mask"]

    # Tracing needs no autograd bookkeeping; no_grad keeps the export lean.
    with torch.no_grad():
        torch.onnx.export(
            model,
            (input_ids, attention_mask),  # positional args to model.forward
            onnx_path,  # path to save the ONNX model
            input_names=["input_ids", "attention_mask"],
            output_names=["output"],  # names the model's first output
            dynamic_axes={
                # Batch size and sequence length can vary at inference time.
                "input_ids": {0: "batch_size", 1: "sequence_length"},
                "attention_mask": {0: "batch_size", 1: "sequence_length"},
                "output": {0: "batch_size", 1: "sequence_length"},
            },
            opset_version=opset_version,
        )

    print(f"Model saved as ONNX to {onnx_path}")
    print(f"Tokenizer saved to {tok_path}")
    print(f"Config saved to {cfg_path}")


if __name__ == "__main__":
    export_to_onnx()