|
import torch |
|
from transformers import AutoTokenizer, AutoModel, AutoConfig |
|
import os |
|
|
|
|
|
# --- Export configuration ------------------------------------------------
# NOTE(review): first run requires network access to the Hugging Face hub;
# the model weights are downloaded (or read from the local HF cache).
model_name = "ibm-granite/granite-embedding-30m-english"

# Output artifacts: the ONNX graph plus the tokenizer/config files needed
# to run it without re-downloading anything from the hub.
onnx_model_path = "./granite_embedding_model.onnx"
tokenizer_path = "./tokenizer"
config_path = "./config"

# Load the pretrained tokenizer, model weights, and configuration.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)

# Persist tokenizer and config alongside the ONNX model so downstream
# consumers have a self-contained artifact directory.
tokenizer.save_pretrained(tokenizer_path)
config.save_pretrained(config_path)

# Inference mode: disables dropout and other train-only behavior so the
# traced graph is deterministic.
model.eval()

# A single dummy sentence drives the tracing pass; `dynamic_axes` below
# makes the exported graph accept any batch size / sequence length, so the
# concrete shape of this example does not constrain the model.
dummy_input = tokenizer("This is a test sentence.", return_tensors="pt")
input_ids = dummy_input["input_ids"]
attention_mask = dummy_input["attention_mask"]

# Trace under no_grad(): gradients are never needed for export, and
# tracking them during the trace forward pass wastes memory.
with torch.no_grad():
    torch.onnx.export(
        model,
        (input_ids, attention_mask),
        onnx_model_path,
        input_names=["input_ids", "attention_mask"],
        output_names=["output"],
        dynamic_axes={
            "input_ids": {0: "batch_size", 1: "sequence_length"},
            "attention_mask": {0: "batch_size", 1: "sequence_length"},
            "output": {0: "batch_size", 1: "sequence_length"},
        },
        # Fold constant subgraphs at export time for a smaller, faster graph.
        do_constant_folding=True,
        opset_version=14,
    )

print(f"Model saved as ONNX to {onnx_model_path}")
print(f"Tokenizer saved to {tokenizer_path}")
print(f"Config saved to {config_path}")
|
|