import torch
from transformers import AutoConfig, AutoModel, AutoTokenizer

# Define the model name and output paths
model_name = "ibm-granite/granite-embedding-30m-english"
onnx_model_path = "./granite_embedding_model.onnx"
tokenizer_path = "./tokenizer"
config_path = "./config"

# Load the model, tokenizer, and config
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)

# Save the tokenizer and config for later use
tokenizer.save_pretrained(tokenizer_path)
config.save_pretrained(config_path)

# Set the model to evaluation mode
model.eval()

# Example input for tracing
dummy_input = tokenizer("This is a test sentence.", return_tensors="pt")
input_ids = dummy_input["input_ids"]
attention_mask = dummy_input["attention_mask"]

# Export the model to ONNX
torch.onnx.export(
    model,
    (input_ids, attention_mask),  # The model's inputs
    onnx_model_path,  # Path to save the ONNX model
    input_names=["input_ids", "attention_mask"],  # Input names
    output_names=["output"],  # "output" corresponds to the model's last_hidden_state
    dynamic_axes={
        # Batch size and sequence length can vary at inference time
        "input_ids": {0: "batch_size", 1: "sequence_length"},
        "attention_mask": {0: "batch_size", 1: "sequence_length"},
        "output": {0: "batch_size", 1: "sequence_length"},
    },
    opset_version=14,  # ONNX opset version
)

print(f"Model saved as ONNX to {onnx_model_path}")
print(f"Tokenizer saved to {tokenizer_path}")
print(f"Config saved to {config_path}")
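
# --- Optional sanity check (a minimal sketch, not part of the original export) ---
# This block assumes the `onnxruntime` package is installed. It reloads the exported
# graph, feeds it the same dummy input, and compares its "output" tensor (the model's
# last_hidden_state) against the PyTorch model to confirm the export round-trips.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession(onnx_model_path)
onnx_inputs = {
    "input_ids": input_ids.numpy(),
    "attention_mask": attention_mask.numpy(),
}
onnx_output = session.run(["output"], onnx_inputs)[0]

with torch.no_grad():
    torch_output = model(
        input_ids=input_ids, attention_mask=attention_mask
    ).last_hidden_state

# The traced graph should agree with PyTorch to within floating-point tolerance
np.testing.assert_allclose(torch_output.numpy(), onnx_output, rtol=1e-3, atol=1e-4)
print("ONNX output matches PyTorch within tolerance.")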