|
import torch |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
import onnxruntime as ort |
|
import numpy as np |
|
|
|
def convert_and_test_onnx(model_name, output_path="language_detection.onnx", test_text="This is a test sentence."): |
|
""" |
|
Converts a Hugging Face model to ONNX, modifies the tokenizer, and tests the ONNX model. |
|
""" |
|
try: |
|
|
|
tokenizer = AutoTokenizer.from_pretrained(model_name) |
|
model = AutoModelForSequenceClassification.from_pretrained(model_name) |
|
|
|
|
|
if hasattr(tokenizer.backend_tokenizer.normalizer, "normalizations"): |
|
tokenizer.backend_tokenizer.normalizer.normalizations = [] |
|
tokenizer.save_pretrained("./modified_tokenizer") |
|
|
|
|
|
dummy_input = tokenizer("This is a test sentence.", return_tensors="pt") |
|
|
|
torch.onnx.export( |
|
model, |
|
(dummy_input["input_ids"], dummy_input["attention_mask"]), |
|
output_path, |
|
input_names=["input_ids", "attention_mask"], |
|
output_names=["output"], |
|
dynamic_axes={ |
|
"input_ids": {0: "batch", 1: "sequence"}, |
|
"attention_mask": {0: "batch", 1: "sequence"}, |
|
"output": {0: "batch"}, |
|
}, |
|
opset_version=14, |
|
) |
|
|
|
print(f"Model successfully converted and saved to {output_path}") |
|
|
|
|
|
ort_session = ort.InferenceSession(output_path) |
|
tokenizer_test = AutoTokenizer.from_pretrained("./modified_tokenizer") |
|
|
|
|
|
inputs = tokenizer_test(test_text, return_tensors="np", return_token_type_ids=False) |
|
|
|
ort_inputs = {k: v for k, v in inputs.items()} |
|
|
|
ort_outputs = ort_session.run(None, ort_inputs) |
|
logits = ort_outputs[0] |
|
predicted_class_id = np.argmax(logits, axis=-1) |
|
|
|
label_list = model.config.id2label |
|
predicted_label = label_list[predicted_class_id[0]] |
|
|
|
print(f"Test text: {test_text}") |
|
print(f"Predicted label: {predicted_label}") |
|
|
|
except Exception as e: |
|
print(f"Error during conversion or testing: {e}") |
|
|
|
if __name__ == "__main__": |
|
model_name = "dewdev/language_detection" |
|
test_text = "मैंने राजा को हिंदी में एक पत्र लिखा।" |
|
convert_and_test_onnx(model_name, test_text=test_text) |
|
|