first commit

Files changed (8) hide show

README.md +55 -3
config.json +32 -0
model_optimized.onnx +3 -0
ort_config.json +39 -0
special_tokens_map.json +37 -0
tokenizer.json +0 -0
tokenizer_config.json +57 -0
vocab.txt +0 -0

README.md CHANGED Viewed

@@ -1,3 +1,55 @@
----
-license: apache-2.0
----

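+[cross-encoder/ms-marco-MiniLM-L-4-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-4-v2),
+exported to ONNX and optimized with Optimum's O4 preset (fp16, optimized for GPU).
+
+Export and optimization script:
+```python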
+from pathlib import Path
+from transformers import AutoTokenizer
+from optimum.onnxruntime import ORTModelForSequenceClassification, ORTOptimizer
+from optimum.onnxruntime import AutoOptimizationConfig
+model = "cross-encoder/ms-marco-MiniLM-L-4-v2"
+tokenizer = AutoTokenizer.from_pretrained(model)
+ort_model = ORTModelForSequenceClassification.from_pretrained(model, export=True)
+save_dir = Path("/tmp/optimized_models")
+save_dir.mkdir(exist_ok=True, parents=True)
+optimizer = ORTOptimizer.from_pretrained(ort_model)
+optimizer.optimize(
+    optimization_config=AutoOptimizationConfig.O4(),
+    save_dir=save_dir,
+)
+```
+Run the optimized model with ONNX Runtime:
+```python
+import torch
+from transformers import AutoTokenizer
+from transformers.pipelines.text_classification import ClassificationFunction
+from optimum.pipelines import pipeline as ort_pipeline
+from optimum.onnxruntime import ORTModelForSequenceClassification
+model = "cross-encoder/ms-marco-MiniLM-L-4-v2"
+device = torch.device(0) if torch.cuda.is_available() else -1
+tokenizer = AutoTokenizer.from_pretrained(model)
+ort_model = ORTModelForSequenceClassification.from_pretrained(
+    model, file_name="model_optimized.onnx")
+cross_encoder = ort_pipeline(
+    task="text-classification",
+    model=ort_model,
+    tokenizer=tokenizer,
+    device=device,
+    function_to_apply=ClassificationFunction.SIGMOID,
+    padding=True,
+    truncation=True)
+cross_encoder([{
+    "text":
+    "What is the purpose of life?",
+    "text_pair":
+    "The purpose of life is subjective and determined by each individual. Some may believe the purpose of life is to seek knowledge and education, to find happiness and fulfillment, or to live with purpose by helping others."
+}])
+```

config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "_name_or_path": "cross-encoder/ms-marco-MiniLM-L-4-v2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 1536,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 4,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "sbert_ce_default_activation_function": "torch.nn.modules.linear.Identity",
+  "transformers_version": "4.35.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

model_optimized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5eb9de59623fe41c52b808e33d6c62b74cecf3f72a569c0c8eafb88920dc1998
+size 38349770

ort_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "one_external_file": true,
+  "opset": null,
+  "optimization": {
+    "disable_attention": null,
+    "disable_attention_fusion": false,
+    "disable_bias_gelu": null,
+    "disable_bias_gelu_fusion": false,
+    "disable_bias_skip_layer_norm": null,
+    "disable_bias_skip_layer_norm_fusion": false,
+    "disable_embed_layer_norm": true,
+    "disable_embed_layer_norm_fusion": true,
+    "disable_gelu": null,
+    "disable_gelu_fusion": false,
+    "disable_group_norm_fusion": true,
+    "disable_layer_norm": null,
+    "disable_layer_norm_fusion": false,
+    "disable_packed_kv": true,
+    "disable_rotary_embeddings": false,
+    "disable_shape_inference": false,
+    "disable_skip_layer_norm": null,
+    "disable_skip_layer_norm_fusion": false,
+    "enable_gelu_approximation": true,
+    "enable_gemm_fast_gelu_fusion": false,
+    "enable_transformers_specific_optimizations": true,
+    "fp16": true,
+    "no_attention_mask": false,
+    "optimization_level": 2,
+    "optimize_for_gpu": true,
+    "optimize_with_onnxruntime_only": null,
+    "use_mask_index": false,
+    "use_multi_head_attention": false,
+    "use_raw_attention_mask": false
+  },
+  "optimum_version": "1.14.1",
+  "quantization": {},
+  "transformers_version": "4.35.2",
+  "use_external_data_format": false
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff