Florian commited on
Commit
56b89fa
·
1 Parent(s): 69e7dc3

first commit

Browse files
README.md CHANGED
@@ -1,3 +1,55 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-4-v2
2
+
3
+ optimized with onnx o4
4
+
5
+ ```
6
+ from pathlib import Path
7
+ from transformers import AutoTokenizer
8
+ from optimum.onnxruntime import ORTModelForSequenceClassification, ORTOptimizer
9
+ from optimum.onnxruntime import AutoOptimizationConfig
10
+
11
+ model = "cross-encoder/ms-marco-MiniLM-L-4-v2"
12
+ tokenizer = AutoTokenizer.from_pretrained(model)
13
+ ort_model = ORTModelForSequenceClassification.from_pretrained(model, export=True)
14
+
15
+ save_dir = Path("/tmp/optimized_models")
16
+ save_dir.mkdir(exist_ok=True, parents=True)
17
+
18
+ optimizer = ORTOptimizer.from_pretrained(ort_model)
19
+ optimizer.optimize(
20
+ optimization_config=AutoOptimizationConfig.O4(),
21
+ save_dir=save_dir,
22
+ )
23
+ ```
24
+
25
+ Run it with onnx
26
+
27
+ ```
28
+ import torch
29
+ from transformers import AutoTokenizer
30
+ from transformers.pipelines.text_classification import ClassificationFunction
31
+ from optimum.pipelines import pipeline as ort_pipeline
32
+ from optimum.onnxruntime import ORTModelForSequenceClassification
33
+
34
+
35
+ model = "cross-encoder/ms-marco-MiniLM-L-4-v2"
36
+ device = torch.device(0) if torch.cuda.is_available() else -1
37
+ tokenizer = AutoTokenizer.from_pretrained(model)
38
+ ort_model = ORTModelForSequenceClassification.from_pretrained(
39
+ model, file_name="model_optimized.onnx")
40
+ cross_encoder = ort_pipeline(
41
+ task="text-classification",
42
+ model=ort_model,
43
+ tokenizer=tokenizer,
44
+ device=device,
45
+ function_to_apply=ClassificationFunction.SIGMOID,
46
+ padding=True,
47
+ truncation=True)
48
+
49
+ cross_encoder([{
50
+ "text":
51
+ "What is the purpose of life?",
52
+ "text_pair":
53
+ "The purpose of life is subjective and determined by each individual. Some may believe the purpose of life is to seek knowledge and education, to find happiness and fulfillment, or to live with purpose by helping others."
54
+ }])
55
+ ```
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "cross-encoder/ms-marco-MiniLM-L-4-v2",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 384,
12
+ "id2label": {
13
+ "0": "LABEL_0"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 1536,
17
+ "label2id": {
18
+ "LABEL_0": 0
19
+ },
20
+ "layer_norm_eps": 1e-12,
21
+ "max_position_embeddings": 512,
22
+ "model_type": "bert",
23
+ "num_attention_heads": 12,
24
+ "num_hidden_layers": 4,
25
+ "pad_token_id": 0,
26
+ "position_embedding_type": "absolute",
27
+ "sbert_ce_default_activation_function": "torch.nn.modules.linear.Identity",
28
+ "transformers_version": "4.35.2",
29
+ "type_vocab_size": 2,
30
+ "use_cache": true,
31
+ "vocab_size": 30522
32
+ }
model_optimized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eb9de59623fe41c52b808e33d6c62b74cecf3f72a569c0c8eafb88920dc1998
3
+ size 38349770
ort_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "one_external_file": true,
3
+ "opset": null,
4
+ "optimization": {
5
+ "disable_attention": null,
6
+ "disable_attention_fusion": false,
7
+ "disable_bias_gelu": null,
8
+ "disable_bias_gelu_fusion": false,
9
+ "disable_bias_skip_layer_norm": null,
10
+ "disable_bias_skip_layer_norm_fusion": false,
11
+ "disable_embed_layer_norm": true,
12
+ "disable_embed_layer_norm_fusion": true,
13
+ "disable_gelu": null,
14
+ "disable_gelu_fusion": false,
15
+ "disable_group_norm_fusion": true,
16
+ "disable_layer_norm": null,
17
+ "disable_layer_norm_fusion": false,
18
+ "disable_packed_kv": true,
19
+ "disable_rotary_embeddings": false,
20
+ "disable_shape_inference": false,
21
+ "disable_skip_layer_norm": null,
22
+ "disable_skip_layer_norm_fusion": false,
23
+ "enable_gelu_approximation": true,
24
+ "enable_gemm_fast_gelu_fusion": false,
25
+ "enable_transformers_specific_optimizations": true,
26
+ "fp16": true,
27
+ "no_attention_mask": false,
28
+ "optimization_level": 2,
29
+ "optimize_for_gpu": true,
30
+ "optimize_with_onnxruntime_only": null,
31
+ "use_mask_index": false,
32
+ "use_multi_head_attention": false,
33
+ "use_raw_attention_mask": false
34
+ },
35
+ "optimum_version": "1.14.1",
36
+ "quantization": {},
37
+ "transformers_version": "4.35.2",
38
+ "use_external_data_format": false
39
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff