echarlaix HF staff committed on
Commit
eca3b14
·
1 Parent(s): 5946389

Upload model

Browse files
README.md CHANGED
@@ -1,3 +1,43 @@
1
  ---
 
 
 
 
 
 
 
 
 
 
 
 
2
  license: apache-2.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ language:
3
+ - en
4
+ - fr
5
+ - ro
6
+ - de
7
+ datasets:
8
+ - c4
9
+ tags:
10
+ - int8
11
+ - summarization
12
+ - translation
13
+
14
  license: apache-2.0
15
  ---
16
+
17
+ ## [t5-small](https://huggingface.co/t5-small) exported to the ONNX format and dynamically quantized.
18
+
19
+ ## Model description
20
+
21
+ [T5](https://huggingface.co/docs/transformers/model_doc/t5#t5) is an encoder-decoder model pre-trained on a multi-task mixture of unsupervised and supervised tasks, in which each task is converted into a text-to-text format.
22
+
23
+ For more information, please take a look at the original paper.
24
+
25
+ Paper: [Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer](https://arxiv.org/pdf/1910.10683.pdf)
26
+
27
+ Authors: *Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, Peter J. Liu*
28
+
29
+
30
+ ## Usage example
31
+
32
+ You can use this model with the Transformers *pipeline* API.
33
+
34
+ ```python
35
+ from transformers import AutoTokenizer, pipeline
36
+ from optimum.onnxruntime import ORTModelForSeq2SeqLM
37
+ tokenizer = AutoTokenizer.from_pretrained("echarlaix/t5-small-dynamic")
38
+ model = ORTModelForSeq2SeqLM.from_pretrained("echarlaix/t5-small-dynamic")
39
+ translator = pipeline("translation_en_to_fr", model=model, tokenizer=tokenizer)
40
+ text = "He never went out without a book under his arm, and he often came back with two."
41
+ results = translator(text)
42
+ print(results)
43
+ ```
config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "t5-small",
3
+ "architectures": [
4
+ "T5WithLMHeadModel"
5
+ ],
6
+ "d_ff": 2048,
7
+ "d_kv": 64,
8
+ "d_model": 512,
9
+ "decoder_start_token_id": 0,
10
+ "dropout_rate": 0.1,
11
+ "eos_token_id": 1,
12
+ "feed_forward_proj": "relu",
13
+ "initializer_factor": 1.0,
14
+ "is_encoder_decoder": true,
15
+ "layer_norm_epsilon": 1e-06,
16
+ "model_type": "t5",
17
+ "n_positions": 512,
18
+ "num_decoder_layers": 6,
19
+ "num_heads": 8,
20
+ "num_layers": 6,
21
+ "output_past": true,
22
+ "pad_token_id": 0,
23
+ "relative_attention_max_distance": 128,
24
+ "relative_attention_num_buckets": 32,
25
+ "task_specific_params": {
26
+ "summarization": {
27
+ "early_stopping": true,
28
+ "length_penalty": 2.0,
29
+ "max_length": 200,
30
+ "min_length": 30,
31
+ "no_repeat_ngram_size": 3,
32
+ "num_beams": 4,
33
+ "prefix": "summarize: "
34
+ },
35
+ "translation_en_to_de": {
36
+ "early_stopping": true,
37
+ "max_length": 300,
38
+ "num_beams": 4,
39
+ "prefix": "translate English to German: "
40
+ },
41
+ "translation_en_to_fr": {
42
+ "early_stopping": true,
43
+ "max_length": 300,
44
+ "num_beams": 4,
45
+ "prefix": "translate English to French: "
46
+ },
47
+ "translation_en_to_ro": {
48
+ "early_stopping": true,
49
+ "max_length": 300,
50
+ "num_beams": 4,
51
+ "prefix": "translate English to Romanian: "
52
+ }
53
+ },
54
+ "transformers_version": "4.19.0.dev0",
55
+ "use_cache": true,
56
+ "vocab_size": 32128
57
+ }
decoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a08e2c556fc3c0929c040a53a68cc3e8e0027ecb42056925577cc52d9f06cdb
3
+ size 58259052
decoder_with_past_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d0cf75b07500325987ba6396abf76fb8ff971e32ee1e76bf010492f6c91f2fe
3
+ size 55103207
encoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60328e59847197fb74cec4331fdf96fe08071ec4cf15a35e873a5ee93d3a641c
3
+ size 35433618
ort_config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "opset": 13,
3
+ "optimization": {},
4
+ "optimum_version": "1.4.0.dev0",
5
+ "quantization": {
6
+ "activations_dtype": "QUInt8",
7
+ "activations_symmetric": false,
8
+ "format": "QOperator",
9
+ "is_static": false,
10
+ "mode": "IntegerOps",
11
+ "nodes_to_exclude": [],
12
+ "nodes_to_quantize": [],
13
+ "operators_to_quantize": [
14
+ "MatMul",
15
+ "Add",
16
+ "Gather",
17
+ "Transpose"
18
+ ],
19
+ "per_channel": false,
20
+ "qdq_add_pair_to_weight": false,
21
+ "qdq_dedicated_pair": false,
22
+ "qdq_op_type_per_channel_support_to_axis": {
23
+ "MatMul": 1
24
+ },
25
+ "reduce_range": false,
26
+ "weights_dtype": "QInt8",
27
+ "weights_symmetric": true
28
+ },
29
+ "transformers_version": "4.20.1",
30
+ "use_external_data_format": false
31
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff