Xenova HF staff committed on
Commit
9231464
·
verified ·
1 Parent(s): 746495b

Upload folder using huggingface_hub

Browse files
onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30b37c72933c37c1dcbbd58abf613845ace73fef92e8dec4ff78903a03aaa881
3
+ size 176581010
onnx/model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb4f3688801b2996b76f708e4d5625c2d669ff254a78428f839f594ad49bf290
3
+ size 146085627
onnx/model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3216c0aa38d61b9e59659e737d5c5f2a966f65bfeafd5913ef1d3d8fb4fa3d81
3
+ size 88576823
onnx/model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d49e880a41129d2f442b92bf95dc914826b0c3169a62ee55c83ec8f8fce6e1fd
3
+ size 45391656
onnx/model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f20615398dceb5aa47d6b35d36fec990cda2d647ac4682ed3cc9f94b9ae85c5f
3
+ size 146640027
onnx/model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7595346cadedf91130719b87bff3073eff181c3fdd1e63f62310de4c6895ba40
3
+ size 45391749
onnx/model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7595346cadedf91130719b87bff3073eff181c3fdd1e63f62310de4c6895ba40
3
+ size 45391749
quantize_config.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fp16": {},
3
+ "q8": {
4
+ "per_model_config": {
5
+ "model": {
6
+ "op_types": [
7
+ "Add",
8
+ "BatchNormalization",
9
+ "Cast",
10
+ "Concat",
11
+ "Constant",
12
+ "Conv",
13
+ "Div",
14
+ "Erf",
15
+ "Flatten",
16
+ "Gather",
17
+ "Gemm",
18
+ "GlobalAveragePool",
19
+ "Identity",
20
+ "MatMul",
21
+ "Mul",
22
+ "ReduceMean",
23
+ "Relu",
24
+ "Reshape",
25
+ "Shape",
26
+ "Sigmoid",
27
+ "Slice",
28
+ "Softmax",
29
+ "Split",
30
+ "Sqrt",
31
+ "Squeeze",
32
+ "Sub",
33
+ "Transpose",
34
+ "Unsqueeze"
35
+ ],
36
+ "weight_type": "QUInt8"
37
+ }
38
+ },
39
+ "per_channel": false,
40
+ "reduce_range": false
41
+ },
42
+ "int8": {
43
+ "per_model_config": {
44
+ "model": {
45
+ "op_types": [
46
+ "Add",
47
+ "BatchNormalization",
48
+ "Cast",
49
+ "Concat",
50
+ "Constant",
51
+ "Conv",
52
+ "Div",
53
+ "Erf",
54
+ "Flatten",
55
+ "Gather",
56
+ "Gemm",
57
+ "GlobalAveragePool",
58
+ "Identity",
59
+ "MatMul",
60
+ "Mul",
61
+ "ReduceMean",
62
+ "Relu",
63
+ "Reshape",
64
+ "Shape",
65
+ "Sigmoid",
66
+ "Slice",
67
+ "Softmax",
68
+ "Split",
69
+ "Sqrt",
70
+ "Squeeze",
71
+ "Sub",
72
+ "Transpose",
73
+ "Unsqueeze"
74
+ ],
75
+ "weight_type": "QInt8"
76
+ }
77
+ },
78
+ "per_channel": false,
79
+ "reduce_range": false
80
+ },
81
+ "uint8": {
82
+ "per_model_config": {
83
+ "model": {
84
+ "op_types": [
85
+ "Add",
86
+ "BatchNormalization",
87
+ "Cast",
88
+ "Concat",
89
+ "Constant",
90
+ "Conv",
91
+ "Div",
92
+ "Erf",
93
+ "Flatten",
94
+ "Gather",
95
+ "Gemm",
96
+ "GlobalAveragePool",
97
+ "Identity",
98
+ "MatMul",
99
+ "Mul",
100
+ "ReduceMean",
101
+ "Relu",
102
+ "Reshape",
103
+ "Shape",
104
+ "Sigmoid",
105
+ "Slice",
106
+ "Softmax",
107
+ "Split",
108
+ "Sqrt",
109
+ "Squeeze",
110
+ "Sub",
111
+ "Transpose",
112
+ "Unsqueeze"
113
+ ],
114
+ "weight_type": "QUInt8"
115
+ }
116
+ },
117
+ "per_channel": false,
118
+ "reduce_range": false
119
+ },
120
+ "q4": {
121
+ "block_size": 32,
122
+ "is_symmetric": true,
123
+ "accuracy_level": null
124
+ },
125
+ "bnb4": {
126
+ "block_size": 64,
127
+ "quant_type": 1
128
+ }
129
+ }