aychang committed
Commit
850f95e
1 Parent(s): ba0da3e

Upload model, config, and model card

Files changed (3):
  1. 1/model.pt +3 -0
  2. README.md +19 -0
  3. config.pbtxt +61 -0
1/model.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7eafd4c3b57ec8980f22603fa255774f4cf214ca91c7ef1ce544157326e82c68
+ size 1330968444
README.md ADDED
@@ -0,0 +1,19 @@
+ ---
+ language:
+ - en
+ thumbnail:
+ tags:
+ - question-answering
+ - torchscript
+ - FastNN
+ license: mit
+ datasets:
+ - squad
+ metrics:
+ ---
+
+ # TorchScript model of bert-large-cased-whole-word-masking-finetuned-squad
+
+ ## Model description
+
+ A serialized TorchScript model of bert-large-cased-whole-word-masking-finetuned-squad, packaged with a config.pbtxt for deployment on NVIDIA Triton Inference Server.
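
For a quick local sanity check outside Triton, the serialized module can be loaded with `torch.jit.load`. A minimal sketch, assuming the traced `forward()` accepts six tensors matching the input signature declared in the config.pbtxt below; the zero-filled values are placeholders, not meaningful QA inputs:

```python
import torch

# Load the serialized TorchScript module; the path follows Triton's
# version-directory layout (<model_repository>/<model_name>/1/model.pt).
model = torch.jit.load("1/model.pt")
model.eval()

# Placeholder tensors shaped per config.pbtxt, with a batch dimension of 1.
# Assumption: the traced forward() takes the six declared inputs in order.
input_0 = torch.zeros(1, 512, dtype=torch.int64)    # input__0 (INT64, [512])
input_1 = torch.zeros(1, 512, dtype=torch.int64)    # input__1 (INT64, [512])
input_2 = torch.zeros(1, 512, dtype=torch.int64)    # input__2 (INT64, [512])
input_3 = torch.zeros(1, 1, dtype=torch.int64)      # input__3 (INT64, [1])
input_4 = torch.zeros(1, 1, dtype=torch.int64)      # input__4 (INT64, [1])
input_5 = torch.zeros(1, 512, dtype=torch.float32)  # input__5 (FP32, [512])

with torch.no_grad():
    out_0, out_1, out_2 = model(input_0, input_1, input_2,
                                input_3, input_4, input_5)

# output__0 / output__1 are FP32 [512] (plausibly start/end logits for SQuAD);
# output__2 is INT64 [1].
print(out_0.shape, out_1.shape, out_2.shape)
```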
config.pbtxt ADDED
@@ -0,0 +1,61 @@
+ name: "bert-large-cased-whole-word-masking-finetuned-squad"
+ platform: "pytorch_libtorch"
+ max_batch_size: 64
+ input [
+   {
+     name: "input__0"
+     data_type: TYPE_INT64
+     dims: [512]
+   },
+   {
+     name: "input__1"
+     data_type: TYPE_INT64
+     dims: [512]
+   },
+   {
+     name: "input__2"
+     data_type: TYPE_INT64
+     dims: [512]
+   },
+   {
+     name: "input__3"
+     data_type: TYPE_INT64
+     dims: [1]
+   },
+   {
+     name: "input__4"
+     data_type: TYPE_INT64
+     dims: [1]
+   },
+   {
+     name: "input__5"
+     data_type: TYPE_FP32
+     dims: [512]
+   }
+ ]
+ output [
+   {
+     name: "output__0"
+     data_type: TYPE_FP32
+     dims: [512]
+   },
+   {
+     name: "output__1"
+     data_type: TYPE_FP32
+     dims: [512]
+   },
+   {
+     name: "output__2"
+     data_type: TYPE_INT64
+     dims: [1]
+   }
+ ]
+ dynamic_batching {
+   preferred_batch_size: [ 1, 2, 4, 8, 16, 32, 64 ]
+   max_queue_delay_microseconds: 30000
+
+ }
+ version_policy: { latest { num_versions: 1 }}
+ optimization {
+   graph { level: 1 }
+ }
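
Once the model repository is laid out as Triton expects (a directory named after the model containing the config.pbtxt above, with the weights under version directory `1/model.pt`), the declared names, dtypes, and shapes map directly onto a client request. A minimal sketch using the `tritonclient` HTTP API; the zero-filled tensors are placeholders, and the semantics of each input are an assumption not documented in this config:

```python
import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")

# Shapes are [batch, dims]: with max_batch_size > 0, Triton adds an
# implicit leading batch dimension to every dims entry in config.pbtxt.
specs = [
    ("input__0", (1, 512), np.int64, "INT64"),
    ("input__1", (1, 512), np.int64, "INT64"),
    ("input__2", (1, 512), np.int64, "INT64"),
    ("input__3", (1, 1), np.int64, "INT64"),
    ("input__4", (1, 1), np.int64, "INT64"),
    ("input__5", (1, 512), np.float32, "FP32"),
]

inputs = []
for name, shape, np_dtype, triton_dtype in specs:
    tensor = httpclient.InferInput(name, list(shape), triton_dtype)
    tensor.set_data_from_numpy(np.zeros(shape, dtype=np_dtype))  # placeholder data
    inputs.append(tensor)

outputs = [httpclient.InferRequestedOutput(name)
           for name in ("output__0", "output__1", "output__2")]

result = client.infer(
    "bert-large-cased-whole-word-masking-finetuned-squad",
    inputs,
    outputs=outputs,
)
print(result.as_numpy("output__0").shape)  # FP32, (1, 512)
```

The dynamic_batching block means individual requests like this one can be grouped server-side into the preferred batch sizes, at the cost of up to 30 ms (30000 microseconds) of added queueing delay.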