digopala committed on
Commit
f5b9543
·
verified ·
1 Parent(s): 274ef25

Upload k8s.yaml

Browse files

Add HPA, probes, and preprocessor microservice

Files changed (1) hide show
  1. k8s.yaml +33 -6
k8s.yaml CHANGED
@@ -3,7 +3,7 @@ kind: Deployment
3
  metadata:
4
  name: triton-deployment
5
  spec:
6
- replicas: 1
7
  selector:
8
  matchLabels:
9
  app: triton
@@ -17,16 +17,43 @@ spec:
17
  spec:
18
  containers:
19
  - name: triton
20
- image: nvcr.io/nvidia/tritonserver:22.10-py3
21
- args: ["tritonserver", "--model-repository=/models"]
22
  ports:
23
- - containerPort: 8000
24
- - containerPort: 8001
25
- - containerPort: 8002
 
 
 
26
  volumeMounts:
27
  - name: model-volume
28
  mountPath: /models
 
 
 
 
 
 
 
 
29
  volumes:
30
  - name: model-volume
31
  persistentVolumeClaim:
32
  claimName: model-pvc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  metadata:
4
  name: triton-deployment
5
  spec:
6
+ replicas: 2
7
  selector:
8
  matchLabels:
9
  app: triton
 
17
  spec:
18
  containers:
19
  - name: triton
20
+ image: nvcr.io/nvidia/tritonserver:23.01-py3
21
+ args: ["tritonserver","--model-repository=/models","--http-port=8000","--grpc-port=8001","--metrics-port=8002"]
22
  ports:
23
+ - {containerPort: 8000, name: http}
24
+ - {containerPort: 8001, name: grpc}
25
+ - {containerPort: 8002, name: metrics}
26
+ resources:
27
+ requests: { cpu: "500m", memory: "1Gi" }
28
+ limits: { cpu: "2", memory: "4Gi" }
29
  volumeMounts:
30
  - name: model-volume
31
  mountPath: /models
32
+ readinessProbe:
33
+ httpGet: { path: /v2/health/ready, port: 8000 }
34
+ initialDelaySeconds: 10
35
+ periodSeconds: 5
36
+ livenessProbe:
37
+ httpGet: { path: /v2/health/live, port: 8000 }
38
+ initialDelaySeconds: 20
39
+ periodSeconds: 10
40
  volumes:
41
  - name: model-volume
42
  persistentVolumeClaim:
43
  claimName: model-pvc
44
+ ---
45
+ apiVersion: v1
46
+ kind: Service
47
+ metadata:
48
+ name: triton-service
49
+ spec:
50
+ selector:
51
+ app: triton
52
+ ports:
53
+ - name: http
54
+ port: 8000
55
+ targetPort: 8000
56
+ - name: grpc
57
+ port: 8001
58
+ targetPort: 8001
59
+ type: ClusterIP