Upload k8s.yaml
Browse files
Add HPA, probes, and preprocessor microservice
k8s.yaml
CHANGED
@@ -3,7 +3,7 @@ kind: Deployment
|
|
3 |
metadata:
|
4 |
name: triton-deployment
|
5 |
spec:
|
6 |
-
replicas:
|
7 |
selector:
|
8 |
matchLabels:
|
9 |
app: triton
|
@@ -17,16 +17,43 @@ spec:
|
|
17 |
spec:
|
18 |
containers:
|
19 |
- name: triton
|
20 |
-
image: nvcr.io/nvidia/tritonserver:
|
21 |
-
args: ["tritonserver",
|
22 |
ports:
|
23 |
-
- containerPort: 8000
|
24 |
-
- containerPort: 8001
|
25 |
-
- containerPort: 8002
|
|
|
|
|
|
|
26 |
volumeMounts:
|
27 |
- name: model-volume
|
28 |
mountPath: /models
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
volumes:
|
30 |
- name: model-volume
|
31 |
persistentVolumeClaim:
|
32 |
claimName: model-pvc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
metadata:
|
4 |
name: triton-deployment
|
5 |
spec:
|
6 |
+
replicas: 2
|
7 |
selector:
|
8 |
matchLabels:
|
9 |
app: triton
|
|
|
17 |
spec:
|
18 |
containers:
|
19 |
- name: triton
|
20 |
+
image: nvcr.io/nvidia/tritonserver:23.01-py3
|
21 |
+
args: ["tritonserver","--model-repository=/models","--http-port=8000","--grpc-port=8001","--metrics-port=8002"]
|
22 |
ports:
|
23 |
+
- {containerPort: 8000, name: http}
|
24 |
+
- {containerPort: 8001, name: grpc}
|
25 |
+
- {containerPort: 8002, name: metrics}
|
26 |
+
resources:
|
27 |
+
requests: { cpu: "500m", memory: "1Gi" }
|
28 |
+
limits: { cpu: "2", memory: "4Gi" }
|
29 |
volumeMounts:
|
30 |
- name: model-volume
|
31 |
mountPath: /models
|
32 |
+
readinessProbe:
|
33 |
+
httpGet: { path: /v2/health/ready, port: 8000 }
|
34 |
+
initialDelaySeconds: 10
|
35 |
+
periodSeconds: 5
|
36 |
+
livenessProbe:
|
37 |
+
httpGet: { path: /v2/health/live, port: 8000 }
|
38 |
+
initialDelaySeconds: 20
|
39 |
+
periodSeconds: 10
|
40 |
volumes:
|
41 |
- name: model-volume
|
42 |
persistentVolumeClaim:
|
43 |
claimName: model-pvc
|
44 |
+
---
|
45 |
+
apiVersion: v1
|
46 |
+
kind: Service
|
47 |
+
metadata:
|
48 |
+
name: triton-service
|
49 |
+
spec:
|
50 |
+
selector:
|
51 |
+
app: triton
|
52 |
+
ports:
|
53 |
+
- name: http
|
54 |
+
port: 8000
|
55 |
+
targetPort: 8000
|
56 |
+
- name: grpc
|
57 |
+
port: 8001
|
58 |
+
targetPort: 8001
|
59 |
+
type: ClusterIP
|