Upload 4 files
Browse files
Add production-ready AI inference system assets for healthcare architecture
- .gitattributes +1 -0
- A_flowchart_in_the_image_illustrates_an_AI_inferen.png +3 -0
- README.md +37 -9
- docker-compose.yaml +19 -0
- k8s.yaml +38 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
A_flowchart_in_the_image_illustrates_an_AI_inferen.png filter=lfs diff=lfs merge=lfs -text
|
A_flowchart_in_the_image_illustrates_an_AI_inferen.png
ADDED
![]() |
Git LFS Details
|
README.md
CHANGED
@@ -1,12 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
10 |
---
|
11 |
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# AI Inference Architecture for Healthcare
|
2 |
+
|
3 |
+
This project provides a scalable, production-ready AI inference architecture designed for healthcare and pharmaceutical applications. It integrates Triton Inference Server, FastAPI, Kubernetes, and TorchScript/ONNX models, enabling secure, reliable, and fast deployment of AI workloads such as LLM inference, image segmentation, and biomedical prediction.
|
4 |
+
|
5 |
+
## Key Features
|
6 |
+
|
7 |
+
- Modular container-based architecture
|
8 |
+
- Routing layer using FastAPI or NGINX
|
9 |
+
- LLM support via TorchScript / ONNX
|
10 |
+
- Optional user auth, billing hooks, and monitoring
|
11 |
+
- Designed for HIPAA-compliant environments
|
12 |
+
|
13 |
+
## Deployment Options
|
14 |
+
|
15 |
+
- **Standalone (Local)**: via `docker-compose.yaml`
|
16 |
+
- **Production (Kubernetes)**: via `k8s.yaml`
|
17 |
+
|
18 |
---
|
19 |
+
|
20 |
+
## Quickstart (Docker Compose)
|
21 |
+
|
22 |
+
```bash
|
23 |
+
docker compose up --build
|
24 |
+
```
|
25 |
+
|
26 |
+
## Kubernetes
|
27 |
+
|
28 |
+
```bash
|
29 |
+
kubectl apply -f k8s.yaml
|
30 |
+
```
|
31 |
+
|
32 |
---
|
33 |
|
34 |
+
## Who is this for?
|
35 |
+
|
36 |
+
Healthcare ML teams, pharma startups, or infrastructure engineers looking to fast-track AI deployment pipelines with production best practices.
|
37 |
+
|
38 |
+
## License
|
39 |
+
|
40 |
+
Apache 2.0
|
docker-compose.yaml
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Local two-service stack: a Triton inference server and a FastAPI front end.
version: "3.9"

services:
  # Triton server; its model repository is the host's ./models directory.
  inference:
    image: nvcr.io/nvidia/tritonserver:23.03-py3
    command:
      - tritonserver
      - --model-repository=/models
    volumes:
      - ./models:/models
    ports:
      - "8000:8000"
      - "8001:8001"

  # FastAPI container; application code is bind-mounted from the host's ./app.
  api:
    image: tiangolo/uvicorn-gunicorn-fastapi:python3.9
    ports:
      - "8080:80"  # host 8080 -> container 80
    volumes:
      - ./app:/app
|
k8s.yaml
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Deployment that runs a single Triton Inference Server pod (label app=triton).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: triton-inference
spec:
  replicas: 1
  selector:
    matchLabels:
      app: triton
  template:
    metadata:
      labels:
        app: triton
    spec:
      containers:
        - name: triton
          image: nvcr.io/nvidia/tritonserver:23.03-py3
          args: ["tritonserver", "--model-repository=/models"]
          ports:
            # Triton's default HTTP endpoint.
            - name: http
              containerPort: 8000
            # Triton's default gRPC endpoint; declared so the manifest matches
            # the ports published by docker-compose.yaml (8000 and 8001).
            - name: grpc
              containerPort: 8001
          # Keep the pod out of Service endpoints until Triton reports ready.
          readinessProbe:
            httpGet:
              path: /v2/health/ready
              port: http
            initialDelaySeconds: 10
            periodSeconds: 10
          volumeMounts:
            - mountPath: /models
              name: model-volume
      volumes:
        - name: model-volume
          # NOTE(review): emptyDir starts empty on every pod start, so Triton
          # has no models to serve unless something populates /models.
          # Presumably a PersistentVolumeClaim or an init container is
          # intended for real use; confirm before deploying.
          emptyDir: {}
|
27 |
+
---
|
28 |
+
# Service that forwards TCP port 80 to port 8000 on pods labelled app=triton.
apiVersion: v1
kind: Service
metadata:
  name: triton-service
spec:
  ports:
    - port: 80
      targetPort: 8000
      protocol: TCP
  selector:
    app: triton
|