digopala committed on
Commit e3674cb · verified · 1 Parent(s): 7145d38

Upload 4 files


Add production-ready AI inference system assets for healthcare architecture

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ A_flowchart_in_the_image_illustrates_an_AI_inferen.png filter=lfs diff=lfs merge=lfs -text
A_flowchart_in_the_image_illustrates_an_AI_inferen.png ADDED

Git LFS Details

  • SHA256: e6c1d647d18f2bc6375b574c643189e0805eec7d5aeafaa3176b474b29e27f6c
  • Pointer size: 131 Bytes
  • Size of remote file: 639 kB
README.md CHANGED
@@ -1,12 +1,40 @@
- ---
- title: Ai Inference Architecture Healthcare
- emoji: 👁
- colorFrom: pink
- colorTo: gray
- sdk: static
- pinned: false
- license: apache-2.0
- short_description: AI Inference Architecture for Healthcare & LLMs
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # AI Inference Architecture for Healthcare
+
+ This project provides a scalable, production-ready AI inference architecture designed for healthcare and pharmaceutical applications. It integrates Triton Inference Server, FastAPI, Kubernetes, and Torch/ONNX models, allowing for secure, reliable, and fast deployment of AI workloads such as LLMs, image segmentation, or biomedical predictions.
+
+ ## Key Features
+
+ - Modular container-based architecture
+ - Routing layer using FastAPI or NGINX
+ - LLM model support via TorchScript / ONNX
+ - Optional user auth, billing hooks, and monitoring
+ - Designed for HIPAA-compliant environments
+
+ ## Deployment Options
+
+ - **Standalone (Local)**: via `docker-compose.yaml`
+ - **Production (Kubernetes)**: via `k8s.yaml`
+
+ ---
+
+ ## Quickstart (Docker Compose)
+
+ ```bash
+ docker compose up --build
+ ```
+
+ ## Kubernetes
+
+ ```bash
+ kubectl apply -f k8s.yaml
+ ```
+
+ ---
+
+ ## Who is this for?
+
+ Healthcare ML teams, pharma startups, or infrastructure engineers looking to fast-track AI deployment pipelines with production best practices.
+
+ ## License
+
+ Apache 2.0
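
The README references a FastAPI routing layer, but this commit ships only the containers; the application code mounted at `./app` is not included. Below is a minimal sketch of what `app/main.py` could look like, assuming Triton's KServe-v2 HTTP API served by the `inference` service on port 8000. The model name `segmentation`, the tensor name `INPUT__0`, and the `httpx` dependency are illustrative assumptions, not part of this commit.

```python
# Illustrative app/main.py -- NOT part of this commit. The compose file mounts
# ./app into tiangolo/uvicorn-gunicorn-fastapi, which looks for a FastAPI
# object named `app`. Model and tensor names below are placeholders.
import os
from typing import List

import httpx
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# "inference" is the Triton service name from docker-compose.yaml.
TRITON_URL = os.getenv("TRITON_URL", "http://inference:8000")

app = FastAPI(title="Healthcare AI inference router")


class PredictRequest(BaseModel):
    shape: List[int]     # e.g. [1, 3, 224, 224]
    inputs: List[float]  # flattened FP32 tensor data


@app.post("/predict/{model_name}")
async def predict(model_name: str, req: PredictRequest):
    # Build a KServe-v2 inference request for Triton's HTTP endpoint.
    payload = {
        "inputs": [{
            "name": "INPUT__0",  # must match the model's config.pbtxt
            "shape": req.shape,
            "datatype": "FP32",
            "data": req.inputs,
        }]
    }
    async with httpx.AsyncClient() as client:
        resp = await client.post(
            f"{TRITON_URL}/v2/models/{model_name}/infer", json=payload
        )
    if resp.status_code != 200:
        raise HTTPException(status_code=502, detail=resp.text)
    return resp.json()
```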
docker-compose.yaml ADDED
@@ -0,0 +1,19 @@
+ version: "3.9"
+ services:
+   inference:
+     image: nvcr.io/nvidia/tritonserver:23.03-py3
+     ports:
+       - "8000:8000"
+       - "8001:8001"
+     volumes:
+       - ./models:/models
+     command: [
+       "tritonserver",
+       "--model-repository=/models"
+     ]
+   api:
+     image: tiangolo/uvicorn-gunicorn-fastapi:python3.9
+     volumes:
+       - ./app:/app
+     ports:
+       - "8080:80"
k8s.yaml ADDED
@@ -0,0 +1,38 @@
+ apiVersion: apps/v1
+ kind: Deployment
+ metadata:
+   name: triton-inference
+ spec:
+   replicas: 1
+   selector:
+     matchLabels:
+       app: triton
+   template:
+     metadata:
+       labels:
+         app: triton
+     spec:
+       containers:
+         - name: triton
+           image: nvcr.io/nvidia/tritonserver:23.03-py3
+           ports:
+             - containerPort: 8000
+           args: ["tritonserver", "--model-repository=/models"]
+           volumeMounts:
+             - mountPath: /models
+               name: model-volume
+       volumes:
+         - name: model-volume
+           emptyDir: {}
+ ---
+ apiVersion: v1
+ kind: Service
+ metadata:
+   name: triton-service
+ spec:
+   selector:
+     app: triton
+   ports:
+     - protocol: TCP
+       port: 80
+       targetPort: 8000
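
Two caveats with this manifest: the `emptyDir` volume starts empty, so models still have to be provisioned into the pod (for example via a PersistentVolumeClaim or an init container) before inference can work, and the Service is cluster-internal (ClusterIP by default). For local testing, `kubectl port-forward svc/triton-service 8000:80` maps it to localhost; from inside the cluster, a readiness probe could look like the sketch below, where the `default` namespace and the model name are assumptions.

```python
# In-cluster readiness check (illustrative). "triton-service" and port 80 come
# from k8s.yaml; the "default" namespace and the model name are assumptions.
# This returns False until models are actually provisioned into the emptyDir.
import requests

SERVICE = "http://triton-service.default.svc.cluster.local"


def model_ready(name: str) -> bool:
    # Triton's KServe-v2 per-model readiness endpoint returns 200 when loaded.
    resp = requests.get(f"{SERVICE}/v2/models/{name}/ready", timeout=5)
    return resp.status_code == 200


if __name__ == "__main__":
    server = requests.get(f"{SERVICE}/v2/health/ready", timeout=5)
    print("server ready:", server.status_code == 200)
    print("model ready:", model_ready("segmentation"))
```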