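# NOTE(review): "Spaces: Runtime error / Runtime error" appeared here; it looks
# like status text from the page this file was copied from, not part of the
# workflow - confirm and drop.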
# Workflow: on every push to master, start a self-hosted EC2 runner, run the
# Docker Compose train/eval services on it, push the image to Amazon ECR, and
# stop the runner afterwards.
name: Deploy PyTorch Training with EC2 Runner and Docker Compose with Advanced Deployment

# Trigger: pushes to the master branch only.
on:
  push:
    branches:
      - master
jobs:
  # Launches the EC2 instance that acts as the self-hosted runner and exposes
  # its runner label and instance id as job outputs for the jobs below.
  start-runner:
    name: Start self-hosted EC2 runner
    runs-on: ubuntu-latest
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - name: Start EC2 runner
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ami-044b0717aadbc9dfa
          ec2-instance-type: t2.xlarge
          subnet-id: subnet-024811dee81325f1c
          security-group-id: sg-0646c2a337a355a31

  # Runs on the runner started above (selected through its label): builds and
  # runs the Compose services, then builds, pushes and re-pulls the ECR image.
  deploy:
    name: Deploy PyTorch Training Pipeline
    needs: start-runner
    runs-on: ${{ needs.start-runner.outputs.label }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      # NOTE(review): no build command in this job passes --cache-from or
      # --cache-to for /tmp/.buildx-cache (the Compose build even uses
      # --no-cache), so this cache looks unused - confirm or remove.
      - name: Cache Docker layers
        uses: actions/cache@v3
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-docker-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-docker-

      - name: Log in to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v2

      - name: Create .env file
        # The secrets reach the script through `env` rather than being
        # expanded into the script text, so characters such as `$`, quotes or
        # backslashes in a value are written literally instead of being
        # interpreted by the shell. No explicit ::add-mask:: is needed:
        # repository secrets are already redacted from the job log.
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_REGION: ${{ secrets.AWS_REGION }}
        # Appends (>>) on purpose so an existing .env keeps its other entries.
        run: |
          {
            printf 'AWS_ACCESS_KEY_ID=%s\n' "$AWS_ACCESS_KEY_ID"
            printf 'AWS_SECRET_ACCESS_KEY=%s\n' "$AWS_SECRET_ACCESS_KEY"
            printf 'AWS_REGION=%s\n' "$AWS_REGION"
          } >> .env

      # Rebuilds every service without cache, starts them detached, streams
      # the logs of the train and eval services, then tears the stack down.
      - name: Run Docker Compose for all services
        run: |
          docker-compose build --no-cache
          docker-compose up -d
          docker-compose logs --follow train eval
          docker-compose down --remove-orphans

      # NOTE(review): .env (AWS credentials) is still in the working directory
      # when this build runs with context `.`; confirm a .dockerignore
      # excludes it so the keys cannot end up in the pushed image.
      - name: Build, tag, and push Docker image to Amazon ECR
        env:
          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          REPOSITORY: soutrik71/mnist
          IMAGE_TAG: ${{ github.sha }}
        run: |
          docker build -t "$REGISTRY/$REPOSITORY:$IMAGE_TAG" .
          docker push "$REGISTRY/$REPOSITORY:$IMAGE_TAG"
          docker tag "$REGISTRY/$REPOSITORY:$IMAGE_TAG" "$REGISTRY/$REPOSITORY:latest"
          docker push "$REGISTRY/$REPOSITORY:latest"

      - name: Pull Docker image from ECR and verify
        env:
          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          REPOSITORY: soutrik71/mnist
          IMAGE_TAG: ${{ github.sha }}
        run: |
          docker pull "$REGISTRY/$REPOSITORY:$IMAGE_TAG"
          docker images | grep "$REGISTRY/$REPOSITORY"

      # Runs even when an earlier step failed so the credentials file and the
      # Docker artifacts are not left behind on the runner.
      - name: Clean up environment
        if: ${{ always() }}
        run: |
          rm -f .env
          docker system prune -af --volumes

  # Always runs (even if deploy failed) so the EC2 runner is not left running.
  stop-runner:
    name: Stop self-hosted EC2 runner
    needs:
      - start-runner
      - deploy
    runs-on: ubuntu-latest
    if: ${{ always() }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - name: Stop EC2 runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ needs.start-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}

      # Informational only: prints the state when it contains "terminated",
      # otherwise prints a message; the step does not fail either way.
      - name: Validate EC2 termination
        env:
          INSTANCE_ID: ${{ needs.start-runner.outputs.ec2-instance-id }}
        run: |
          aws ec2 describe-instances --instance-ids "$INSTANCE_ID" \
            --query "Reservations[].Instances[].State.Name" --output text \
            | grep "terminated" || echo "Runner not terminated."