From 33f14f65de89ba7aa8bee49858f725cce76be299 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Wed, 4 Dec 2024 21:58:28 +0000 Subject: [PATCH 01/62] comment out something doesn't work for fork --- .github/PULL_REQUEST_TEMPLATE.md | 28 ++++++++++++++++++++ .github/workflows/docker-nightly-publish.yml | 16 +++++------ .github/workflows/integration.yml | 21 ++++++++++++++- 3 files changed, 56 insertions(+), 9 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 80123c4b4..5f0ae9aed 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -4,3 +4,31 @@ Brief description of what this PR is about - If this change is a backward incompatible change, why must this change be made? - Interesting edge cases to note here + +## Type of change + +Please delete options that are not relevant. + +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] This change requires a documentation update + +## Checklist: + +- [ ] Have you [manually built the docker image](https://github.com/deepjavalibrary/djl-serving/blob/master/serving/docker/README.md#build-docker-image) and verify the change? +- [ ] Have you added tests that prove your fix is effective or that this feature works? +- [ ] Has code been commented, particularly in hard-to-understand areas? +- [ ] Have you made corresponding changes to the documentation? + +## Feature/Issue validation/testing + +Please describe the Unit or Integration tests that you ran to verify your changes and relevant result summary. Provide instructions so it can be reproduced. +Please also list any relevant details for your test configuration. + +- [ ] Test A +Logs for Test A + +- [ ] Test B +Logs for Test B + diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 06c265a05..acd64afbd 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -14,8 +14,8 @@ on: type: string required: true default: 'nightly' - schedule: - - cron: '0 13 * * *' + # schedule: + # - cron: '0 13 * * *' permissions: id-token: write @@ -34,12 +34,12 @@ jobs: /usr/share/dotnet /usr/local/lib/android /opt/ghc \ /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \ $AGENT_TOOLSDIRECTORY - - uses: actions/checkout@v4 - - name: Login to Docker - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + # - uses: actions/checkout@v4 + # - name: Login to Docker + # uses: docker/login-action@v3 + # with: + # username: ${{ secrets.DOCKER_USERNAME }} + # password: ${{ secrets.DOCKER_PASSWORD }} - name: install awscli run: | sudo apt-get update diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 389abdaa9..416558c28 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -7,6 +7,18 @@ on: description: 'The released version of DJL' required: false default: '' + workflow_call: + inputs: + djl-version: + description: 'The released version of DJL' + required: false + type: string + default: '' + override_image_uri: + description: 'Override the default docker image URI' + required: false + type: string + default: '' schedule: - cron: '0 15 * * *' @@ -205,7 +217,14 @@ jobs: - name: Install pip dependencies run: pip3 install requests numpy pillow wheel - name: Build container name - run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }} + run: | + ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }} + # Set docker image URI based on override or default value + if [ ! -z "${{ inputs.override_image_uri }}" ]; then + DOCKER_IMAGE_URI="${{ inputs.override_image_uri }}" + else + DOCKER_IMAGE_URI="deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG" + fi - name: Download models and dockers run: | docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG From 39a90506c96bc2e9227a05af56eaec490b5c5f27 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Wed, 4 Dec 2024 22:16:01 +0000 Subject: [PATCH 02/62] use my iam role --- .github/workflows/docker-nightly-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index acd64afbd..19dc5f6c9 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -47,7 +47,7 @@ jobs: - name: Configure AWS Credentials uses: aws-actions/configure-aws-credentials@v4 with: - role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving + role-to-assume: arn:aws:iam::243947502783:role/github_action_happy_amazonian aws-region: us-east-1 - name: Set up JDK 17 uses: actions/setup-java@v4 From c8233a36b79e05207dd6b9b0d699269aa8f2259e Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Wed, 4 Dec 2024 22:28:43 +0000 Subject: [PATCH 03/62] add checkout back --- .github/workflows/docker-nightly-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 19dc5f6c9..d6c41e75f 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -34,7 +34,7 @@ jobs: /usr/share/dotnet /usr/local/lib/android /opt/ghc \ /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \ $AGENT_TOOLSDIRECTORY - # - uses: actions/checkout@v4 + - uses: actions/checkout@v4 # - name: Login to Docker # uses: docker/login-action@v3 # with: From b96628bc14adec0dca833aa19233750479a415b8 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Wed, 4 Dec 2024 22:40:03 +0000 Subject: [PATCH 04/62] use my repo --- .github/workflows/docker-nightly-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index d6c41e75f..01a215b64 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -88,7 +88,7 @@ jobs: --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ ${{ matrix.arch }} - repo="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" + repo="243947502783.dkr.ecr.us-east-1.amazonaws.com/djl-tmp:latest" aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo tempTag="$repo:${{ matrix.arch }}-${GITHUB_SHA}" docker tag deepjavalibrary/djl-serving:${{ matrix.arch }}-nightly $tempTag From 85b15e58848c44a44b8e88155b1250244fbd87df Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Wed, 4 Dec 2024 22:41:26 +0000 Subject: [PATCH 05/62] remov ecreate runner --- .github/workflows/docker-nightly-publish.yml | 206 +++++++++---------- 1 file changed, 103 insertions(+), 103 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 01a215b64..a1102c898 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -111,109 +111,109 @@ jobs: docker tag deepjavalibrary/djl-serving:${{ env.SERVING_VERSION }} deepjavalibrary/djl-serving:latest docker push deepjavalibrary/djl-serving:latest - create-runner: - runs-on: [ self-hosted, scheduler ] - steps: - - name: Create new Graviton instance - id: create_aarch64 - run: | - cd /home/ubuntu/djl_benchmark_script/scripts - token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ - https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ - --fail \ - | jq '.token' | tr -d '"' ) - ./start_instance.sh action_graviton $token djl-serving - outputs: - aarch64_instance_id: ${{ steps.create_aarch64.outputs.action_graviton_instance_id }} + # create-runner: + # runs-on: [ self-hosted, scheduler ] + # steps: + # - name: Create new Graviton instance + # id: create_aarch64 + # run: | + # cd /home/ubuntu/djl_benchmark_script/scripts + # token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ + # https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ + # --fail \ + # | jq '.token' | tr -d '"' ) + # ./start_instance.sh action_graviton $token djl-serving + # outputs: + # aarch64_instance_id: ${{ steps.create_aarch64.outputs.action_graviton_instance_id }} - nightly-aarch64: - runs-on: [ self-hosted, aarch64 ] - timeout-minutes: 60 - needs: create-runner - steps: - - uses: actions/checkout@v4 - - name: Clean docker env - working-directory: serving/docker - run: | - yes | docker system prune -a --volumes - - name: Login to Docker - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - name: install awscli - run: | - sudo apt-get update - sudo apt-get install awscli -y - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving - aws-region: us-east-1 - - name: Set up JDK 17 - uses: actions/setup-java@v4 - with: - distribution: 'corretto' - java-version: 17 - - uses: actions/cache@v4 - with: - path: ~/.gradle/caches - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} - - name: Extract DJL and DJL Serving versions from TOML - id: get-versions - run: | - DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) - SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) - echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV - echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV - - name: Build serving package for nightly - if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} - run: | - ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot - - name: Build and push nightly docker image - if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} - working-directory: serving/docker - run: | - export NIGHTLY="-nightly" - docker compose build --no-cache \ - --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ - --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ - aarch64 - docker compose push aarch64 - - name: Build and push temp image - if: ${{ inputs.mode == 'temp' }} - working-directory: serving/docker - run: | - export NIGHTLY="-nightly" - docker compose build --no-cache \ - --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ - --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ - aarch64 - repo="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" - aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo - tempTag="$repo:aarch64-${GITHUB_SHA}" - docker tag deepjavalibrary/djl-serving:aarch64-nightly $tempTag - docker push $tempTag - - name: Build and push release docker image - if: ${{ inputs.mode == 'release' }} - working-directory: serving/docker - run: | - export BASE_RELEASE_VERSION="${{ env.SERVING_VERSION }}" - export RELEASE_VERSION="${{ env.SERVING_VERSION }}-" - docker compose build --no-cache \ - --build-arg djl_version=${{ env.DJL_VERSION }} \ - --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ - aarch64 - docker compose push aarch64 + # nightly-aarch64: + # runs-on: [ self-hosted, aarch64 ] + # timeout-minutes: 60 + # needs: create-runner + # steps: + # - uses: actions/checkout@v4 + # - name: Clean docker env + # working-directory: serving/docker + # run: | + # yes | docker system prune -a --volumes + # - name: Login to Docker + # uses: docker/login-action@v3 + # with: + # username: ${{ secrets.DOCKER_USERNAME }} + # password: ${{ secrets.DOCKER_PASSWORD }} + # - name: install awscli + # run: | + # sudo apt-get update + # sudo apt-get install awscli -y + # - name: Configure AWS Credentials + # uses: aws-actions/configure-aws-credentials@v4 + # with: + # role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving + # aws-region: us-east-1 + # - name: Set up JDK 17 + # uses: actions/setup-java@v4 + # with: + # distribution: 'corretto' + # java-version: 17 + # - uses: actions/cache@v4 + # with: + # path: ~/.gradle/caches + # key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} + # - name: Extract DJL and DJL Serving versions from TOML + # id: get-versions + # run: | + # DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) + # SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) + # echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV + # echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV + # - name: Build serving package for nightly + # if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} + # run: | + # ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot + # - name: Build and push nightly docker image + # if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} + # working-directory: serving/docker + # run: | + # export NIGHTLY="-nightly" + # docker compose build --no-cache \ + # --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ + # --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ + # aarch64 + # docker compose push aarch64 + # - name: Build and push temp image + # if: ${{ inputs.mode == 'temp' }} + # working-directory: serving/docker + # run: | + # export NIGHTLY="-nightly" + # docker compose build --no-cache \ + # --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ + # --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ + # aarch64 + # repo="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" + # aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo + # tempTag="$repo:aarch64-${GITHUB_SHA}" + # docker tag deepjavalibrary/djl-serving:aarch64-nightly $tempTag + # docker push $tempTag + # - name: Build and push release docker image + # if: ${{ inputs.mode == 'release' }} + # working-directory: serving/docker + # run: | + # export BASE_RELEASE_VERSION="${{ env.SERVING_VERSION }}" + # export RELEASE_VERSION="${{ env.SERVING_VERSION }}-" + # docker compose build --no-cache \ + # --build-arg djl_version=${{ env.DJL_VERSION }} \ + # --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ + # aarch64 + # docker compose push aarch64 - stop-runner: - if: always() - runs-on: [ self-hosted, scheduler ] - needs: [nightly-aarch64, create-runner] - steps: - - name: Stop all instances - run: | - cd /home/ubuntu/djl_benchmark_script/scripts - instance_id=${{ needs.create-runner.outputs.aarch64_instance_id }} - ./stop_instance.sh $instance_id + # stop-runner: + # if: always() + # runs-on: [ self-hosted, scheduler ] + # needs: [nightly-aarch64, create-runner] + # steps: + # - name: Stop all instances + # run: | + # cd /home/ubuntu/djl_benchmark_script/scripts + # instance_id=${{ needs.create-runner.outputs.aarch64_instance_id }} + # ./stop_instance.sh $instance_id From ec5bf6574226f2a59d2249bb879f82a1d4097604 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Wed, 4 Dec 2024 22:59:18 +0000 Subject: [PATCH 06/62] fix-tag --- .github/workflows/docker-nightly-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index a1102c898..69b5fb2f4 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -88,7 +88,7 @@ jobs: --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ ${{ matrix.arch }} - repo="243947502783.dkr.ecr.us-east-1.amazonaws.com/djl-tmp:latest" + repo="243947502783.dkr.ecr.us-east-1.amazonaws.com/djl-tmp" aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo tempTag="$repo:${{ matrix.arch }}-${GITHUB_SHA}" docker tag deepjavalibrary/djl-serving:${{ matrix.arch }}-nightly $tempTag From 7505dffb8854608c02dda3b33b97adcb33efd2a0 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Wed, 4 Dec 2024 23:56:58 +0000 Subject: [PATCH 07/62] make everything push to ECR --- .github/workflows/docker-nightly-publish.yml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 69b5fb2f4..93aeffa49 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -70,7 +70,6 @@ jobs: run: | ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot - name: Build and push nightly docker image - if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} working-directory: serving/docker run: | export NIGHTLY="-nightly" @@ -79,15 +78,9 @@ jobs: --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ ${{ matrix.arch }} docker compose push ${{ matrix.arch }} - - name: Build and push temp image - if: ${{ inputs.mode == 'temp' }} + - name: Tag and push temp image to ECR repo working-directory: serving/docker run: | - export NIGHTLY="-nightly" - docker compose build --no-cache \ - --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ - --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ - ${{ matrix.arch }} repo="243947502783.dkr.ecr.us-east-1.amazonaws.com/djl-tmp" aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo tempTag="$repo:${{ matrix.arch }}-${GITHUB_SHA}" From 1f17c176ce644cda5aeb300087b9cb3e6c0adc13 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Thu, 5 Dec 2024 00:06:33 +0000 Subject: [PATCH 08/62] add mode in tag --- .github/workflows/docker-nightly-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 93aeffa49..6c8448b3e 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -83,7 +83,7 @@ jobs: run: | repo="243947502783.dkr.ecr.us-east-1.amazonaws.com/djl-tmp" aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo - tempTag="$repo:${{ matrix.arch }}-${GITHUB_SHA}" + tempTag="$repo:${{ matrix.arch }}-${GITHUB_SHA}-${{inputs.mode}}" docker tag deepjavalibrary/djl-serving:${{ matrix.arch }}-nightly $tempTag docker push $tempTag - name: Build and push release docker image From 3a748926389f6e33df01e872431d08096c62c493 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Thu, 5 Dec 2024 00:25:25 +0000 Subject: [PATCH 09/62] add condition to push --- .github/workflows/docker-nightly-publish.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 6c8448b3e..f998e6565 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -77,7 +77,10 @@ jobs: --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ ${{ matrix.arch }} - docker compose push ${{ matrix.arch }} + + if [[ "${{ inputs.mode }}" == "nightly" || -z "${{ inputs.mode }}" ]]; then + docker compose push ${{ matrix.arch }} + fi - name: Tag and push temp image to ECR repo working-directory: serving/docker run: | From f63a7fec833f352bbef776b6f4a8acbd76acc4c6 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Thu, 5 Dec 2024 01:15:16 +0000 Subject: [PATCH 10/62] remove blank lin --- .github/workflows/docker-nightly-publish.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index f998e6565..51a9935fd 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -77,7 +77,6 @@ jobs: --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ ${{ matrix.arch }} - if [[ "${{ inputs.mode }}" == "nightly" || -z "${{ inputs.mode }}" ]]; then docker compose push ${{ matrix.arch }} fi From 8d43fb3979f881ba09a37f184d4250eb01ccf58d Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Fri, 6 Dec 2024 02:08:34 +0000 Subject: [PATCH 11/62] add call integration workflow --- .github/workflows/docker-nightly-publish.yml | 46 ++++++++++++++++++-- .github/workflows/integration.yml | 8 ---- tests/integration/tests.py | 1 - 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 51a9935fd..9c88e7f81 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -7,6 +7,11 @@ on: description: 'release/nightly/temp, default is nightly' required: true default: 'nightly' + skip_nightly_integ_test: + description: 'buld and push the nightly without running integ test' + required: false + default: false + type: boolean workflow_call: inputs: mode: @@ -21,6 +26,10 @@ permissions: id-token: write contents: read +env: + AWS_ECR_REPO: "243947502783.dkr.ecr.us-east-1.amazonaws.com/djl-tmp" + DOCKER_HUB_REPO: "deepjavalibrary/djl-serving" + jobs: nightly-build: runs-on: ubuntu-latest @@ -81,13 +90,17 @@ jobs: docker compose push ${{ matrix.arch }} fi - name: Tag and push temp image to ECR repo + if: ${{ !inputs.skip_nightly_integ_test }} working-directory: serving/docker run: | - repo="243947502783.dkr.ecr.us-east-1.amazonaws.com/djl-tmp" aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo - tempTag="$repo:${{ matrix.arch }}-${GITHUB_SHA}-${{inputs.mode}}" - docker tag deepjavalibrary/djl-serving:${{ matrix.arch }}-nightly $tempTag + tempTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-${GITHUB_SHA}" + docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly $tempTag docker push $tempTag + - name: Push nightly to dockerhub + if: ${{ inputs.skip_nightly_integ_test }} + run: | + docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly - name: Build and push release docker image if: ${{ inputs.mode == 'release' }} working-directory: serving/docker @@ -106,6 +119,33 @@ jobs: docker tag deepjavalibrary/djl-serving:${{ env.SERVING_VERSION }} deepjavalibrary/djl-serving:latest docker push deepjavalibrary/djl-serving:latest + run-integration-tests: + if: ${{ inputs.mode == 'nightly' && !inputs.skip_integ_test }} + needs: [nightly-build] + uses: ./.github/workflows/integration.yml + secrets: inherit + with: + djl-version: temp + + push-to-dockerhub: + runs-on: ubuntu-latest + needs: [run-integration-tests] + strategy: + matrix: + arch: [ cpu, cpu-full, pytorch-inf2, pytorch-gpu, tensorrt-llm, lmi ] + steps: + - name: Login to Docker + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + - name: Pull Image from ECR and Push it to Dockerhub + run: | + aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} + tempTag=" ${{env.AWS_ECR_REPO}}:${{ matrix.arch }}-${GITHUB_SHA}" + docker pull $tempTag + docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly + # create-runner: # runs-on: [ self-hosted, scheduler ] # steps: diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 416558c28..a63d37543 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -14,14 +14,6 @@ on: required: false type: string default: '' - override_image_uri: - description: 'Override the default docker image URI' - required: false - type: string - default: '' - schedule: - - cron: '0 15 * * *' - jobs: create-runners: diff --git a/tests/integration/tests.py b/tests/integration/tests.py index 7d85ba2d2..8f1b8b070 100644 --- a/tests/integration/tests.py +++ b/tests/integration/tests.py @@ -11,7 +11,6 @@ djl_version = os.environ.get('TEST_DJL_VERSION', '').strip() - def is_applicable_cuda_capability(arch: int) -> bool: import torch if not torch.cuda.is_available(): From 26745402c0c4c5d2310a85dd844536e4082f2022 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Fri, 6 Dec 2024 21:07:36 +0000 Subject: [PATCH 12/62] remove push for testing --- .github/workflows/docker-nightly-publish.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 9c88e7f81..8f03f4b21 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -141,10 +141,11 @@ jobs: password: ${{ secrets.DOCKER_PASSWORD }} - name: Pull Image from ECR and Push it to Dockerhub run: | - aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} - tempTag=" ${{env.AWS_ECR_REPO}}:${{ matrix.arch }}-${GITHUB_SHA}" - docker pull $tempTag - docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly + echo TEST FAKE PUSHED + # aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} + # tempTag=" ${{env.AWS_ECR_REPO}}:${{ matrix.arch }}-${GITHUB_SHA}" + # docker pull $tempTag + # docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly # create-runner: # runs-on: [ self-hosted, scheduler ] From 4133b2e1687b3a44c563d489f4cc1c8a6c10e5c1 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Fri, 6 Dec 2024 21:38:22 +0000 Subject: [PATCH 13/62] fix push condition --- .github/workflows/docker-nightly-publish.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 8f03f4b21..ffe6c84cd 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -86,11 +86,8 @@ jobs: --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ ${{ matrix.arch }} - if [[ "${{ inputs.mode }}" == "nightly" || -z "${{ inputs.mode }}" ]]; then - docker compose push ${{ matrix.arch }} - fi - name: Tag and push temp image to ECR repo - if: ${{ !inputs.skip_nightly_integ_test }} + if: ${{ !inputs.skip_nightly_integ_test && inputs.mode == 'nightly' }} working-directory: serving/docker run: | aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo @@ -98,9 +95,9 @@ jobs: docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly $tempTag docker push $tempTag - name: Push nightly to dockerhub - if: ${{ inputs.skip_nightly_integ_test }} + if: ${{ inputs.skip_nightly_integ_test && inputs.mode == 'nightly' }} run: | - docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly + docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly - name: Build and push release docker image if: ${{ inputs.mode == 'release' }} working-directory: serving/docker From 850a69a9c21a3644de5ec427571ae46c73b81454 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Fri, 6 Dec 2024 21:44:34 +0000 Subject: [PATCH 14/62] fix repo name --- .github/workflows/docker-nightly-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index ffe6c84cd..bd1ce396f 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -90,7 +90,7 @@ jobs: if: ${{ !inputs.skip_nightly_integ_test && inputs.mode == 'nightly' }} working-directory: serving/docker run: | - aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo + aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} tempTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-${GITHUB_SHA}" docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly $tempTag docker push $tempTag From f182852f8f4c5edbadeaaad2e6d1d0c49c111f23 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Fri, 6 Dec 2024 22:29:49 +0000 Subject: [PATCH 15/62] change repo for testing in djl --- .github/workflows/docker-nightly-publish.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index bd1ce396f..5927d332c 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -27,7 +27,8 @@ permissions: contents: read env: - AWS_ECR_REPO: "243947502783.dkr.ecr.us-east-1.amazonaws.com/djl-tmp" + # AWS_ECR_REPO: "243947502783.dkr.ecr.us-east-1.amazonaws.com/djl-tmp" + AWS_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" DOCKER_HUB_REPO: "deepjavalibrary/djl-serving" jobs: @@ -87,7 +88,7 @@ jobs: --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ ${{ matrix.arch }} - name: Tag and push temp image to ECR repo - if: ${{ !inputs.skip_nightly_integ_test && inputs.mode == 'nightly' }} + if: ${{ !inputs.skip_nightly_integ_test && inputs.mode == 'nightly' || inputs.mode == 'temp'}} working-directory: serving/docker run: | aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} From 6f01ae3067c9939f2585a5e54d7d67e00cf3a206 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Fri, 6 Dec 2024 22:33:09 +0000 Subject: [PATCH 16/62] fix role --- .github/workflows/docker-nightly-publish.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 5927d332c..ebe477184 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -57,7 +57,8 @@ jobs: - name: Configure AWS Credentials uses: aws-actions/configure-aws-credentials@v4 with: - role-to-assume: arn:aws:iam::243947502783:role/github_action_happy_amazonian + # role-to-assume: arn:aws:iam::243947502783:role/github_action_happy_amazonian + role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving aws-region: us-east-1 - name: Set up JDK 17 uses: actions/setup-java@v4 From 5ace20e5b932f48339d69f73903062a08d710075 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Fri, 6 Dec 2024 23:39:42 +0000 Subject: [PATCH 17/62] fix neuron image, disable pytest capture --- .github/workflows/integration.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index a63d37543..965dbd433 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -169,7 +169,7 @@ jobs: env: TEST_DJL_VERSION: ${{ inputs.djl-version }} run: | - python -m pytest -k ${{ matrix.test.test }} tests.py + python -m pytest -s -k ${{ matrix.test.test }} tests.py - name: Cleanup working-directory: tests/integration run: | @@ -212,14 +212,14 @@ jobs: run: | ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }} # Set docker image URI based on override or default value - if [ ! -z "${{ inputs.override_image_uri }}" ]; then - DOCKER_IMAGE_URI="${{ inputs.override_image_uri }}" + if [ ${{ github.event.inputs.djl-version }} ]; then + DOCKER_IMAGE_URI="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp:pytorch-inf2-${GITHUB_SHA}" else DOCKER_IMAGE_URI="deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG" fi - name: Download models and dockers run: | - docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG + docker pull $DOCKER_IMAGE_URI - name: Run djl_python unit/integration tests on container working-directory: engines/python/setup run: | @@ -232,7 +232,7 @@ jobs: -v $PWD/:/opt/ml/model/ \ -w /opt/ml/model \ --device=/dev/neuron0:/dev/neuron0 \ - deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG \ + $DOCKER_IMAGE_URI \ /bin/bash -c "'pip install /opt/ml/model/dist/*.whl pytest' && \ pytest djl_python/tests/neuron_test_scripts/ | tee logs/results.log" From e974d655f302241af9e1e4c7efd0aeefee706f70 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Sat, 7 Dec 2024 00:17:14 +0000 Subject: [PATCH 18/62] add docker credential --- .github/workflows/integration.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 965dbd433..0883dc1a1 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -14,6 +14,8 @@ on: required: false type: string default: '' +env: + AWS_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" jobs: create-runners: @@ -164,11 +166,17 @@ jobs: wget https://publish.djl.ai/awscurl/awscurl chmod +x awscurl mkdir outputs + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving + aws-region: us-east-1 - name: Test working-directory: tests/integration env: TEST_DJL_VERSION: ${{ inputs.djl-version }} run: | + aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} python -m pytest -s -k ${{ matrix.test.test }} tests.py - name: Cleanup working-directory: tests/integration @@ -208,6 +216,11 @@ jobs: python-version: '3.10.x' - name: Install pip dependencies run: pip3 install requests numpy pillow wheel + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving + aws-region: us-east-1 - name: Build container name run: | ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }} @@ -217,8 +230,10 @@ jobs: else DOCKER_IMAGE_URI="deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG" fi + echo "DOCKER_IMAGE_URI=$DOCKER_IMAGE_URI" >> $GITHUB_ENV - name: Download models and dockers run: | + aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} docker pull $DOCKER_IMAGE_URI - name: Run djl_python unit/integration tests on container working-directory: engines/python/setup From 8de85fad875baaf5840f6a95ca2007d8f0c2d988 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Sat, 7 Dec 2024 01:06:29 +0000 Subject: [PATCH 19/62] change env --- .github/workflows/integration.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 0883dc1a1..4f1c21cc8 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -230,11 +230,14 @@ jobs: else DOCKER_IMAGE_URI="deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG" fi + echo $DOCKER_IMAGE_URI echo "DOCKER_IMAGE_URI=$DOCKER_IMAGE_URI" >> $GITHUB_ENV - name: Download models and dockers run: | aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} - docker pull $DOCKER_IMAGE_URI + echo ${{ env.DOCKER_IMAGE_URI }} + echo $DOCKER_IMAGE_URI + docker pull ${{ env.DOCKER_IMAGE_URI }} - name: Run djl_python unit/integration tests on container working-directory: engines/python/setup run: | @@ -247,7 +250,7 @@ jobs: -v $PWD/:/opt/ml/model/ \ -w /opt/ml/model \ --device=/dev/neuron0:/dev/neuron0 \ - $DOCKER_IMAGE_URI \ + ${{ env.DOCKER_IMAGE_URI }} \ /bin/bash -c "'pip install /opt/ml/model/dist/*.whl pytest' && \ pytest djl_python/tests/neuron_test_scripts/ | tee logs/results.log" From 9808a935c3edc11e43fff63ca28f112eb4020353 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Sat, 7 Dec 2024 01:36:51 +0000 Subject: [PATCH 20/62] fix neuron docker tag --- .github/workflows/integration.yml | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 4f1c21cc8..64df2bd67 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -224,20 +224,17 @@ jobs: - name: Build container name run: | ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }} - # Set docker image URI based on override or default value - if [ ${{ github.event.inputs.djl-version }} ]; then + - name: Download models and dockers + run: | + if [ ${{ github.event.inputs.djl-version }} == "temp" ]; then DOCKER_IMAGE_URI="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp:pytorch-inf2-${GITHUB_SHA}" else DOCKER_IMAGE_URI="deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG" fi - echo $DOCKER_IMAGE_URI - echo "DOCKER_IMAGE_URI=$DOCKER_IMAGE_URI" >> $GITHUB_ENV - - name: Download models and dockers - run: | + echo "DOCKER_IMAGE_URI=$DOCKER_IMAGE_URI" >>$GITHUB_ENV aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} - echo ${{ env.DOCKER_IMAGE_URI }} echo $DOCKER_IMAGE_URI - docker pull ${{ env.DOCKER_IMAGE_URI }} + docker pull $DOCKER_IMAGE_URI - name: Run djl_python unit/integration tests on container working-directory: engines/python/setup run: | @@ -250,7 +247,7 @@ jobs: -v $PWD/:/opt/ml/model/ \ -w /opt/ml/model \ --device=/dev/neuron0:/dev/neuron0 \ - ${{ env.DOCKER_IMAGE_URI }} \ + $DOCKER_IMAGE_URI \ /bin/bash -c "'pip install /opt/ml/model/dist/*.whl pytest' && \ pytest djl_python/tests/neuron_test_scripts/ | tee logs/results.log" From 14185c51abfe680875eacb1f84ccdd18060d0662 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Mon, 9 Dec 2024 19:09:19 +0000 Subject: [PATCH 21/62] add back aarch build --- .github/workflows/docker-nightly-publish.yml | 209 +++++++++---------- 1 file changed, 104 insertions(+), 105 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index ebe477184..4fe78f538 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -27,7 +27,6 @@ permissions: contents: read env: - # AWS_ECR_REPO: "243947502783.dkr.ecr.us-east-1.amazonaws.com/djl-tmp" AWS_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" DOCKER_HUB_REPO: "deepjavalibrary/djl-serving" @@ -120,7 +119,7 @@ jobs: run-integration-tests: if: ${{ inputs.mode == 'nightly' && !inputs.skip_integ_test }} - needs: [nightly-build] + needs: [nightly-build, nightly-aarch64] uses: ./.github/workflows/integration.yml secrets: inherit with: @@ -146,109 +145,109 @@ jobs: # docker pull $tempTag # docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly - # create-runner: - # runs-on: [ self-hosted, scheduler ] - # steps: - # - name: Create new Graviton instance - # id: create_aarch64 - # run: | - # cd /home/ubuntu/djl_benchmark_script/scripts - # token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ - # https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ - # --fail \ - # | jq '.token' | tr -d '"' ) - # ./start_instance.sh action_graviton $token djl-serving - # outputs: - # aarch64_instance_id: ${{ steps.create_aarch64.outputs.action_graviton_instance_id }} + create-runner: + runs-on: [ self-hosted, scheduler ] + steps: + - name: Create new Graviton instance + id: create_aarch64 + run: | + cd /home/ubuntu/djl_benchmark_script/scripts + token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ + https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ + --fail \ + | jq '.token' | tr -d '"' ) + ./start_instance.sh action_graviton $token djl-serving + outputs: + aarch64_instance_id: ${{ steps.create_aarch64.outputs.action_graviton_instance_id }} - # nightly-aarch64: - # runs-on: [ self-hosted, aarch64 ] - # timeout-minutes: 60 - # needs: create-runner - # steps: - # - uses: actions/checkout@v4 - # - name: Clean docker env - # working-directory: serving/docker - # run: | - # yes | docker system prune -a --volumes - # - name: Login to Docker - # uses: docker/login-action@v3 - # with: - # username: ${{ secrets.DOCKER_USERNAME }} - # password: ${{ secrets.DOCKER_PASSWORD }} - # - name: install awscli - # run: | - # sudo apt-get update - # sudo apt-get install awscli -y - # - name: Configure AWS Credentials - # uses: aws-actions/configure-aws-credentials@v4 - # with: - # role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving - # aws-region: us-east-1 - # - name: Set up JDK 17 - # uses: actions/setup-java@v4 - # with: - # distribution: 'corretto' - # java-version: 17 - # - uses: actions/cache@v4 - # with: - # path: ~/.gradle/caches - # key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} - # - name: Extract DJL and DJL Serving versions from TOML - # id: get-versions - # run: | - # DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) - # SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) - # echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV - # echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV - # - name: Build serving package for nightly - # if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} - # run: | - # ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot - # - name: Build and push nightly docker image - # if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} - # working-directory: serving/docker - # run: | - # export NIGHTLY="-nightly" - # docker compose build --no-cache \ - # --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ - # --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ - # aarch64 - # docker compose push aarch64 - # - name: Build and push temp image - # if: ${{ inputs.mode == 'temp' }} - # working-directory: serving/docker - # run: | - # export NIGHTLY="-nightly" - # docker compose build --no-cache \ - # --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ - # --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ - # aarch64 - # repo="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" - # aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo - # tempTag="$repo:aarch64-${GITHUB_SHA}" - # docker tag deepjavalibrary/djl-serving:aarch64-nightly $tempTag - # docker push $tempTag - # - name: Build and push release docker image - # if: ${{ inputs.mode == 'release' }} - # working-directory: serving/docker - # run: | - # export BASE_RELEASE_VERSION="${{ env.SERVING_VERSION }}" - # export RELEASE_VERSION="${{ env.SERVING_VERSION }}-" - # docker compose build --no-cache \ - # --build-arg djl_version=${{ env.DJL_VERSION }} \ - # --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ - # aarch64 - # docker compose push aarch64 + nightly-aarch64: + runs-on: [ self-hosted, aarch64 ] + timeout-minutes: 60 + needs: create-runner + steps: + - uses: actions/checkout@v4 + - name: Clean docker env + working-directory: serving/docker + run: | + yes | docker system prune -a --volumes + - name: Login to Docker + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + - name: install awscli + run: | + sudo apt-get update + sudo apt-get install awscli -y + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving + aws-region: us-east-1 + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + distribution: 'corretto' + java-version: 17 + - uses: actions/cache@v4 + with: + path: ~/.gradle/caches + key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} + - name: Extract DJL and DJL Serving versions from TOML + id: get-versions + run: | + DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) + SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) + echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV + echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV + - name: Build serving package for nightly + if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} + run: | + ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot + - name: Build and push nightly docker image + if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} + working-directory: serving/docker + run: | + export NIGHTLY="-nightly" + docker compose build --no-cache \ + --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ + --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ + aarch64 + docker compose push aarch64 + - name: Build and push temp image + if: ${{ inputs.mode == 'temp' || inputs.mode == 'nightly' }} + working-directory: serving/docker + run: | + export NIGHTLY="-nightly" + docker compose build --no-cache \ + --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ + --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ + aarch64 + repo="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" + aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} + tempTag="${{env.AWS_ECR_REPO}}:aarch64-${GITHUB_SHA}" + docker tag deepjavalibrary/djl-serving:aarch64-nightly $tempTag + docker push $tempTag + - name: Build and push release docker image + if: ${{ inputs.mode == 'release' }} + working-directory: serving/docker + run: | + export BASE_RELEASE_VERSION="${{ env.SERVING_VERSION }}" + export RELEASE_VERSION="${{ env.SERVING_VERSION }}-" + docker compose build --no-cache \ + --build-arg djl_version=${{ env.DJL_VERSION }} \ + --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ + aarch64 + docker compose push aarch64 - # stop-runner: - # if: always() - # runs-on: [ self-hosted, scheduler ] - # needs: [nightly-aarch64, create-runner] - # steps: - # - name: Stop all instances - # run: | - # cd /home/ubuntu/djl_benchmark_script/scripts - # instance_id=${{ needs.create-runner.outputs.aarch64_instance_id }} - # ./stop_instance.sh $instance_id + stop-runner: + if: always() + runs-on: [ self-hosted, scheduler ] + needs: [nightly-aarch64, create-runner] + steps: + - name: Stop all instances + run: | + cd /home/ubuntu/djl_benchmark_script/scripts + instance_id=${{ needs.create-runner.outputs.aarch64_instance_id }} + ./stop_instance.sh $instance_id From 9f4a91a485a48bd60f1e967685fd5351e4db4264 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Mon, 9 Dec 2024 20:08:22 +0000 Subject: [PATCH 22/62] fix for PR --- .github/workflows/docker-nightly-publish.yml | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 4fe78f538..316453e70 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -19,8 +19,8 @@ on: type: string required: true default: 'nightly' - # schedule: - # - cron: '0 13 * * *' + schedule: + - cron: '0 13 * * *' permissions: id-token: write @@ -44,11 +44,11 @@ jobs: /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \ $AGENT_TOOLSDIRECTORY - uses: actions/checkout@v4 - # - name: Login to Docker - # uses: docker/login-action@v3 - # with: - # username: ${{ secrets.DOCKER_USERNAME }} - # password: ${{ secrets.DOCKER_PASSWORD }} + - name: Login to Docker + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} - name: install awscli run: | sudo apt-get update @@ -56,7 +56,6 @@ jobs: - name: Configure AWS Credentials uses: aws-actions/configure-aws-credentials@v4 with: - # role-to-assume: arn:aws:iam::243947502783:role/github_action_happy_amazonian role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving aws-region: us-east-1 - name: Set up JDK 17 From 0adabd09246c796422b78aa56f0a7dfcbda7d0bd Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Mon, 9 Dec 2024 20:42:03 +0000 Subject: [PATCH 23/62] add back docker push --- .github/workflows/docker-nightly-publish.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index d5312aa0a..c84eff2dc 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -138,11 +138,10 @@ jobs: password: ${{ secrets.DOCKER_PASSWORD }} - name: Pull Image from ECR and Push it to Dockerhub run: | - echo TEST FAKE PUSHED - # aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} - # tempTag=" ${{env.AWS_ECR_REPO}}:${{ matrix.arch }}-${GITHUB_SHA}" - # docker pull $tempTag - # docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly + aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} + tempTag=" ${{env.AWS_ECR_REPO}}:${{ matrix.arch }}-${GITHUB_SHA}" + docker pull $tempTag + docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly create-runner: runs-on: [ self-hosted, scheduler ] From 7a75c6632aa1fd7047f4db35ed2c1b2393d78023 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Mon, 9 Dec 2024 21:04:40 +0000 Subject: [PATCH 24/62] fix format --- tests/integration/tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/tests.py b/tests/integration/tests.py index 8f1b8b070..7d85ba2d2 100644 --- a/tests/integration/tests.py +++ b/tests/integration/tests.py @@ -11,6 +11,7 @@ djl_version = os.environ.get('TEST_DJL_VERSION', '').strip() + def is_applicable_cuda_capability(arch: int) -> bool: import torch if not torch.cuda.is_available(): From c40f9f590bfb5791948bb8b78e283ac6c9465b3a Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Tue, 10 Dec 2024 01:13:17 +0000 Subject: [PATCH 25/62] add the missing tag step --- .github/workflows/docker-nightly-publish.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index c84eff2dc..0acb1b361 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -141,6 +141,7 @@ jobs: aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} tempTag=" ${{env.AWS_ECR_REPO}}:${{ matrix.arch }}-${GITHUB_SHA}" docker pull $tempTag + docker tag $tempTag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly create-runner: From 9fb5574a1b80dc65aa46a37376269513a45941f7 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Thu, 12 Dec 2024 02:06:11 +0000 Subject: [PATCH 26/62] reorg --- .github/workflows/docker-nightly-publish.yml | 153 +++++++++++-------- .github/workflows/integration.yml | 4 +- tests/integration/tests.py | 2 +- 3 files changed, 95 insertions(+), 64 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 0acb1b361..136d5d10f 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -74,75 +74,90 @@ jobs: SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV + - name: set condition env + run: | + BUILD_TEMP_DOCKER_IMAGE=${{ !inputs.skip_nightly_integ_test }} + echo "BUILD_TEMP_DOCKER_IMAGE=$BUILD_TEMP_DOCKER_IMAGE" >> $GITHUB_ENV + + PUSH_TO_DOCKERHUB_DIRECTLY=${{ inputs.skip_nightly_integ_test && !inputs.mode == 'temp'}} + echo "PUSH_TO_DOCKERHUB_DIRECTLY=$PUSH_TO_DOCKERHUB_DIRECTLY" >> $GITHUB_ENV + - name: Build release docker image + if: ${{ inputs.mode == 'release'}} + working-directory: serving/docker + run: | + export BASE_RELEASE_VERSION="${{ env.SERVING_VERSION }}" + export RELEASE_VERSION="${{ env.SERVING_VERSION }}-" + docker compose build --no-cache \ + --build-arg djl_version=${{ env.DJL_VERSION }} \ + --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ + ${{ matrix.arch }} - name: Build serving package for nightly if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} run: | ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot - - name: Build and push nightly docker image + - name: Build temp docker image + if: ${{ env.BUILD_TEMP_DOCKER_IMAGE }} working-directory: serving/docker run: | export NIGHTLY="-nightly" + echo "NIGHTLY=$NIGHTLY" >> $GITHUB_ENV + docker compose build --no-cache \ --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ ${{ matrix.arch }} - name: Tag and push temp image to ECR repo - if: ${{ !inputs.skip_nightly_integ_test && inputs.mode == 'nightly' || inputs.mode == 'temp'}} + if: ${{ env.BUILD_TEMP_DOCKER_IMAGE }} working-directory: serving/docker run: | - aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} - tempTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-${GITHUB_SHA}" - docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly $tempTag + ECR_REGION= $(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') + aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} + tempTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-${GITHUB_SHA}-${GITHUB_RUN_ID}" + docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $tempTag docker push $tempTag - - name: Push nightly to dockerhub - if: ${{ inputs.skip_nightly_integ_test && inputs.mode == 'nightly' }} - run: | - docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly - - name: Build and push release docker image - if: ${{ inputs.mode == 'release' }} + - name: push docker image to dockerhub working-directory: serving/docker + if: ${{ env.PUSH_TO_DOCKERHUB_DIRECTLY }} run: | - export BASE_RELEASE_VERSION="${{ env.SERVING_VERSION }}" - export RELEASE_VERSION="${{ env.SERVING_VERSION }}-" - docker compose build --no-cache \ - --build-arg djl_version=${{ env.DJL_VERSION }} \ - --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ - ${{ matrix.arch }} - docker compose push ${{ matrix.arch }} - - name: Retag image for release - if: ${{ matrix.arch == 'cpu' && inputs.mode == 'release' }} - working-directory: serving/docker - run: | - docker tag deepjavalibrary/djl-serving:${{ env.SERVING_VERSION }} deepjavalibrary/djl-serving:latest - docker push deepjavalibrary/djl-serving:latest + export NIGHTLY=${{ env.NIGHTLY }} + docker image ls + echo push ${{ matrix.arch }}$NIGHTLY + # docker compose push ${{ matrix.arch }} + if ${{ matrix.arch == 'cpu' && inputs.mode == 'release' }}; then + docker tag deepjavalibrary/djl-serving:${{ env.SERVING_VERSION }} deepjavalibrary/djl-serving:latest + echo push CPU + # docker push deepjavalibrary/djl-serving:latest + fi run-integration-tests: - if: ${{ inputs.mode == 'nightly' && !inputs.skip_integ_test }} + if: ${{ !inputs.skip_nightly_integ_test }} needs: [nightly-build, nightly-aarch64] uses: ./.github/workflows/integration.yml secrets: inherit with: djl-version: temp - push-to-dockerhub: - runs-on: ubuntu-latest - needs: [run-integration-tests] - strategy: - matrix: - arch: [ cpu, cpu-full, pytorch-inf2, pytorch-gpu, tensorrt-llm, lmi ] - steps: - - name: Login to Docker - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - name: Pull Image from ECR and Push it to Dockerhub - run: | - aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} - tempTag=" ${{env.AWS_ECR_REPO}}:${{ matrix.arch }}-${GITHUB_SHA}" - docker pull $tempTag - docker tag $tempTag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly - docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly + # push-to-dockerhub: + # if: ${{ inputs.mode != 'temp' && !cancelled()}} + # runs-on: ubuntu-latest + # needs: [run-integration-tests] + # strategy: + # matrix: + # arch: [ cpu, cpu-full, pytorch-inf2, pytorch-gpu, tensorrt-llm, lmi ] + # steps: + # - name: Login to Docker + # uses: docker/login-action@v3 + # with: + # username: ${{ secrets.DOCKER_USERNAME }} + # password: ${{ secrets.DOCKER_PASSWORD }} + # - name: Pull Image from ECR and Push it to Dockerhub + # run: | + # ECR_REGION= $(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') + # aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} + # tempTag=" ${{env.AWS_ECR_REPO}}:${{ matrix.arch }}-${GITHUB_SHA}" + # docker pull $tempTag + # docker tag $tempTag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly + # docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly create-runner: runs-on: [ self-hosted, scheduler ] @@ -204,45 +219,59 @@ jobs: SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV + - name: set condition env + run: | + BUILD_TEMP_DOCKER_IMAGE=${{ !inputs.skip_nightly_integ_test }} + echo "BUILD_TEMP_DOCKER_IMAGE=$BUILD_TEMP_DOCKER_IMAGE" >> $GITHUB_ENV + + PUSH_TO_DOCKERHUB_DIRECTLY=${{ inputs.skip_nightly_integ_test && !inputs.mode == 'temp'}} + echo "PUSH_TO_DOCKERHUB_DIRECTLY=$PUSH_TO_DOCKERHUB_DIRECTLY" >> $GITHUB_ENV + - name: Build release docker image + if: ${{ inputs.mode == 'release' }} + working-directory: serving/docker + run: | + export BASE_RELEASE_VERSION="${{ env.SERVING_VERSION }}" + export RELEASE_VERSION="${{ env.SERVING_VERSION }}-" + docker compose build --no-cache \ + --build-arg djl_version=${{ env.DJL_VERSION }} \ + --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ + aarch64 - name: Build serving package for nightly if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} run: | ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot - - name: Build and push nightly docker image - if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} + - name: Build temp docker image + if: ${{ env.BUILD_TEMP_DOCKER_IMAGE }} working-directory: serving/docker run: | export NIGHTLY="-nightly" + echo "NIGHTLY=$NIGHTLY" >> $GITHUB_ENV docker compose build --no-cache \ --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ aarch64 - docker compose push aarch64 - name: Build and push temp image - if: ${{ inputs.mode == 'temp' || inputs.mode == 'nightly' }} + if: ${{ env.BUILD_TEMP_DOCKER_IMAGE }} working-directory: serving/docker run: | - export NIGHTLY="-nightly" docker compose build --no-cache \ --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ aarch64 - repo="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" - aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} - tempTag="${{env.AWS_ECR_REPO}}:aarch64-${GITHUB_SHA}" - docker tag deepjavalibrary/djl-serving:aarch64-nightly $tempTag + ECR_REGION= $(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') + aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} + tempTag="${{env.AWS_ECR_REPO}}:aarch64-${GITHUB_SHA}-${GITHUB_RUN_ID}" + docker tag deepjavalibrary/djl-serving:aarch64$NIGHTLY $tempTag docker push $tempTag - - name: Build and push release docker image - if: ${{ inputs.mode == 'release' }} + + - name: push docker image to dockerhub working-directory: serving/docker + if: ${{env.PUSH_TO_DOCKERHUB_DIRECTLY}} run: | - export BASE_RELEASE_VERSION="${{ env.SERVING_VERSION }}" - export RELEASE_VERSION="${{ env.SERVING_VERSION }}-" - docker compose build --no-cache \ - --build-arg djl_version=${{ env.DJL_VERSION }} \ - --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ - aarch64 - docker compose push aarch64 + export NIGHTLY=${{ env.NIGHTLY }} + docker image ls + echo push aarch64 $NIGHTLY + # docker compose push aarch64 stop-runner: diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 3016f5195..6ac562074 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -14,6 +14,7 @@ on: required: false type: string default: '' + env: AWS_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" @@ -191,7 +192,8 @@ jobs: env: TEST_DJL_VERSION: ${{ inputs.djl-version }} run: | - aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} + ECR_REGION= $(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') + aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} python -m pytest -s -k ${{ matrix.test.test }} tests.py - name: Cleanup working-directory: tests/integration diff --git a/tests/integration/tests.py b/tests/integration/tests.py index 7d85ba2d2..cac76e41e 100644 --- a/tests/integration/tests.py +++ b/tests/integration/tests.py @@ -36,7 +36,7 @@ def __init__(self, container, test_name=None, download=False): else: if djl_version == "temp": repo = "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" - flavor = f"{container}-{os.environ['GITHUB_SHA']}" + flavor = f"{container}-{os.environ['GITHUB_SHA']}-{os.environ['GITHUB_RUN_ID']}" else: if container == "cpu": flavor = djl_version From 0bc8a25aa9ccf9bdbfaed2e20e43960a3925d834 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Thu, 12 Dec 2024 02:16:23 +0000 Subject: [PATCH 27/62] fix region var --- .github/workflows/docker-nightly-publish.yml | 6 +++--- .github/workflows/integration.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 136d5d10f..76e229992 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -74,7 +74,7 @@ jobs: SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV - - name: set condition env + - name: Set Condition env run: | BUILD_TEMP_DOCKER_IMAGE=${{ !inputs.skip_nightly_integ_test }} echo "BUILD_TEMP_DOCKER_IMAGE=$BUILD_TEMP_DOCKER_IMAGE" >> $GITHUB_ENV @@ -110,7 +110,7 @@ jobs: if: ${{ env.BUILD_TEMP_DOCKER_IMAGE }} working-directory: serving/docker run: | - ECR_REGION= $(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') + ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} tempTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-${GITHUB_SHA}-${GITHUB_RUN_ID}" docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $tempTag @@ -258,7 +258,7 @@ jobs: --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ aarch64 - ECR_REGION= $(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') + ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} tempTag="${{env.AWS_ECR_REPO}}:aarch64-${GITHUB_SHA}-${GITHUB_RUN_ID}" docker tag deepjavalibrary/djl-serving:aarch64$NIGHTLY $tempTag diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 6ac562074..7c9750a5d 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -192,7 +192,7 @@ jobs: env: TEST_DJL_VERSION: ${{ inputs.djl-version }} run: | - ECR_REGION= $(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') + ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} python -m pytest -s -k ${{ matrix.test.test }} tests.py - name: Cleanup @@ -249,7 +249,7 @@ jobs: - name: Download models and dockers run: | if [ ${{ github.event.inputs.djl-version }} == "temp" ]; then - DOCKER_IMAGE_URI="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp:pytorch-inf2-${GITHUB_SHA}" + DOCKER_IMAGE_URI="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp:pytorch-inf2-${GITHUB_SHA}-${GITHUB_RUN_ID}" else DOCKER_IMAGE_URI="deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG" fi From b02ca24b025c954f2c185f5df3c34b453b0fdbee Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Thu, 12 Dec 2024 22:57:35 +0000 Subject: [PATCH 28/62] test split push --- .github/workflows/docker-nightly-publish.yml | 43 +++---- .github/workflows/integration.yml | 125 ++++++++++++++----- 2 files changed, 117 insertions(+), 51 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 76e229992..2aaef376f 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -137,27 +137,28 @@ jobs: with: djl-version: temp - # push-to-dockerhub: - # if: ${{ inputs.mode != 'temp' && !cancelled()}} - # runs-on: ubuntu-latest - # needs: [run-integration-tests] - # strategy: - # matrix: - # arch: [ cpu, cpu-full, pytorch-inf2, pytorch-gpu, tensorrt-llm, lmi ] - # steps: - # - name: Login to Docker - # uses: docker/login-action@v3 - # with: - # username: ${{ secrets.DOCKER_USERNAME }} - # password: ${{ secrets.DOCKER_PASSWORD }} - # - name: Pull Image from ECR and Push it to Dockerhub - # run: | - # ECR_REGION= $(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') - # aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} - # tempTag=" ${{env.AWS_ECR_REPO}}:${{ matrix.arch }}-${GITHUB_SHA}" - # docker pull $tempTag - # docker tag $tempTag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly - # docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly + push-to-dockerhub: + if: ${{ inputs.mode != 'temp' && !cancelled() }} + runs-on: ubuntu-latest + needs: [run-integration-tests] + # strategy: + # matrix: + # arch: [ cpu, cpu-full, pytorch-inf2, pytorch-gpu, tensorrt-llm, lmi ] + steps: + - name: Login to Docker + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + - name: Pull Image from ECR and Push it to Dockerhub + run: | + echo ${{needs.run-integration-tests.outputs.images-to-push}} + # ECR_REGION= $(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') + # aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} + # tempTag=" ${{env.AWS_ECR_REPO}}:${{ matrix.arch }}-${GITHUB_SHA}" + # docker pull $tempTag + # docker tag $tempTag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly + # docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly create-runner: runs-on: [ self-hosted, scheduler ] diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 7c9750a5d..5fb7f6eb6 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -14,6 +14,9 @@ on: required: false type: string default: '' + outputs: + images-to-push: + value: ${{ jobs.test_results.outputs.images_to_push }} env: AWS_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" @@ -120,36 +123,36 @@ jobs: instance: aarch64 - test: TestHfHandler instance: g6 - - test: TestTrtLlmHandler1 - instance: g6 - - test: TestTrtLlmHandler2 - instance: g6 - - test: TestSchedulerSingleGPU - instance: g6 - - test: TestSchedulerMultiGPU - instance: g6 - - test: TestLmiDist1 - instance: g6 - - test: TestLmiDist2 - instance: g6 - - test: TestVllm1 - instance: g6 - - test: TestVllmLora - instance: g6 - - test: TestLmiDistLora - instance: g6 - - test: TestNeuronx1 - instance: inf2 - - test: TestNeuronx2 - instance: inf2 - - test: TestNeuronxRollingBatch - instance: inf2 - - test: TestMultiModal - instance: g6 - - test: TestTextEmbedding - instance: g6 - - test: TestLmiDistPipelineParallel - instance: g6 + # - test: TestTrtLlmHandler1 + # instance: g6 + # - test: TestTrtLlmHandler2 + # instance: g6 + # - test: TestSchedulerSingleGPU + # instance: g6 + # - test: TestSchedulerMultiGPU + # instance: g6 + # - test: TestLmiDist1 + # instance: g6 + # - test: TestLmiDist2 + # instance: g6 + # - test: TestVllm1 + # instance: g6 + # - test: TestVllmLora + # instance: g6 + # - test: TestLmiDistLora + # instance: g6 + # - test: TestNeuronx1 + # instance: inf2 + # - test: TestNeuronx2 + # instance: inf2 + # - test: TestNeuronxRollingBatch + # instance: inf2 + # - test: TestMultiModal + # instance: g6 + # - test: TestTextEmbedding + # instance: g6 + # - test: TestLmiDistPipelineParallel + # instance: g6 steps: - uses: actions/checkout@v4 - name: Clean env @@ -214,6 +217,68 @@ jobs: with: name: test-${{ matrix.test.test }}-logs path: tests/integration/all_logs/ + - name: Set test status + if: ${{ always() }} + id: test_status + run: | + if [[ ${{ job.status }} == "success" ]]; then + echo "test_result_${{ matrix.test.test }}=true" >> \$GITHUB_OUTPUT + else + echo "test_result_${{ matrix.test.test }}=false" >> \$GITHUB_OUTPUT + fi + outputs: + test_result_TestCpuFull: ${{ steps.test_status.outputs.test_result_TestCpuFull }} + test_result_TestCpuBoth: ${{ steps.test_status.outputs.test_result_TestCpuBoth }} + test_result_TestGpu: ${{ steps.test_status.outputs.test_result_TestGpu }} + test_result_TestAarch64: ${{ steps.test_status.outputs.test_result_TestAarch64 }} + test_result_TestHfHandler: ${{ steps.test_status.outputs.test_result_TestHfHandler }} + test_result_TestTrtLlmHandler1: ${{ steps.test_status.outputs.test_result_TestTrtLlmHandler1 }} + test_result_TestTrtLlmHandler2: ${{ steps.test_status.outputs.test_result_TestTrtLlmHandler2 }} + test_result_TestSchedulerSingleGPU: ${{ steps.test_status.outputs.test_result_TestSchedulerSingleGPU }} + test_result_TestSchedulerMultiGPU: ${{ steps.test_status.outputs.test_result_TestSchedulerMultiGPU }} + test_result_TestLmiDist1: ${{ steps.test_status.outputs.test_result_TestLmiDist1 }} + test_result_TestLmiDist2: ${{ steps.test_status.outputs.test_result_TestLmiDist2 }} + test_result_TestVllm1: ${{ steps.test_status.outputs.test_result_TestVllm1 }} + test_result_TestVllmLora: ${{ steps.test_status.outputs.test_result_TestVllmLora }} + test_result_TestLmiDistLora: ${{ steps.test_status.outputs.test_result_TestLmiDistLora }} + test_result_TestNeuronx1: ${{ steps.test_status.outputs.test_result_TestNeuronx1 }} + test_result_TestNeuronx2: ${{ steps.test_status.outputs.test_result_TestNeuronx2 }} + test_result_TestNeuronxRollingBatch: ${{ steps.test_status.outputs.test_result_TestNeuronxRollingBatch }} + test_result_TestMultiModal: ${{ steps.test_status.outputs.test_result_TestMultiModal }} + test_result_TestTextEmbedding: ${{ steps.test_status.outputs.test_result_TestTextEmbedding }} + test_result_TestLmiDistPipelineParallel: ${{ steps.test_status.outputs.test_result_TestLmiDistPipelineParallel }} + test_result_sheteng_not_Exist: ${{ steps.test_status.outputs.not_existing }} + + test_results: + runs-on: ubuntu-latest + timeout-minutes: 5 + needs: [ test, transformers-neuronx-container-unit-tests ] + steps: + - name: summarize + id: summarize_passing_image + run: | + declare -a image_list=() + + echo needs.test.outputs.test_result_sheteng_not_Exist + echo ${{needs.test.outputs.test_result_sheteng_not_Exist}} + + if [[ + ${{ needs.test.outputs.test_result_TestCpuBoth }} && + ${{ needs.test.outputs.test_result_TestCpuFull }} + ]]; then + image_list+=("cpu") + fi + + if [[ + ${{ needs.test.outputs.test_result_TestNeuronx1 }} && + ${{ needs.test.outputs.test_result_TestNeuronx2 }} + ]]; then + push_neron_image=true + fi + json_array=\$(printf '%s\n' "\${image_list[@]}" | jq -R . | jq -s .) + echo "image_list=$json_array" >> \$GITHUB_OUTPUT + outputs: + images_to_push: ${{ steps.summarize_passing_image.outputs.image_list }} transformers-neuronx-container-unit-tests: runs-on: From a0a1b16d8b414722f6d2f77021696581158f21e7 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Fri, 13 Dec 2024 01:22:32 +0000 Subject: [PATCH 29/62] fix cli --- .github/workflows/docker-nightly-publish.yml | 6 +++--- .github/workflows/integration.yml | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 2aaef376f..cd01e95e9 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -141,9 +141,9 @@ jobs: if: ${{ inputs.mode != 'temp' && !cancelled() }} runs-on: ubuntu-latest needs: [run-integration-tests] - # strategy: - # matrix: - # arch: [ cpu, cpu-full, pytorch-inf2, pytorch-gpu, tensorrt-llm, lmi ] + strategy: + matrix: + arch: ${{ fromJson(needs.run-integration-tests.outputs.images-to-push) }} steps: - name: Login to Docker uses: docker/login-action@v3 diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 5fb7f6eb6..b7239bb26 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -161,6 +161,10 @@ jobs: sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/ echo "wait dpkg lock..." while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done + - name: install awscli + run: | + sudo apt-get update + sudo apt-get install awscli -y - name: Set up Python3 if: ${{ matrix.test.instance != 'aarch64' }} uses: actions/setup-python@v5 @@ -221,6 +225,7 @@ jobs: if: ${{ always() }} id: test_status run: | + echo ${{ job.status }} if [[ ${{ job.status }} == "success" ]]; then echo "test_result_${{ matrix.test.test }}=true" >> \$GITHUB_OUTPUT else @@ -250,6 +255,7 @@ jobs: test_result_sheteng_not_Exist: ${{ steps.test_status.outputs.not_existing }} test_results: + if: ${{ !cancelled() }} runs-on: ubuntu-latest timeout-minutes: 5 needs: [ test, transformers-neuronx-container-unit-tests ] From 7dfb9c30a8cf801068a78927312373562ab3152a Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Fri, 13 Dec 2024 03:40:05 +0000 Subject: [PATCH 30/62] fix syntax + add time --- .github/workflows/docker-nightly-publish.yml | 2 +- .github/workflows/integration.yml | 24 +++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index cd01e95e9..b4d52b548 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -114,7 +114,7 @@ jobs: aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} tempTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-${GITHUB_SHA}-${GITHUB_RUN_ID}" docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $tempTag - docker push $tempTag + time docker push $tempTag - name: push docker image to dockerhub working-directory: serving/docker if: ${{ env.PUSH_TO_DOCKERHUB_DIRECTLY }} diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index b7239bb26..ae673c577 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -268,17 +268,13 @@ jobs: echo needs.test.outputs.test_result_sheteng_not_Exist echo ${{needs.test.outputs.test_result_sheteng_not_Exist}} - if [[ - ${{ needs.test.outputs.test_result_TestCpuBoth }} && - ${{ needs.test.outputs.test_result_TestCpuFull }} - ]]; then + if [ "${{ needs.test.outputs.test_result_TestCpuBoth }}" = "true" ] && + [ "${{ needs.test.outputs.test_result_TestCpuFull }}" = "true" ]; then image_list+=("cpu") fi - if [[ - ${{ needs.test.outputs.test_result_TestNeuronx1 }} && - ${{ needs.test.outputs.test_result_TestNeuronx2 }} - ]]; then + if [ "${{ needs.test.outputs.test_result_TestNeuronx1 }}" == "true"] && + [ "${{ needs.test.outputs.test_result_TestNeuronx1 }}" = "true" ]; then push_neron_image=true fi json_array=\$(printf '%s\n' "\${image_list[@]}" | jq -R . | jq -s .) @@ -359,6 +355,18 @@ jobs: with: name: transformers-neuronx-${{ matrix.arch }}-logs path: engines/python/setup/logs/ + - name: Set test status + if: ${{ always() }} + id: test_status + run: | + echo ${{ job.status }} + if [[ ${{ job.status }} == "success" ]]; then + echo "test_result_Transformer_Neuron_UnitTest=true" >> \$GITHUB_OUTPUT + else + echo "test_result_${{ matrix.test.test }}=false" >> \$GITHUB_OUTPUT + fi + outputs: + test_result_Transformer_Neuron_UnitTest: ${{ steps.test_status.outputs.test_result_Transformer_Neuron_UnitTest }} stop-runners: if: always() From 86bd0d92539b98c3deecb09f2240e9724e9dac7b Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Fri, 13 Dec 2024 05:19:33 +0000 Subject: [PATCH 31/62] fix syntax error --- .github/workflows/integration.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index ae673c577..ac4925a73 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -268,17 +268,17 @@ jobs: echo needs.test.outputs.test_result_sheteng_not_Exist echo ${{needs.test.outputs.test_result_sheteng_not_Exist}} - if [ "${{ needs.test.outputs.test_result_TestCpuBoth }}" = "true" ] && + if [ "${{ needs.test.outputs.test_result_TestCpuBoth }}" = "true" ] && \ [ "${{ needs.test.outputs.test_result_TestCpuFull }}" = "true" ]; then image_list+=("cpu") fi - if [ "${{ needs.test.outputs.test_result_TestNeuronx1 }}" == "true"] && + if [ "${{ needs.test.outputs.test_result_TestNeuronx1 }}" = "true" ] && \ [ "${{ needs.test.outputs.test_result_TestNeuronx1 }}" = "true" ]; then push_neron_image=true fi - json_array=\$(printf '%s\n' "\${image_list[@]}" | jq -R . | jq -s .) - echo "image_list=$json_array" >> \$GITHUB_OUTPUT + json_array=$(printf '%s\n' "${image_list[@]}" | jq -R . | jq -s .) + echo "image_list=$json_array" >> $GITHUB_OUTPUT outputs: images_to_push: ${{ steps.summarize_passing_image.outputs.image_list }} From c28e39246ecc8cb553f4fa53936c769f317a28d3 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Fri, 13 Dec 2024 05:20:19 +0000 Subject: [PATCH 32/62] remove test for faster test --- .github/workflows/integration.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index ac4925a73..241c9c6d0 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -117,12 +117,12 @@ jobs: - test: TestCpuBoth instance: ubuntu-latest gh-runner: true - - test: TestGpu - instance: g6 + # - test: TestGpu + # instance: g6 - test: TestAarch64 instance: aarch64 - - test: TestHfHandler - instance: g6 + # - test: TestHfHandler + # instance: g6 # - test: TestTrtLlmHandler1 # instance: g6 # - test: TestTrtLlmHandler2 From b1b08714971758fcee12ea028d1d272b773d5f62 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Sat, 14 Dec 2024 00:51:11 +0000 Subject: [PATCH 33/62] build,test --- .github/workflows/docker-nightly-publish.yml | 120 +++++-------------- .github/workflows/integration.yml | 16 ++- tests/integration/tests.py | 7 +- 3 files changed, 46 insertions(+), 97 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index b4d52b548..3c595a34e 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -7,11 +7,11 @@ on: description: 'release/nightly/temp, default is nightly' required: true default: 'nightly' - skip_nightly_integ_test: - description: 'buld and push the nightly without running integ test' + arch: + description: 'which images to build [ cpu, cpu-full, pytorch-inf2, pytorch-gpu, tensorrt-llm, lmi, aarch64]' + type: string required: false - default: false - type: boolean + default: '["cpu", "cpu-full", "pytorch-inf2", "pytorch-gpu", "tensorrt-llm", "lmi"]' workflow_call: inputs: mode: @@ -19,6 +19,11 @@ on: type: string required: true default: 'nightly' + arch: + description: 'which images to build [ cpu, cpu-full, pytorch-inf2, pytorch-gpu, tensorrt-llm, lmi, aarch64]' + type: string + required: false + default: '["cpu", "cpu-full", "pytorch-inf2", "pytorch-gpu", "tensorrt-llm", "lmi"]' schedule: - cron: '0 13 * * *' @@ -35,7 +40,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - arch: [ cpu, cpu-full, pytorch-inf2, pytorch-gpu, tensorrt-llm, lmi ] + arch: ${{ toJson(inputs.arch) }} steps: - name: Clean disk space run: | @@ -44,11 +49,6 @@ jobs: /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \ $AGENT_TOOLSDIRECTORY - uses: actions/checkout@v4 - - name: Login to Docker - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - name: install awscli run: | sudo apt-get update @@ -74,13 +74,6 @@ jobs: SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV - - name: Set Condition env - run: | - BUILD_TEMP_DOCKER_IMAGE=${{ !inputs.skip_nightly_integ_test }} - echo "BUILD_TEMP_DOCKER_IMAGE=$BUILD_TEMP_DOCKER_IMAGE" >> $GITHUB_ENV - - PUSH_TO_DOCKERHUB_DIRECTLY=${{ inputs.skip_nightly_integ_test && !inputs.mode == 'temp'}} - echo "PUSH_TO_DOCKERHUB_DIRECTLY=$PUSH_TO_DOCKERHUB_DIRECTLY" >> $GITHUB_ENV - name: Build release docker image if: ${{ inputs.mode == 'release'}} working-directory: serving/docker @@ -96,69 +89,29 @@ jobs: run: | ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot - name: Build temp docker image - if: ${{ env.BUILD_TEMP_DOCKER_IMAGE }} + if: ${{ inputs.mode == 'temp' || inputs.mode == 'nightly'}} working-directory: serving/docker run: | export NIGHTLY="-nightly" echo "NIGHTLY=$NIGHTLY" >> $GITHUB_ENV - docker compose build --no-cache \ --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \ --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ ${{ matrix.arch }} - name: Tag and push temp image to ECR repo - if: ${{ env.BUILD_TEMP_DOCKER_IMAGE }} working-directory: serving/docker run: | ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} - tempTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-${GITHUB_SHA}-${GITHUB_RUN_ID}" - docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $tempTag - time docker push $tempTag - - name: push docker image to dockerhub - working-directory: serving/docker - if: ${{ env.PUSH_TO_DOCKERHUB_DIRECTLY }} - run: | - export NIGHTLY=${{ env.NIGHTLY }} - docker image ls - echo push ${{ matrix.arch }}$NIGHTLY - # docker compose push ${{ matrix.arch }} - if ${{ matrix.arch == 'cpu' && inputs.mode == 'release' }}; then - docker tag deepjavalibrary/djl-serving:${{ env.SERVING_VERSION }} deepjavalibrary/djl-serving:latest - echo push CPU - # docker push deepjavalibrary/djl-serving:latest - fi + tempRunIdTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-${{ inputs.mode }}-${GITHUB_RUN_ID}" + tempCommitTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-${{ inputs.mode }}-${GITHUB_SHA}" - run-integration-tests: - if: ${{ !inputs.skip_nightly_integ_test }} - needs: [nightly-build, nightly-aarch64] - uses: ./.github/workflows/integration.yml - secrets: inherit - with: - djl-version: temp - - push-to-dockerhub: - if: ${{ inputs.mode != 'temp' && !cancelled() }} - runs-on: ubuntu-latest - needs: [run-integration-tests] - strategy: - matrix: - arch: ${{ fromJson(needs.run-integration-tests.outputs.images-to-push) }} - steps: - - name: Login to Docker - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - name: Pull Image from ECR and Push it to Dockerhub - run: | - echo ${{needs.run-integration-tests.outputs.images-to-push}} - # ECR_REGION= $(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') - # aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} - # tempTag=" ${{env.AWS_ECR_REPO}}:${{ matrix.arch }}-${GITHUB_SHA}" - # docker pull $tempTag - # docker tag $tempTag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly - # docker push ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}-nightly + docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $tempRunIdTag + docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $tempCommitTag + if ${{ inputs.mode == 'nightly' }}; then + docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-nightly + fi + time docker push --all-tags ${{ env.DOCKER_HUB_REPO }} create-runner: runs-on: [ self-hosted, scheduler ] @@ -190,11 +143,6 @@ jobs: working-directory: serving/docker run: | yes | docker system prune -a --volumes - - name: Login to Docker - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - name: install awscli run: | sudo apt-get update @@ -220,13 +168,6 @@ jobs: SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV - - name: set condition env - run: | - BUILD_TEMP_DOCKER_IMAGE=${{ !inputs.skip_nightly_integ_test }} - echo "BUILD_TEMP_DOCKER_IMAGE=$BUILD_TEMP_DOCKER_IMAGE" >> $GITHUB_ENV - - PUSH_TO_DOCKERHUB_DIRECTLY=${{ inputs.skip_nightly_integ_test && !inputs.mode == 'temp'}} - echo "PUSH_TO_DOCKERHUB_DIRECTLY=$PUSH_TO_DOCKERHUB_DIRECTLY" >> $GITHUB_ENV - name: Build release docker image if: ${{ inputs.mode == 'release' }} working-directory: serving/docker @@ -242,7 +183,7 @@ jobs: run: | ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot - name: Build temp docker image - if: ${{ env.BUILD_TEMP_DOCKER_IMAGE }} + if: ${{ inputs.mode == 'temp' || inputs.mode == 'nightly'}} working-directory: serving/docker run: | export NIGHTLY="-nightly" @@ -252,7 +193,6 @@ jobs: --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \ aarch64 - name: Build and push temp image - if: ${{ env.BUILD_TEMP_DOCKER_IMAGE }} working-directory: serving/docker run: | docker compose build --no-cache \ @@ -261,19 +201,15 @@ jobs: aarch64 ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} - tempTag="${{env.AWS_ECR_REPO}}:aarch64-${GITHUB_SHA}-${GITHUB_RUN_ID}" - docker tag deepjavalibrary/djl-serving:aarch64$NIGHTLY $tempTag - docker push $tempTag - - - name: push docker image to dockerhub - working-directory: serving/docker - if: ${{env.PUSH_TO_DOCKERHUB_DIRECTLY}} - run: | - export NIGHTLY=${{ env.NIGHTLY }} - docker image ls - echo push aarch64 $NIGHTLY - # docker compose push aarch64 + tempRunIdTag="${{ env.AWS_ECR_REPO }}aarch64-${{ inputs.mode }}-${GITHUB_RUN_ID}" + tempCommitTag="${{ env.AWS_ECR_REPO }}:aarch64-${{ inputs.mode }}-${GITHUB_SHA}" + docker tag ${{ env.DOCKER_HUB_REPO }}:aarch64}${{ env.NIGHTLY }} $tempRunIdTag + docker tag ${{ env.DOCKER_HUB_REPO }}:aarch64${{ env.NIGHTLY }} $tempCommitTag + if ${{ inputs.mode == 'nightly' }}; then + docker tag ${{ env.DOCKER_HUB_REPO }}:aarch64${{ env.NIGHTLY }} $${{ env.AWS_ECR_REPO }}:aarch64-nightly + fi + time docker push --all-tags ${{ env.DOCKER_HUB_REPO }} stop-runner: if: always() diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 241c9c6d0..d5428b0d2 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -7,6 +7,11 @@ on: description: 'The released version of DJL' required: false default: '' + tag-suffix: + description: 'Run tests on the specific tags suffix i.e. arch-{suffix}' + required: false + type: string + default: 'nightly' workflow_call: inputs: djl-version: @@ -14,6 +19,11 @@ on: required: false type: string default: '' + tag-suffix: + description: 'Run tests on the specific tags suffix i.e. arch-{suffix}' + required: false + type: string + default: 'nightly' outputs: images-to-push: value: ${{ jobs.test_results.outputs.images_to_push }} @@ -121,8 +131,8 @@ jobs: # instance: g6 - test: TestAarch64 instance: aarch64 - # - test: TestHfHandler - # instance: g6 + - test: TestHfHandler + instance: g6 # - test: TestTrtLlmHandler1 # instance: g6 # - test: TestTrtLlmHandler2 @@ -198,6 +208,8 @@ jobs: working-directory: tests/integration env: TEST_DJL_VERSION: ${{ inputs.djl-version }} + OVERRIDE_IMAGE_TAG_SUFFIX: ${{ inputs.tag-suffix }} + IMAGE_REPO: ${{ env.AWS_ECR_REPO }} run: | ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} diff --git a/tests/integration/tests.py b/tests/integration/tests.py index cac76e41e..f067a0c85 100644 --- a/tests/integration/tests.py +++ b/tests/integration/tests.py @@ -10,6 +10,8 @@ import test_client djl_version = os.environ.get('TEST_DJL_VERSION', '').strip() +override_image_tag_suffix = os.environ.get('OVERRIDE_IMAGE_TAG_SUFFIX', '').strip() +image_repo = os.environ.get('IMAGE_REPO', '').strip() def is_applicable_cuda_capability(arch: int) -> bool: @@ -29,14 +31,13 @@ def __init__(self, container, test_name=None, download=False): self.client_file_handler = None # Compute flavor and repo - repo = "deepjavalibrary/djl-serving" + repo = image_repo if djl_version is None or len( djl_version) == 0 or djl_version == "nightly": flavor = f"{container}-nightly" else: if djl_version == "temp": - repo = "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" - flavor = f"{container}-{os.environ['GITHUB_SHA']}-{os.environ['GITHUB_RUN_ID']}" + flavor = f"{container}-temp-{os.environ['GITHUB_SHA']}" else: if container == "cpu": flavor = djl_version From 277483ab45f754e9e934bdc7de2e48448951e9d3 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Sat, 14 Dec 2024 00:54:04 +0000 Subject: [PATCH 34/62] fix matrix value --- .github/workflows/docker-nightly-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 3c595a34e..9d704f716 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -40,7 +40,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - arch: ${{ toJson(inputs.arch) }} + arch: ${{ fromJson(inputs.arch) }} steps: - name: Clean disk space run: | From 60cd26d7b3f9502fd1241cd9cbcedd6ab0bb0e55 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Sat, 14 Dec 2024 01:00:49 +0000 Subject: [PATCH 35/62] fix push .github/workflows/docker-nightly-publish.yml --- .github/workflows/docker-nightly-publish.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 9d704f716..6b1ae576a 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -111,7 +111,7 @@ jobs: if ${{ inputs.mode == 'nightly' }}; then docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-nightly fi - time docker push --all-tags ${{ env.DOCKER_HUB_REPO }} + time docker push --all-tags ${{ env.AWS_ECR_REPO }} create-runner: runs-on: [ self-hosted, scheduler ] @@ -209,7 +209,7 @@ jobs: if ${{ inputs.mode == 'nightly' }}; then docker tag ${{ env.DOCKER_HUB_REPO }}:aarch64${{ env.NIGHTLY }} $${{ env.AWS_ECR_REPO }}:aarch64-nightly fi - time docker push --all-tags ${{ env.DOCKER_HUB_REPO }} + time docker push --all-tags ${{ env.AWS_ECR_REPO }} stop-runner: if: always() From 45092e43c4c7624f63afe6bfaf16ee89f80184eb Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Sat, 14 Dec 2024 01:12:34 +0000 Subject: [PATCH 36/62] fix uri tag --- .github/workflows/docker-nightly-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 6b1ae576a..02d45fd94 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -109,7 +109,7 @@ jobs: docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $tempRunIdTag docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $tempCommitTag if ${{ inputs.mode == 'nightly' }}; then - docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-nightly + docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} ${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-nightly fi time docker push --all-tags ${{ env.AWS_ECR_REPO }} From 66466fd878d938fa73c1432a8419314d84405098 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Sat, 14 Dec 2024 01:16:40 +0000 Subject: [PATCH 37/62] fix typo --- .github/workflows/docker-nightly-publish.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 02d45fd94..891fc5331 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -204,10 +204,10 @@ jobs: tempRunIdTag="${{ env.AWS_ECR_REPO }}aarch64-${{ inputs.mode }}-${GITHUB_RUN_ID}" tempCommitTag="${{ env.AWS_ECR_REPO }}:aarch64-${{ inputs.mode }}-${GITHUB_SHA}" - docker tag ${{ env.DOCKER_HUB_REPO }}:aarch64}${{ env.NIGHTLY }} $tempRunIdTag + docker tag ${{ env.DOCKER_HUB_REPO }}:aarch64${{ env.NIGHTLY }} $tempRunIdTag docker tag ${{ env.DOCKER_HUB_REPO }}:aarch64${{ env.NIGHTLY }} $tempCommitTag if ${{ inputs.mode == 'nightly' }}; then - docker tag ${{ env.DOCKER_HUB_REPO }}:aarch64${{ env.NIGHTLY }} $${{ env.AWS_ECR_REPO }}:aarch64-nightly + docker tag ${{ env.DOCKER_HUB_REPO }}:aarch64${{ env.NIGHTLY }} ${{ env.AWS_ECR_REPO }}:aarch64-nightly fi time docker push --all-tags ${{ env.AWS_ECR_REPO }} From 1dd747bf235c91d6d9b005ae1b22ab6ff4674d4d Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Sat, 14 Dec 2024 02:36:33 +0000 Subject: [PATCH 38/62] fix aws permisioon --- .github/workflows/integration.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index d5428b0d2..59e45d91d 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -28,6 +28,10 @@ on: images-to-push: value: ${{ jobs.test_results.outputs.images_to_push }} +permissions: + id-token: write + contents: read + env: AWS_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" From 49322cb6c2b1d26ecd6675186994a67da363244c Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Mon, 16 Dec 2024 18:09:54 +0000 Subject: [PATCH 39/62] add tests back --- .github/workflows/integration.yml | 120 ++++++++---------------------- 1 file changed, 30 insertions(+), 90 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 59e45d91d..841123b79 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -137,36 +137,36 @@ jobs: instance: aarch64 - test: TestHfHandler instance: g6 - # - test: TestTrtLlmHandler1 - # instance: g6 - # - test: TestTrtLlmHandler2 - # instance: g6 - # - test: TestSchedulerSingleGPU - # instance: g6 - # - test: TestSchedulerMultiGPU - # instance: g6 - # - test: TestLmiDist1 - # instance: g6 - # - test: TestLmiDist2 - # instance: g6 - # - test: TestVllm1 - # instance: g6 - # - test: TestVllmLora - # instance: g6 - # - test: TestLmiDistLora - # instance: g6 - # - test: TestNeuronx1 - # instance: inf2 - # - test: TestNeuronx2 - # instance: inf2 - # - test: TestNeuronxRollingBatch - # instance: inf2 - # - test: TestMultiModal - # instance: g6 - # - test: TestTextEmbedding - # instance: g6 - # - test: TestLmiDistPipelineParallel - # instance: g6 + - test: TestTrtLlmHandler1 + instance: g6 + - test: TestTrtLlmHandler2 + instance: g6 + - test: TestSchedulerSingleGPU + instance: g6 + - test: TestSchedulerMultiGPU + instance: g6 + - test: TestLmiDist1 + instance: g6 + - test: TestLmiDist2 + instance: g6 + - test: TestVllm1 + instance: g6 + - test: TestVllmLora + instance: g6 + - test: TestLmiDistLora + instance: g6 + - test: TestNeuronx1 + instance: inf2 + - test: TestNeuronx2 + instance: inf2 + - test: TestNeuronxRollingBatch + instance: inf2 + - test: TestMultiModal + instance: g6 + - test: TestTextEmbedding + instance: g6 + - test: TestLmiDistPipelineParallel + instance: g6 steps: - uses: actions/checkout@v4 - name: Clean env @@ -237,66 +237,6 @@ jobs: with: name: test-${{ matrix.test.test }}-logs path: tests/integration/all_logs/ - - name: Set test status - if: ${{ always() }} - id: test_status - run: | - echo ${{ job.status }} - if [[ ${{ job.status }} == "success" ]]; then - echo "test_result_${{ matrix.test.test }}=true" >> \$GITHUB_OUTPUT - else - echo "test_result_${{ matrix.test.test }}=false" >> \$GITHUB_OUTPUT - fi - outputs: - test_result_TestCpuFull: ${{ steps.test_status.outputs.test_result_TestCpuFull }} - test_result_TestCpuBoth: ${{ steps.test_status.outputs.test_result_TestCpuBoth }} - test_result_TestGpu: ${{ steps.test_status.outputs.test_result_TestGpu }} - test_result_TestAarch64: ${{ steps.test_status.outputs.test_result_TestAarch64 }} - test_result_TestHfHandler: ${{ steps.test_status.outputs.test_result_TestHfHandler }} - test_result_TestTrtLlmHandler1: ${{ steps.test_status.outputs.test_result_TestTrtLlmHandler1 }} - test_result_TestTrtLlmHandler2: ${{ steps.test_status.outputs.test_result_TestTrtLlmHandler2 }} - test_result_TestSchedulerSingleGPU: ${{ steps.test_status.outputs.test_result_TestSchedulerSingleGPU }} - test_result_TestSchedulerMultiGPU: ${{ steps.test_status.outputs.test_result_TestSchedulerMultiGPU }} - test_result_TestLmiDist1: ${{ steps.test_status.outputs.test_result_TestLmiDist1 }} - test_result_TestLmiDist2: ${{ steps.test_status.outputs.test_result_TestLmiDist2 }} - test_result_TestVllm1: ${{ steps.test_status.outputs.test_result_TestVllm1 }} - test_result_TestVllmLora: ${{ steps.test_status.outputs.test_result_TestVllmLora }} - test_result_TestLmiDistLora: ${{ steps.test_status.outputs.test_result_TestLmiDistLora }} - test_result_TestNeuronx1: ${{ steps.test_status.outputs.test_result_TestNeuronx1 }} - test_result_TestNeuronx2: ${{ steps.test_status.outputs.test_result_TestNeuronx2 }} - test_result_TestNeuronxRollingBatch: ${{ steps.test_status.outputs.test_result_TestNeuronxRollingBatch }} - test_result_TestMultiModal: ${{ steps.test_status.outputs.test_result_TestMultiModal }} - test_result_TestTextEmbedding: ${{ steps.test_status.outputs.test_result_TestTextEmbedding }} - test_result_TestLmiDistPipelineParallel: ${{ steps.test_status.outputs.test_result_TestLmiDistPipelineParallel }} - test_result_sheteng_not_Exist: ${{ steps.test_status.outputs.not_existing }} - - test_results: - if: ${{ !cancelled() }} - runs-on: ubuntu-latest - timeout-minutes: 5 - needs: [ test, transformers-neuronx-container-unit-tests ] - steps: - - name: summarize - id: summarize_passing_image - run: | - declare -a image_list=() - - echo needs.test.outputs.test_result_sheteng_not_Exist - echo ${{needs.test.outputs.test_result_sheteng_not_Exist}} - - if [ "${{ needs.test.outputs.test_result_TestCpuBoth }}" = "true" ] && \ - [ "${{ needs.test.outputs.test_result_TestCpuFull }}" = "true" ]; then - image_list+=("cpu") - fi - - if [ "${{ needs.test.outputs.test_result_TestNeuronx1 }}" = "true" ] && \ - [ "${{ needs.test.outputs.test_result_TestNeuronx1 }}" = "true" ]; then - push_neron_image=true - fi - json_array=$(printf '%s\n' "${image_list[@]}" | jq -R . | jq -s .) - echo "image_list=$json_array" >> $GITHUB_OUTPUT - outputs: - images_to_push: ${{ steps.summarize_passing_image.outputs.image_list }} transformers-neuronx-container-unit-tests: runs-on: From 037488710eba7e1ad85e91c77e6910ac5b606706 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Mon, 16 Dec 2024 18:21:00 +0000 Subject: [PATCH 40/62] fix typo --- .github/workflows/integration.yml | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 841123b79..6cafd0727 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -24,9 +24,8 @@ on: required: false type: string default: 'nightly' - outputs: - images-to-push: - value: ${{ jobs.test_results.outputs.images_to_push }} + schedule: + - cron: '0 15 * * *' permissions: id-token: write @@ -311,18 +310,6 @@ jobs: with: name: transformers-neuronx-${{ matrix.arch }}-logs path: engines/python/setup/logs/ - - name: Set test status - if: ${{ always() }} - id: test_status - run: | - echo ${{ job.status }} - if [[ ${{ job.status }} == "success" ]]; then - echo "test_result_Transformer_Neuron_UnitTest=true" >> \$GITHUB_OUTPUT - else - echo "test_result_${{ matrix.test.test }}=false" >> \$GITHUB_OUTPUT - fi - outputs: - test_result_Transformer_Neuron_UnitTest: ${{ steps.test_status.outputs.test_result_Transformer_Neuron_UnitTest }} stop-runners: if: always() From 086065b0cc0274a62550d0812856c605ba6a0919 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Mon, 16 Dec 2024 18:37:32 +0000 Subject: [PATCH 41/62] add override image suffix in tag --- tests/integration/tests.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/tests.py b/tests/integration/tests.py index f067a0c85..e35a1ab47 100644 --- a/tests/integration/tests.py +++ b/tests/integration/tests.py @@ -43,6 +43,8 @@ def __init__(self, container, test_name=None, download=False): flavor = djl_version else: flavor = f"{djl_version}-{container}" + if override_image_tag_suffix: + flavor = f"{container}-{override_image_tag_suffix}" self.image = f"{repo}:{flavor}" From 03f6e172bfa5188825009eac3ab8722d24b15c96 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Mon, 16 Dec 2024 19:27:25 +0000 Subject: [PATCH 42/62] fix neuron image --- .github/workflows/integration.yml | 13 ++++++------- tests/integration/launch_container.sh | 2 ++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 6cafd0727..677aaaf57 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -130,8 +130,8 @@ jobs: - test: TestCpuBoth instance: ubuntu-latest gh-runner: true - # - test: TestGpu - # instance: g6 + - test: TestGpu + instance: g6 - test: TestAarch64 instance: aarch64 - test: TestHfHandler @@ -265,15 +265,14 @@ jobs: with: role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving aws-region: us-east-1 - - name: Build container name - run: | - ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }} - name: Download models and dockers run: | if [ ${{ github.event.inputs.djl-version }} == "temp" ]; then - DOCKER_IMAGE_URI="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp:pytorch-inf2-${GITHUB_SHA}-${GITHUB_RUN_ID}" + DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}/djl-ci-temp:pytorch-inf2-temp-${GITHUB_SHA}" + elif [ -n "${{ inputs.tag-suffix }}" ]; then + DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}/djl-ci-temp:pytorch-inf2-${{ inputs.tag-suffix }}" else - DOCKER_IMAGE_URI="deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG" + DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}/djl-ci-temp:pytorch-inf2-nightly" fi echo "DOCKER_IMAGE_URI=$DOCKER_IMAGE_URI" >>$GITHUB_ENV aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} diff --git a/tests/integration/launch_container.sh b/tests/integration/launch_container.sh index 94d668754..860f50acd 100755 --- a/tests/integration/launch_container.sh +++ b/tests/integration/launch_container.sh @@ -11,6 +11,8 @@ model_path=$2 #required platform=$3 #required args=${@:4} #optional +echo launch_container.sh: using docker image: $docker_image + is_sm_neo_context=false if [[ $4 == "sm_neo_context" ]]; then is_sm_neo_context=true From 186e602cb23a5a7361a2f821e8e20f72eb6be27c Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Mon, 16 Dec 2024 19:50:10 +0000 Subject: [PATCH 43/62] fix condition in neuron ut --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 677aaaf57..7f6bf49a5 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -267,7 +267,7 @@ jobs: aws-region: us-east-1 - name: Download models and dockers run: | - if [ ${{ github.event.inputs.djl-version }} == "temp" ]; then + if [ "${{ github.event.inputs.djl-version }}" == "temp" ]; then DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}/djl-ci-temp:pytorch-inf2-temp-${GITHUB_SHA}" elif [ -n "${{ inputs.tag-suffix }}" ]; then DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}/djl-ci-temp:pytorch-inf2-${{ inputs.tag-suffix }}" From 5d4ac703c18c269ebe52b2cdf80ba985576bfe06 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Mon, 16 Dec 2024 20:23:23 +0000 Subject: [PATCH 44/62] fix neuron uri --- .github/workflows/integration.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 7f6bf49a5..6729ea3a0 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -268,11 +268,11 @@ jobs: - name: Download models and dockers run: | if [ "${{ github.event.inputs.djl-version }}" == "temp" ]; then - DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}/djl-ci-temp:pytorch-inf2-temp-${GITHUB_SHA}" + DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}:pytorch-inf2-temp-${GITHUB_SHA}" elif [ -n "${{ inputs.tag-suffix }}" ]; then - DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}/djl-ci-temp:pytorch-inf2-${{ inputs.tag-suffix }}" + DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}:pytorch-inf2-${{ inputs.tag-suffix }}" else - DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}/djl-ci-temp:pytorch-inf2-nightly" + DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}:pytorch-inf2-nightly" fi echo "DOCKER_IMAGE_URI=$DOCKER_IMAGE_URI" >>$GITHUB_ENV aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} From 3949dcc0940217e1d7ca0299c2be1fa2675fba0a Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Mon, 16 Dec 2024 20:50:38 +0000 Subject: [PATCH 45/62] fix format --- tests/integration/tests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/tests.py b/tests/integration/tests.py index e35a1ab47..78a8564bd 100644 --- a/tests/integration/tests.py +++ b/tests/integration/tests.py @@ -10,7 +10,8 @@ import test_client djl_version = os.environ.get('TEST_DJL_VERSION', '').strip() -override_image_tag_suffix = os.environ.get('OVERRIDE_IMAGE_TAG_SUFFIX', '').strip() +override_image_tag_suffix = os.environ.get('OVERRIDE_IMAGE_TAG_SUFFIX', + '').strip() image_repo = os.environ.get('IMAGE_REPO', '').strip() From bcd3555eef8bc135add3fe7ce929d054a998cbb4 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Mon, 16 Dec 2024 20:59:21 +0000 Subject: [PATCH 46/62] clean --- .github/workflows/docker-nightly-publish.yml | 4 ++-- .github/workflows/integration.yml | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 891fc5331..58a6f3021 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -89,7 +89,7 @@ jobs: run: | ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot - name: Build temp docker image - if: ${{ inputs.mode == 'temp' || inputs.mode == 'nightly'}} + if: ${{ inputs.mode == 'temp' || inputs.mode == 'nightly' }} working-directory: serving/docker run: | export NIGHTLY="-nightly" @@ -183,7 +183,7 @@ jobs: run: | ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot - name: Build temp docker image - if: ${{ inputs.mode == 'temp' || inputs.mode == 'nightly'}} + if: ${{ inputs.mode == 'temp' || inputs.mode == 'nightly' }} working-directory: serving/docker run: | export NIGHTLY="-nightly" diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 6729ea3a0..466058ace 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -275,7 +275,8 @@ jobs: DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}:pytorch-inf2-nightly" fi echo "DOCKER_IMAGE_URI=$DOCKER_IMAGE_URI" >>$GITHUB_ENV - aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} + ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') + aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} echo $DOCKER_IMAGE_URI docker pull $DOCKER_IMAGE_URI - name: Run djl_python unit/integration tests on container From 5a9f70ceda5058db9eaf1f8d22daeaf0e1016db4 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Mon, 16 Dec 2024 23:21:49 +0000 Subject: [PATCH 47/62] fix based on comment --- .github/workflows/docker-nightly-publish.yml | 38 +++++++++++--------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 58a6f3021..09eaa3a15 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -41,6 +41,8 @@ jobs: strategy: matrix: arch: ${{ fromJson(inputs.arch) }} + exclude: + - arch: aarch64 steps: - name: Clean disk space run: | @@ -74,7 +76,7 @@ jobs: SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV - - name: Build release docker image + - name: Build release candidate docker image if: ${{ inputs.mode == 'release'}} working-directory: serving/docker run: | @@ -84,14 +86,11 @@ jobs: --build-arg djl_version=${{ env.DJL_VERSION }} \ --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ ${{ matrix.arch }} - - name: Build serving package for nightly - if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} - run: | - ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot - name: Build temp docker image - if: ${{ inputs.mode == 'temp' || inputs.mode == 'nightly' }} + if: ${{ inputs.mode == '' || inputs.mode == 'temp' || inputs.mode == 'nightly' }} working-directory: serving/docker run: | + ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot export NIGHTLY="-nightly" echo "NIGHTLY=$NIGHTLY" >> $GITHUB_ENV docker compose build --no-cache \ @@ -103,8 +102,12 @@ jobs: run: | ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} - tempRunIdTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-${{ inputs.mode }}-${GITHUB_RUN_ID}" - tempCommitTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-${{ inputs.mode }}-${GITHUB_SHA}" + mode=${{ inputs.mode }} + if [ "${{ inputs.mode }}" == "release" ]; then + mode=${{ env.DJL_VERSION }} + fi + tempRunIdTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-$mode-${GITHUB_RUN_ID}" + tempCommitTag="${{ env.AWS_ECR_REPO }}:${{ matrix.arch }}-$mode-${GITHUB_SHA}" docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $tempRunIdTag docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.arch }}${{ env.NIGHTLY }} $tempCommitTag @@ -114,6 +117,7 @@ jobs: time docker push --all-tags ${{ env.AWS_ECR_REPO }} create-runner: + if: contains(fromJSON(inputs.arch), 'aarch64') runs-on: [ self-hosted, scheduler ] steps: - name: Create new Graviton instance @@ -129,6 +133,7 @@ jobs: aarch64_instance_id: ${{ steps.create_aarch64.outputs.action_graviton_instance_id }} nightly-aarch64: + if: contains(fromJSON(inputs.arch), 'aarch64') runs-on: - self-hosted - aarch64 @@ -168,7 +173,7 @@ jobs: SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV - - name: Build release docker image + - name: Build release candidate docker image if: ${{ inputs.mode == 'release' }} working-directory: serving/docker run: | @@ -178,14 +183,11 @@ jobs: --build-arg djl_version=${{ env.DJL_VERSION }} \ --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ aarch64 - - name: Build serving package for nightly - if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} - run: | - ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot - name: Build temp docker image - if: ${{ inputs.mode == 'temp' || inputs.mode == 'nightly' }} + if: ${{ inputs.mode == '' || inputs.mode == 'temp' || inputs.mode == 'nightly' }} working-directory: serving/docker run: | + ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot export NIGHTLY="-nightly" echo "NIGHTLY=$NIGHTLY" >> $GITHUB_ENV docker compose build --no-cache \ @@ -201,8 +203,12 @@ jobs: aarch64 ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} - tempRunIdTag="${{ env.AWS_ECR_REPO }}aarch64-${{ inputs.mode }}-${GITHUB_RUN_ID}" - tempCommitTag="${{ env.AWS_ECR_REPO }}:aarch64-${{ inputs.mode }}-${GITHUB_SHA}" + mode=${{ inputs.mode }} + if [ "${{ inputs.mode }}" == "release" ]; then + mode=${{ env.DJL_VERSION }} + fi + tempRunIdTag="${{ env.AWS_ECR_REPO }}aarch64-$mode-${GITHUB_RUN_ID}" + tempCommitTag="${{ env.AWS_ECR_REPO }}:aarch64-$mode-${GITHUB_SHA}" docker tag ${{ env.DOCKER_HUB_REPO }}:aarch64${{ env.NIGHTLY }} $tempRunIdTag docker tag ${{ env.DOCKER_HUB_REPO }}:aarch64${{ env.NIGHTLY }} $tempCommitTag From f396e74a18b327b8b0172611f9c142262200a4d2 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Mon, 16 Dec 2024 23:24:11 +0000 Subject: [PATCH 48/62] update default arch value --- .github/workflows/docker-nightly-publish.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 09eaa3a15..0f3324dfe 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -11,7 +11,7 @@ on: description: 'which images to build [ cpu, cpu-full, pytorch-inf2, pytorch-gpu, tensorrt-llm, lmi, aarch64]' type: string required: false - default: '["cpu", "cpu-full", "pytorch-inf2", "pytorch-gpu", "tensorrt-llm", "lmi"]' + default: '["cpu", "cpu-full", "pytorch-inf2", "pytorch-gpu", "tensorrt-llm", "lmi", "aarch64"]' workflow_call: inputs: mode: @@ -23,7 +23,7 @@ on: description: 'which images to build [ cpu, cpu-full, pytorch-inf2, pytorch-gpu, tensorrt-llm, lmi, aarch64]' type: string required: false - default: '["cpu", "cpu-full", "pytorch-inf2", "pytorch-gpu", "tensorrt-llm", "lmi"]' + default: '["cpu", "cpu-full", "pytorch-inf2", "pytorch-gpu", "tensorrt-llm", "lmi", "aarch64"]' schedule: - cron: '0 13 * * *' From 33ab6306b4d0897645a999161a942356e13d3fbc Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Tue, 17 Dec 2024 19:07:50 +0000 Subject: [PATCH 49/62] rebase on other pr --- .github/workflows/docker-nightly-publish.yml | 10 +- .github/workflows/docker_publish.yml | 103 ++++++++++++++++++ .github/workflows/integration.yml | 10 +- .github/workflows/nightly.yml | 62 +++++++++++ serving/docker/scripts/pull_and_retag.sh | 1 + serving/docker/scripts/push_image_from_ECR.sh | 23 ++++ tests/integration/tests.py | 11 +- 7 files changed, 204 insertions(+), 16 deletions(-) create mode 100644 .github/workflows/docker_publish.yml create mode 100644 .github/workflows/nightly.yml create mode 100644 serving/docker/scripts/push_image_from_ECR.sh diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 0f3324dfe..44b19194c 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -24,9 +24,10 @@ on: type: string required: false default: '["cpu", "cpu-full", "pytorch-inf2", "pytorch-gpu", "tensorrt-llm", "lmi", "aarch64"]' - schedule: - - cron: '0 13 * * *' - + outputs: + djl_version: + description: "djl version" + value: ${{ jobs.nightly-build.outputs.djl_version }} permissions: id-token: write contents: read @@ -43,6 +44,8 @@ jobs: arch: ${{ fromJson(inputs.arch) }} exclude: - arch: aarch64 + outputs: + djl_version: ${{ steps.get-versions.outputs.DJL_VERSION }} steps: - name: Clean disk space run: | @@ -76,6 +79,7 @@ jobs: SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV + echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_OUTPUT - name: Build release candidate docker image if: ${{ inputs.mode == 'release'}} working-directory: serving/docker diff --git a/.github/workflows/docker_publish.yml b/.github/workflows/docker_publish.yml new file mode 100644 index 000000000..54fa359e1 --- /dev/null +++ b/.github/workflows/docker_publish.yml @@ -0,0 +1,103 @@ +name: Build and push docker nightly to temp ECR repo + +on: + workflow_dispatch: + inputs: + mode: + description: 'release/nightly, default is nightly' + required: true + default: 'nightly' + type: choice + options: + - nightly + - release + workflow_call: + inputs: + mode: + description: 'release/nightly, default is nightly' + type: string + required: true + default: 'nightly' + +permissions: + id-token: write + contents: read + +env: + AWS_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" + +jobs: + create-aarch64-runner: + runs-on: [ self-hosted, scheduler ] + steps: + - name: Create new Graviton instance + id: create_aarch64 + run: | + cd /home/ubuntu/djl_benchmark_script/scripts + token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ + https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ + --fail \ + | jq '.token' | tr -d '"' ) + ./start_instance.sh action_graviton $token djl-serving + outputs: + aarch64_instance_id: ${{ steps.create_aarch64.outputs.action_graviton_instance_id }} + + nightly-aarch64: + runs-on: [ self-hosted, aarch64 ] + timeout-minutes: 60 + needs: create-aarch64-runner + steps: + - uses: actions/checkout@v4 + - name: Clean docker env + working-directory: serving/docker + run: | + yes | docker system prune -a --volumes + - name: Login to Docker + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + - name: install awscli + run: | + sudo apt-get update + sudo apt-get install awscli -y + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving + aws-region: us-east-1 + - name: Pull and sync to docker hub + working-directory: serving/docker + run: | + DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) + ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') + aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} + ./scripts/pull_and_retag.sh $DJL_VERSION deepjavalibrary/djl-serving ${{ inputs.mode }} + - name: Pull and sync to ECR + working-directory: serving/docker + run: | + DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) + repo="125045733377.dkr.ecr.us-east-1.amazonaws.com/djl-serving" + aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo + ./scripts/pull_and_retag.sh $DJL_VERSION $repo ${{ inputs.mode }} + - name: Retag image for release latest + if: ${{ inputs.mode == 'release' }} + working-directory: serving/docker + run: | + DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) + docker tag deepjavalibrary/djl-serving:${DJL_VERSION} deepjavalibrary/djl-serving:latest + docker push deepjavalibrary/djl-serving:latest + - name: Clean docker env + working-directory: serving/docker + run: | + yes | docker system prune -a --volumes + stop-aarch64-runner: + if: always() + runs-on: [ self-hosted, scheduler ] + needs: [nightly-aarch64, create-aarch64-runner] + steps: + - name: Stop all instances + run: | + cd /home/ubuntu/djl_benchmark_script/scripts + instance_id=${{ needs.create-aarch64-runner.outputs.aarch64_instance_id }} + ./stop_instance.sh $instance_id \ No newline at end of file diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 466058ace..0afa40ca7 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: inputs: djl-version: - description: 'The released version of DJL' + description: 'The released version of DJL.' required: false default: '' tag-suffix: @@ -15,17 +15,15 @@ on: workflow_call: inputs: djl-version: - description: 'The released version of DJL' + description: 'The released version of DJL.' required: false type: string - default: '' + default: 'nightly' tag-suffix: description: 'Run tests on the specific tags suffix i.e. arch-{suffix}' required: false type: string - default: 'nightly' - schedule: - - cron: '0 15 * * *' + default: '' permissions: id-token: write diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml new file mode 100644 index 000000000..fd2feae33 --- /dev/null +++ b/.github/workflows/nightly.yml @@ -0,0 +1,62 @@ +name: Nightly Pipeline + +on: + workflow_dispatch: + inputs: + mode: + description: 'release/nightly, default is nightly' + required: true + default: 'nightly' + type: choice + options: + - nightly + - release + workflow_call: + inputs: + mode: + description: 'release/nightly, default is nightly' + type: string + required: true + default: 'nightly' + schedule: + - cron: '0 13 * * *' + + +permissions: + id-token: write + contents: read + +jobs: + build: + uses: ./.github/workflows/docker-nightly-publish.yml + secrets: inherit + with: + mode: ${{ inputs.mode }} + get_image_tag_suffix: + outputs: + test_image_tag_suffix: ${{ steps.get_image_tag_suffix.test_image_tag_suffix }} + needs: [build] + runs-on: ubuntu-latest + steps: + - name: get_image_tag_suffix + id: get_image_tag_suffix + run: | + if ${{ inputs.mode == 'nightly'}}; then + test_image_tag_suffix='nightly' + fi + if ${{ inputs.mode == 'release'}}; then + test_image_tag_suffix='${{ needs.build.outputs.djl_version}}-${GITHUB_RUN_ID}' + fi + echo "test_image_tag_suffix=$test_image_tag_suffix" >> $GITHUB_OUTPUT + integration-test: + needs: [get_image_tag_suffix] + uses: ./.github/workflows/integration.yml + secrets: inherit + with: + tag-suffix:: ${{ needs.get_image_tag_suffix.outputs.test_image_tag_suffix }} + publish: + needs: [integration-test, get_image_tag_suffix] + uses: ./.github/workflows/docker_publish.yml + secrets: inherit + with: + mode: ${{ inputs.mode }} \ No newline at end of file diff --git a/serving/docker/scripts/pull_and_retag.sh b/serving/docker/scripts/pull_and_retag.sh index 4a9010144..5bdcdcf59 100755 --- a/serving/docker/scripts/pull_and_retag.sh +++ b/serving/docker/scripts/pull_and_retag.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# for djl-serving/.github/workflows/nightly-docker-ecr-sync.yml version=$1 repo=$2 diff --git a/serving/docker/scripts/push_image_from_ECR.sh b/serving/docker/scripts/push_image_from_ECR.sh new file mode 100644 index 000000000..98d152d57 --- /dev/null +++ b/serving/docker/scripts/push_image_from_ECR.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# for docker_publish.yml +version=$1 +repo=$2 +mode=$3 +images="cpu aarch64 cpu-full pytorch-inf2 pytorch-gpu lmi tensor rt-llm" + +temprepo="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" + +for image in $images; do + if [[ ! "$mode" == "nightly" ]]; then + if [[ "$image" == "cpu" ]]; then + tag=$version + else + tag="$version-$image" + fi + else + tag="$image-nightly" + fi + echo docker pull $temprepo:$image-$mode-$GITHUB_RUN_ID + echo docker tag $temprepo:$image-$mode-$GITHUB_RUN_ID $repo:$tag + echo docker push $repo:$tag +done \ No newline at end of file diff --git a/tests/integration/tests.py b/tests/integration/tests.py index 78a8564bd..032e4ccf2 100644 --- a/tests/integration/tests.py +++ b/tests/integration/tests.py @@ -36,14 +36,11 @@ def __init__(self, container, test_name=None, download=False): if djl_version is None or len( djl_version) == 0 or djl_version == "nightly": flavor = f"{container}-nightly" + elif djl_version == "temp": + flavor = f"{container}-temp-{os.environ['GITHUB_SHA']}" else: - if djl_version == "temp": - flavor = f"{container}-temp-{os.environ['GITHUB_SHA']}" - else: - if container == "cpu": - flavor = djl_version - else: - flavor = f"{djl_version}-{container}" + flavor = f"{container}-{djl_version}-{os.environ['GITHUB_SHA']}" + if override_image_tag_suffix: flavor = f"{container}-{override_image_tag_suffix}" From 74b72fa8686354ade0e60af7809b1c178735402f Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Tue, 17 Dec 2024 20:18:52 +0000 Subject: [PATCH 50/62] test docker publish --- .github/workflows/docker_publish.yml | 14 +++++++------- serving/docker/scripts/push_image_from_ECR.sh | 1 + 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/docker_publish.yml b/.github/workflows/docker_publish.yml index 54fa359e1..ea1f90dfa 100644 --- a/.github/workflows/docker_publish.yml +++ b/.github/workflows/docker_publish.yml @@ -24,7 +24,9 @@ permissions: contents: read env: - AWS_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" + AWS_TMP_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" + AWS_STAGING_ECR_REPO: "125045733377.dkr.ecr.us-east-1.amazonaws.com/djl-serving" + ECR_REPO_REGION: "us-east-1" jobs: create-aarch64-runner: @@ -70,16 +72,14 @@ jobs: working-directory: serving/docker run: | DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) - ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}') - aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}} - ./scripts/pull_and_retag.sh $DJL_VERSION deepjavalibrary/djl-serving ${{ inputs.mode }} + aws ecr get-login-password --region $ECR_REPO_REGION | docker login --username AWS --password-stdin ${{env.AWS_TMP_ECR_REPO}} + ./scripts/push_image_from_ECR.sh $DJL_VERSION deepjavalibrary/djl-serving ${{ inputs.mode }} - name: Pull and sync to ECR working-directory: serving/docker run: | DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) - repo="125045733377.dkr.ecr.us-east-1.amazonaws.com/djl-serving" - aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo - ./scripts/pull_and_retag.sh $DJL_VERSION $repo ${{ inputs.mode }} + aws ecr get-login-password --region $ECR_REPO_REGION | docker login --username AWS --password-stdin $AWS_STAGING_ECR_REPO + ./scripts/push_image_from_ECR.sh $DJL_VERSION $repo ${{ inputs.mode }} - name: Retag image for release latest if: ${{ inputs.mode == 'release' }} working-directory: serving/docker diff --git a/serving/docker/scripts/push_image_from_ECR.sh b/serving/docker/scripts/push_image_from_ECR.sh index 98d152d57..abcd956ed 100644 --- a/serving/docker/scripts/push_image_from_ECR.sh +++ b/serving/docker/scripts/push_image_from_ECR.sh @@ -18,6 +18,7 @@ for image in $images; do tag="$image-nightly" fi echo docker pull $temprepo:$image-$mode-$GITHUB_RUN_ID + docker pull $temprepo:$image-$mode-$GITHUB_RUN_ID echo docker tag $temprepo:$image-$mode-$GITHUB_RUN_ID $repo:$tag echo docker push $repo:$tag done \ No newline at end of file From 53b0d1cd860162861dfe6c26d9456be7f0e56137 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Tue, 17 Dec 2024 20:23:13 +0000 Subject: [PATCH 51/62] fix permission --- serving/docker/scripts/push_image_from_ECR.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 serving/docker/scripts/push_image_from_ECR.sh diff --git a/serving/docker/scripts/push_image_from_ECR.sh b/serving/docker/scripts/push_image_from_ECR.sh old mode 100644 new mode 100755 From 26636d7599eb898cf1e9da773123a7d684477294 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Tue, 17 Dec 2024 20:57:27 +0000 Subject: [PATCH 52/62] use sha --- .github/workflows/docker-nightly-publish.yml | 4 ++-- serving/docker/scripts/push_image_from_ECR.sh | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 44b19194c..792fbadce 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -92,9 +92,9 @@ jobs: ${{ matrix.arch }} - name: Build temp docker image if: ${{ inputs.mode == '' || inputs.mode == 'temp' || inputs.mode == 'nightly' }} - working-directory: serving/docker run: | ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot + cd serving/docker export NIGHTLY="-nightly" echo "NIGHTLY=$NIGHTLY" >> $GITHUB_ENV docker compose build --no-cache \ @@ -189,9 +189,9 @@ jobs: aarch64 - name: Build temp docker image if: ${{ inputs.mode == '' || inputs.mode == 'temp' || inputs.mode == 'nightly' }} - working-directory: serving/docker run: | ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot + cd serving/docker export NIGHTLY="-nightly" echo "NIGHTLY=$NIGHTLY" >> $GITHUB_ENV docker compose build --no-cache \ diff --git a/serving/docker/scripts/push_image_from_ECR.sh b/serving/docker/scripts/push_image_from_ECR.sh index abcd956ed..0ea77340e 100755 --- a/serving/docker/scripts/push_image_from_ECR.sh +++ b/serving/docker/scripts/push_image_from_ECR.sh @@ -17,8 +17,8 @@ for image in $images; do else tag="$image-nightly" fi - echo docker pull $temprepo:$image-$mode-$GITHUB_RUN_ID - docker pull $temprepo:$image-$mode-$GITHUB_RUN_ID - echo docker tag $temprepo:$image-$mode-$GITHUB_RUN_ID $repo:$tag + echo docker pull $temprepo:$image-$mode-$GITHUB_SHA + docker pull $temprepo:$image-$mode-$GITHUB_SHA + echo docker tag $temprepo:$image-$mode-$GITHUB_SHA $repo:$tag echo docker push $repo:$tag done \ No newline at end of file From 468a260e6589a498c7d80be19c38eadf3e7f6c8e Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Tue, 17 Dec 2024 21:46:32 +0000 Subject: [PATCH 53/62] improve scripts --- .github/workflows/docker_publish.yml | 13 ++++++-- serving/docker/scripts/push_image_from_ECR.sh | 31 +++++++++++++------ 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/.github/workflows/docker_publish.yml b/.github/workflows/docker_publish.yml index ea1f90dfa..f2e7f23c5 100644 --- a/.github/workflows/docker_publish.yml +++ b/.github/workflows/docker_publish.yml @@ -11,6 +11,10 @@ on: options: - nightly - release + commit_sha: + description: 'specify which sha value the image was built.' + required: false + default: '' workflow_call: inputs: mode: @@ -18,6 +22,11 @@ on: type: string required: true default: 'nightly' + commit_sha: + type: string + description: 'specify which sha value the image was built.' + required: false + default: '' permissions: id-token: write @@ -73,13 +82,13 @@ jobs: run: | DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) aws ecr get-login-password --region $ECR_REPO_REGION | docker login --username AWS --password-stdin ${{env.AWS_TMP_ECR_REPO}} - ./scripts/push_image_from_ECR.sh $DJL_VERSION deepjavalibrary/djl-serving ${{ inputs.mode }} + ./scripts/push_image_from_ECR.sh $DJL_VERSION deepjavalibrary/djl-serving ${{ inputs.mode }} ${{ inputs.commit_sha }} - name: Pull and sync to ECR working-directory: serving/docker run: | DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) aws ecr get-login-password --region $ECR_REPO_REGION | docker login --username AWS --password-stdin $AWS_STAGING_ECR_REPO - ./scripts/push_image_from_ECR.sh $DJL_VERSION $repo ${{ inputs.mode }} + ./scripts/push_image_from_ECR.sh $DJL_VERSION $AWS_STAGING_ECR_REPO ${{ inputs.mode }} ${{ inputs.commit_sha }} - name: Retag image for release latest if: ${{ inputs.mode == 'release' }} working-directory: serving/docker diff --git a/serving/docker/scripts/push_image_from_ECR.sh b/serving/docker/scripts/push_image_from_ECR.sh index 0ea77340e..01010d15a 100755 --- a/serving/docker/scripts/push_image_from_ECR.sh +++ b/serving/docker/scripts/push_image_from_ECR.sh @@ -1,24 +1,37 @@ #!/usr/bin/env bash # for docker_publish.yml + +set -euo pipefail +# Validate required arguments +if [ $# -lt 3 ]; then + echo "Usage: $0 [commit_sha]" >&2 + exit 1 +fi + version=$1 -repo=$2 +to_repo=$2 mode=$3 -images="cpu aarch64 cpu-full pytorch-inf2 pytorch-gpu lmi tensor rt-llm" +commit_sha=${4:-$GITHUB_SHA} # Use parameter expansion for default value + +images=(cpu aarch64 cpu-full pytorch-inf2 pytorch-gpu lmi tensorrt-llm) -temprepo="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" +from_repo=$AWS_TMP_ECR_REPO for image in $images; do - if [[ ! "$mode" == "nightly" ]]; then + + if [[ "$mode" == "release" ]]; then if [[ "$image" == "cpu" ]]; then tag=$version else tag="$version-$image" fi - else + fi + + if [[ "$mode" == "nightly" ]]; then tag="$image-nightly" fi - echo docker pull $temprepo:$image-$mode-$GITHUB_SHA - docker pull $temprepo:$image-$mode-$GITHUB_SHA - echo docker tag $temprepo:$image-$mode-$GITHUB_SHA $repo:$tag - echo docker push $repo:$tag + echo docker pull $from_repo:$image-$mode-$commit_sha + docker pull $from_repo:$image-$mode-$commit_sha + echo docker tag $from_repo:$image-$mode-$commit_sha $to_repo:$tag + echo docker push $to_repo:$tag done \ No newline at end of file From b5eaf032b9fbda4fb43bbf24587c7dd85ac9ab34 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Tue, 17 Dec 2024 21:54:36 +0000 Subject: [PATCH 54/62] fix for loop --- serving/docker/scripts/push_image_from_ECR.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/serving/docker/scripts/push_image_from_ECR.sh b/serving/docker/scripts/push_image_from_ECR.sh index 01010d15a..793756af9 100755 --- a/serving/docker/scripts/push_image_from_ECR.sh +++ b/serving/docker/scripts/push_image_from_ECR.sh @@ -17,7 +17,7 @@ images=(cpu aarch64 cpu-full pytorch-inf2 pytorch-gpu lmi tensorrt-llm) from_repo=$AWS_TMP_ECR_REPO -for image in $images; do +for image in "${images[@]}"; do if [[ "$mode" == "release" ]]; then if [[ "$image" == "cpu" ]]; then From ea3b51888379b3632638b20e2387717342157ae4 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Tue, 17 Dec 2024 22:05:41 +0000 Subject: [PATCH 55/62] improve code quality --- .github/workflows/docker_publish.yml | 13 ++++++++----- serving/docker/scripts/push_image_from_ECR.sh | 5 +++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docker_publish.yml b/.github/workflows/docker_publish.yml index f2e7f23c5..43b148490 100644 --- a/.github/workflows/docker_publish.yml +++ b/.github/workflows/docker_publish.yml @@ -77,23 +77,26 @@ jobs: with: role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving aws-region: us-east-1 + - name: Login to ECR + run: | + aws ecr get-login-password --region ${{ env.ECR_REPO_REGION }} | docker login --username AWS --password-stdin ${{ env.AWS_TMP_ECR_REPO }} + aws ecr get-login-password --region ${{ env.ECR_REPO_REGION }} | docker login --username AWS --password-stdin ${{ env.AWS_STAGING_ECR_REPO }} + - name: Get DJL Version + run: | + DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) + echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV - name: Pull and sync to docker hub working-directory: serving/docker run: | - DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) - aws ecr get-login-password --region $ECR_REPO_REGION | docker login --username AWS --password-stdin ${{env.AWS_TMP_ECR_REPO}} ./scripts/push_image_from_ECR.sh $DJL_VERSION deepjavalibrary/djl-serving ${{ inputs.mode }} ${{ inputs.commit_sha }} - name: Pull and sync to ECR working-directory: serving/docker run: | - DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) - aws ecr get-login-password --region $ECR_REPO_REGION | docker login --username AWS --password-stdin $AWS_STAGING_ECR_REPO ./scripts/push_image_from_ECR.sh $DJL_VERSION $AWS_STAGING_ECR_REPO ${{ inputs.mode }} ${{ inputs.commit_sha }} - name: Retag image for release latest if: ${{ inputs.mode == 'release' }} working-directory: serving/docker run: | - DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) docker tag deepjavalibrary/djl-serving:${DJL_VERSION} deepjavalibrary/djl-serving:latest docker push deepjavalibrary/djl-serving:latest - name: Clean docker env diff --git a/serving/docker/scripts/push_image_from_ECR.sh b/serving/docker/scripts/push_image_from_ECR.sh index 793756af9..abb83a1e3 100755 --- a/serving/docker/scripts/push_image_from_ECR.sh +++ b/serving/docker/scripts/push_image_from_ECR.sh @@ -7,6 +7,11 @@ if [ $# -lt 3 ]; then echo "Usage: $0 [commit_sha]" >&2 exit 1 fi +# Validate required environment variables +if [ -z "$AWS_TMP_ECR_REPO" ]; then + echo "ERROR: AWS_TMP_ECR_REPO environment variable is not set" >&2 + exit 1 +fi version=$1 to_repo=$2 From 0faa058b3a70abd8b62bf92557d12287d24673a8 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Tue, 17 Dec 2024 22:10:46 +0000 Subject: [PATCH 56/62] fix path --- .github/workflows/docker_publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker_publish.yml b/.github/workflows/docker_publish.yml index 43b148490..1f30f8326 100644 --- a/.github/workflows/docker_publish.yml +++ b/.github/workflows/docker_publish.yml @@ -83,7 +83,7 @@ jobs: aws ecr get-login-password --region ${{ env.ECR_REPO_REGION }} | docker login --username AWS --password-stdin ${{ env.AWS_STAGING_ECR_REPO }} - name: Get DJL Version run: | - DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) + DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ./gradle/libs.versions.toml) echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV - name: Pull and sync to docker hub working-directory: serving/docker From ab7e10bb41f60931beabe6720ff95659f59869c2 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Tue, 17 Dec 2024 22:44:42 +0000 Subject: [PATCH 57/62] fix multiple typo --- .github/workflows/nightly.yml | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index fd2feae33..550d8a8c8 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -41,22 +41,24 @@ jobs: - name: get_image_tag_suffix id: get_image_tag_suffix run: | - if ${{ inputs.mode == 'nightly'}}; then - test_image_tag_suffix='nightly' + if [[ "${{ inputs.mode }}" == "nightly" ]]; then + echo "test_image_tag_suffix=nightly" >> $GITHUB_OUTPUT + elif [[ "${{ inputs.mode }}" == "release" ]]; then + echo "test_image_tag_suffix=${{ needs.build.outputs.djl_version }}-${GITHUB_RUN_ID}" >> $GITHUB_OUTPUT + else + echo "Invalid mode specified" + exit 1 fi - if ${{ inputs.mode == 'release'}}; then - test_image_tag_suffix='${{ needs.build.outputs.djl_version}}-${GITHUB_RUN_ID}' - fi - echo "test_image_tag_suffix=$test_image_tag_suffix" >> $GITHUB_OUTPUT integration-test: needs: [get_image_tag_suffix] uses: ./.github/workflows/integration.yml secrets: inherit with: - tag-suffix:: ${{ needs.get_image_tag_suffix.outputs.test_image_tag_suffix }} + tag-suffix: ${{ needs.get_image_tag_suffix.outputs.test_image_tag_suffix }} publish: needs: [integration-test, get_image_tag_suffix] uses: ./.github/workflows/docker_publish.yml secrets: inherit with: - mode: ${{ inputs.mode }} \ No newline at end of file + mode: ${{ inputs.mode }} + commit_sha: ${{ github.sha }} From ebc7f895da849e99127f28db6f9d070c3afc6375 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Wed, 18 Dec 2024 01:32:02 +0000 Subject: [PATCH 58/62] use credential only for ubuntu --- .github/workflows/integration.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 0afa40ca7..17781a671 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -201,6 +201,7 @@ jobs: chmod +x awscurl mkdir outputs - name: Configure AWS Credentials + if: matrix.test.instance == 'ubuntu-latest' uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving From cf56d95d430be8db2b8caf7176fd786c9f8164e6 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Wed, 18 Dec 2024 19:17:47 +0000 Subject: [PATCH 59/62] enable docker push --- serving/docker/scripts/push_image_from_ECR.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/serving/docker/scripts/push_image_from_ECR.sh b/serving/docker/scripts/push_image_from_ECR.sh index abb83a1e3..e5577351f 100755 --- a/serving/docker/scripts/push_image_from_ECR.sh +++ b/serving/docker/scripts/push_image_from_ECR.sh @@ -22,6 +22,7 @@ images=(cpu aarch64 cpu-full pytorch-inf2 pytorch-gpu lmi tensorrt-llm) from_repo=$AWS_TMP_ECR_REPO +set -x for image in "${images[@]}"; do if [[ "$mode" == "release" ]]; then @@ -35,8 +36,7 @@ for image in "${images[@]}"; do if [[ "$mode" == "nightly" ]]; then tag="$image-nightly" fi - echo docker pull $from_repo:$image-$mode-$commit_sha docker pull $from_repo:$image-$mode-$commit_sha - echo docker tag $from_repo:$image-$mode-$commit_sha $to_repo:$tag - echo docker push $to_repo:$tag + docker tag $from_repo:$image-$mode-$commit_sha $to_repo:$tag + docker push $to_repo:$tag done \ No newline at end of file From 1d6b0e4fb337347ca2ddd0c4915593ba8b5671e3 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Wed, 18 Dec 2024 20:32:32 +0000 Subject: [PATCH 60/62] fix naming --- .github/workflows/docker_publish.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker_publish.yml b/.github/workflows/docker_publish.yml index 1f30f8326..e4fdedeee 100644 --- a/.github/workflows/docker_publish.yml +++ b/.github/workflows/docker_publish.yml @@ -1,4 +1,4 @@ -name: Build and push docker nightly to temp ECR repo +name: Publish docker nightly to dockerhub & staging ECR repo on: workflow_dispatch: @@ -12,7 +12,7 @@ on: - nightly - release commit_sha: - description: 'specify which sha value the image was built.' + description: 'specify which sha value the image was built with.' required: false default: '' workflow_call: @@ -24,7 +24,7 @@ on: default: 'nightly' commit_sha: type: string - description: 'specify which sha value the image was built.' + description: 'specify which sha value the image aws built with.' required: false default: '' From 583e8a2e9ce003d2aa8109e6f266f43f056dca97 Mon Sep 17 00:00:00 2001 From: Shen Teng Date: Wed, 18 Dec 2024 19:37:17 +0000 Subject: [PATCH 61/62] echo --- serving/docker/scripts/push_image_from_ECR.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/serving/docker/scripts/push_image_from_ECR.sh b/serving/docker/scripts/push_image_from_ECR.sh index e5577351f..729348a6a 100755 --- a/serving/docker/scripts/push_image_from_ECR.sh +++ b/serving/docker/scripts/push_image_from_ECR.sh @@ -37,6 +37,6 @@ for image in "${images[@]}"; do tag="$image-nightly" fi docker pull $from_repo:$image-$mode-$commit_sha - docker tag $from_repo:$image-$mode-$commit_sha $to_repo:$tag - docker push $to_repo:$tag + echo docker tag $from_repo:$image-$mode-$commit_sha $to_repo:$tag + echo docker push $to_repo:$tag done \ No newline at end of file From ce98891fd4e5cebf3434c7f9137afd9689629ca2 Mon Sep 17 00:00:00 2001 From: Siddharth Venkatesan Date: Thu, 19 Dec 2024 10:44:16 -0800 Subject: [PATCH 62/62] log image under tests for integration tests --- tests/integration/tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/tests.py b/tests/integration/tests.py index d47d4aec2..8176814d3 100644 --- a/tests/integration/tests.py +++ b/tests/integration/tests.py @@ -51,6 +51,7 @@ def __init__(self, container, test_name=None, download=False): if download: os.system(f"./download_models.sh {self.container}") + logging.info(f"Using the following image for tests: {self.image}") def __enter__(self): return self