Review notes (free text ahead of the first "diff --git" header is skipped by git apply / patch).

What the patch does: the build workflow stops pushing to Docker Hub and instead stages every
image in the temp ECR repo; integration tests pull from that repo; docker_publish.yml then
promotes the staged images via the new serving/docker/scripts/push_image_from_ECR.sh.

NOTE(review): line breaks and leading whitespace were rebuilt from a whitespace-collapsed copy
of this patch. Hunk line counts were re-checked against the @@ headers, but indentation is a
best-effort reconstruction -- TODO confirm against the target files before applying.

Changes made during review (added lines only):
  * push_image_from_ECR.sh: run `docker tag` / `docker push` instead of echoing them.
  * push_image_from_ECR.sh: release images are staged by docker-nightly-publish.yml as
    <image>-<version>-<sha>, so pull that tag instead of <image>-release-<sha>.
  * push_image_from_ECR.sh: `[ -z "$AWS_TMP_ECR_REPO" ]` under `set -u` died with
    "unbound variable" before printing its own error; use "${AWS_TMP_ECR_REPO:-}".
  * push_image_from_ECR.sh: reject unknown modes up front, list the positional arguments in the
    usage text, quote expansions, end the file with a newline (hunk is now +1,57).
  * integration.yml: use `inputs.djl-version` (as the Test step already does) instead of
    `github.event.inputs.djl-version` in "Download models and dockers".
  * docker_publish.yml: typo "aws built with" -> "was built with".

NOTE(review): tests.py now takes the repo only from IMAGE_REPO; when it is unset the image
reference becomes ":<flavor>". Presumably only CI runs these tests -- confirm, or add a default.

diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml
index 0a41e989c..788632f59 100644
--- a/.github/workflows/docker-nightly-publish.yml
+++ b/.github/workflows/docker-nightly-publish.yml
@@ -14,13 +14,19 @@ on:
         type: string
         required: true
         default: 'nightly'
-  schedule:
-    - cron: '0 13 * * *'
+    outputs:
+      djl_version:
+        description: "djl version"
+        value: ${{ jobs.nightly-build.outputs.djl_version }}
 
 permissions:
   id-token: write
   contents: read
 
+env:
+  AWS_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp"
+  DOCKER_HUB_REPO: "deepjavalibrary/djl-serving"
+
 jobs:
   create-runners:
     runs-on: [ self-hosted, scheduler ]
@@ -96,6 +102,7 @@ jobs:
       cpu_instance_id_5: ${{ steps.create_cpu_5.outputs.action_cpu_instance_id }}
       cpu_instance_id_6: ${{ steps.create_cpu_6.outputs.action_cpu_instance_id }}
       graviton_instance_id_1: ${{ steps.create_graviton_1.outputs.action_graviton_instance_id }}
+
   nightly-build:
     needs: create-runners
     strategy:
@@ -122,6 +129,8 @@ jobs:
       - RUN_ID-${{ github.run_id }}
       - RUN_NUMBER-${{ github.run_number }}
       - SHA-${{ github.sha }}
+    outputs:
+      djl_version: ${{ steps.get-versions.outputs.DJL_VERSION }}
     steps:
       - name: Clean disk space
         run: |
@@ -130,11 +139,6 @@ jobs:
             /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \
             $AGENT_TOOLSDIRECTORY
       - uses: actions/checkout@v4
-      - name: Login to Docker
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
       - name: install awscli
         run: |
           sudo apt-get update
@@ -160,35 +164,8 @@ jobs:
           SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)
           echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV
           echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV
-      - name: Build serving package for nightly
-        if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }}
-        run: |
-          ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot
-      - name: Build and push nightly docker image
-        if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }}
-        working-directory: serving/docker
-        run: |
-          export NIGHTLY="-nightly"
-          docker compose build --no-cache \
-            --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \
-            --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \
-            ${{ matrix.containers.name }}
-          docker compose push ${{ matrix.containers.name }}
-      - name: Build and push temp image
-        if: ${{ inputs.mode == 'temp' }}
-        working-directory: serving/docker
-        run: |
-          export NIGHTLY="-nightly"
-          docker compose build --no-cache \
-            --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \
-            --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \
-            ${{ matrix.containers.name }}
-          repo="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp"
-          aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo
-          tempTag="$repo:${{ matrix.containers.name }}-${GITHUB_SHA}"
-          docker tag deepjavalibrary/djl-serving:${{ matrix.containers.name }}-nightly $tempTag
-          docker push $tempTag
-      - name: Build and push release docker image
+          echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_OUTPUT
+      - name: Build release candidate docker image
         if: ${{ inputs.mode == 'release' }}
         working-directory: serving/docker
         run: |
@@ -197,14 +174,37 @@ jobs:
           docker compose build --no-cache \
             --build-arg djl_version=${{ env.DJL_VERSION }} \
             --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \
-            ${{ matrix.containers.name }}
-          docker compose push ${{ matrix.containers.name }}
-      - name: Retag image for release
-        if: ${{ matrix.containers.name == 'cpu' && inputs.mode == 'release' }}
+            ${{ matrix.containers.name }}
+      - name: Build temp docker image
+        if: ${{ inputs.mode == '' || inputs.mode == 'temp' || inputs.mode == 'nightly' }}
+        run: |
+          ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot
+          cd serving/docker
+          export NIGHTLY="-nightly"
+          echo "NIGHTLY=$NIGHTLY" >> $GITHUB_ENV
+          docker compose build --no-cache \
+            --build-arg djl_version=${{ env.DJL_VERSION }}-SNAPSHOT \
+            --build-arg djl_serving_version=${{ env.SERVING_VERSION }}-SNAPSHOT \
+            ${{ matrix.containers.name }}
+      - name: Tag and push temp image to ECR repo
         working-directory: serving/docker
         run: |
-          docker tag deepjavalibrary/djl-serving:${{ env.SERVING_VERSION }} deepjavalibrary/djl-serving:latest
-          docker push deepjavalibrary/djl-serving:latest
+          ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}')
+          aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}}
+          mode=${{ inputs.mode }}
+          if [ "${{ inputs.mode }}" == "release" ]; then
+            mode=${{ env.DJL_VERSION }}
+          fi
+          tempRunIdTag="${{ env.AWS_ECR_REPO }}:${{ matrix.containers.name }}-$mode-${GITHUB_RUN_ID}"
+          tempCommitTag="${{ env.AWS_ECR_REPO }}:${{ matrix.containers.name }}-$mode-${GITHUB_SHA}"
+
+          docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.containers.name }}${{ env.NIGHTLY }} $tempRunIdTag
+          docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.containers.name }}${{ env.NIGHTLY }} $tempCommitTag
+          if ${{ inputs.mode == 'nightly' }}; then
+            docker tag ${{ env.DOCKER_HUB_REPO }}:${{ matrix.containers.name }}${{ env.NIGHTLY }} ${{ env.AWS_ECR_REPO }}:${{ matrix.containers.name }}-nightly
+          fi
+          time docker push --all-tags ${{ env.AWS_ECR_REPO }}
+
   stop-runners:
     if: always()
     runs-on: [ self-hosted, scheduler ]
diff --git a/.github/workflows/docker_publish.yml b/.github/workflows/docker_publish.yml
index 54fa359e1..e4fdedeee 100644
--- a/.github/workflows/docker_publish.yml
+++ b/.github/workflows/docker_publish.yml
@@ -1,4 +1,4 @@
-name: Build and push docker nightly to temp ECR repo
+name: Publish docker nightly to dockerhub & staging ECR repo
 
 on:
   workflow_dispatch:
@@ -11,6 +11,10 @@ on:
         options:
           - nightly
           - release
+      commit_sha:
+        description: 'specify which sha value the image was built with.'
+        required: false
+        default: ''
   workflow_call:
     inputs:
       mode:
@@ -18,13 +22,20 @@ on:
         type: string
         required: true
         default: 'nightly'
+      commit_sha:
+        type: string
+        description: 'specify which sha value the image was built with.'
+        required: false
+        default: ''
 
 permissions:
   id-token: write
   contents: read
 
 env:
-  AWS_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp"
+  AWS_TMP_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp"
+  AWS_STAGING_ECR_REPO: "125045733377.dkr.ecr.us-east-1.amazonaws.com/djl-serving"
+  ECR_REPO_REGION: "us-east-1"
 
 jobs:
   create-aarch64-runner:
@@ -66,25 +77,26 @@ jobs:
         with:
           role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
           aws-region: us-east-1
+      - name: Login to ECR
+        run: |
+          aws ecr get-login-password --region ${{ env.ECR_REPO_REGION }} | docker login --username AWS --password-stdin ${{ env.AWS_TMP_ECR_REPO }}
+          aws ecr get-login-password --region ${{ env.ECR_REPO_REGION }} | docker login --username AWS --password-stdin ${{ env.AWS_STAGING_ECR_REPO }}
+      - name: Get DJL Version
+        run: |
+          DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ./gradle/libs.versions.toml)
+          echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV
       - name: Pull and sync to docker hub
         working-directory: serving/docker
         run: |
-          DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml)
-          ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}')
-          aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}}
-          ./scripts/pull_and_retag.sh $DJL_VERSION deepjavalibrary/djl-serving ${{ inputs.mode }}
+          ./scripts/push_image_from_ECR.sh $DJL_VERSION deepjavalibrary/djl-serving ${{ inputs.mode }} ${{ inputs.commit_sha }}
       - name: Pull and sync to ECR
         working-directory: serving/docker
         run: |
-          DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml)
-          repo="125045733377.dkr.ecr.us-east-1.amazonaws.com/djl-serving"
-          aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo
-          ./scripts/pull_and_retag.sh $DJL_VERSION $repo ${{ inputs.mode }}
+          ./scripts/push_image_from_ECR.sh $DJL_VERSION $AWS_STAGING_ECR_REPO ${{ inputs.mode }} ${{ inputs.commit_sha }}
       - name: Retag image for release latest
         if: ${{ inputs.mode == 'release' }}
         working-directory: serving/docker
         run: |
-          DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml)
           docker tag deepjavalibrary/djl-serving:${DJL_VERSION} deepjavalibrary/djl-serving:latest
           docker push deepjavalibrary/djl-serving:latest
       - name: Clean docker env
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 616f3870c..17781a671 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -4,12 +4,33 @@ on:
   workflow_dispatch:
     inputs:
       djl-version:
-        description: 'The released version of DJL'
+        description: 'The released version of DJL.'
         required: false
         default: ''
-  schedule:
-    - cron: '0 15 * * *'
+      tag-suffix:
+        description: 'Run tests on the specific tags suffix i.e. arch-{suffix}'
+        required: false
+        type: string
+        default: 'nightly'
+  workflow_call:
+    inputs:
+      djl-version:
+        description: 'The released version of DJL.'
+        required: false
+        type: string
+        default: 'nightly'
+      tag-suffix:
+        description: 'Run tests on the specific tags suffix i.e. arch-{suffix}'
+        required: false
+        type: string
+        default: ''
+
+permissions:
+  id-token: write
+  contents: read
 
+env:
+  AWS_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp"
 
 jobs:
   create-runners:
@@ -151,6 +172,10 @@ jobs:
           sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
           echo "wait dpkg lock..."
           while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
+      - name: install awscli
+        run: |
+          sudo apt-get update
+          sudo apt-get install awscli -y
       - name: Set up Python3
         if: ${{ matrix.test.instance != 'aarch64' }}
         uses: actions/setup-python@v5
@@ -175,12 +200,22 @@ jobs:
           wget https://publish.djl.ai/awscurl/awscurl
           chmod +x awscurl
           mkdir outputs
+      - name: Configure AWS Credentials
+        if: matrix.test.instance == 'ubuntu-latest'
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
+          aws-region: us-east-1
       - name: Test
         working-directory: tests/integration
         env:
           TEST_DJL_VERSION: ${{ inputs.djl-version }}
+          OVERRIDE_IMAGE_TAG_SUFFIX: ${{ inputs.tag-suffix }}
+          IMAGE_REPO: ${{ env.AWS_ECR_REPO }}
         run: |
-          python -m pytest -k ${{ matrix.test.test }} tests.py
+          ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}')
+          aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}}
+          python -m pytest -s -k ${{ matrix.test.test }} tests.py
       - name: Cleanup
         working-directory: tests/integration
         run: |
@@ -224,11 +259,25 @@ jobs:
           python-version: '3.10.x'
       - name: Install pip dependencies
         run: pip3 install requests numpy pillow wheel
-      - name: Build container name
-        run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }}
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
+          aws-region: us-east-1
       - name: Download models and dockers
         run: |
-          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
+          if [ "${{ inputs.djl-version }}" == "temp" ]; then
+            DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}:pytorch-inf2-temp-${GITHUB_SHA}"
+          elif [ -n "${{ inputs.tag-suffix }}" ]; then
+            DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}:pytorch-inf2-${{ inputs.tag-suffix }}"
+          else
+            DOCKER_IMAGE_URI="${{ env.AWS_ECR_REPO }}:pytorch-inf2-nightly"
+          fi
+          echo "DOCKER_IMAGE_URI=$DOCKER_IMAGE_URI" >>$GITHUB_ENV
+          ECR_REGION=$(echo "${{ env.AWS_ECR_REPO }}" | awk -F. '{print $4}')
+          aws ecr get-login-password --region $ECR_REGION | docker login --username AWS --password-stdin ${{env.AWS_ECR_REPO}}
+          echo $DOCKER_IMAGE_URI
+          docker pull $DOCKER_IMAGE_URI
       - name: Run djl_python unit/integration tests on container
         working-directory: engines/python/setup
         run: |
@@ -241,7 +290,7 @@ jobs:
           -v $PWD/:/opt/ml/model/ \
           -w /opt/ml/model \
           --device=/dev/neuron0:/dev/neuron0 \
-          deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG \
+          $DOCKER_IMAGE_URI \
           /bin/bash -c "'pip install /opt/ml/model/dist/*.whl pytest' && \
           pytest djl_python/tests/neuron_test_scripts/ | tee logs/results.log"
 
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index fd2feae33..e36cc8201 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -25,7 +25,7 @@ on:
 permissions:
   id-token: write
   contents: read
-  
+
 jobs:
   build:
     uses: ./.github/workflows/docker-nightly-publish.yml
@@ -41,22 +41,24 @@ jobs:
       - name: get_image_tag_suffix
         id: get_image_tag_suffix
         run: |
-          if ${{ inputs.mode == 'nightly'}}; then
-            test_image_tag_suffix='nightly'
-          fi
-          if ${{ inputs.mode == 'release'}}; then
-            test_image_tag_suffix='${{ needs.build.outputs.djl_version}}-${GITHUB_RUN_ID}'
+          if [[ "${{ inputs.mode }}" == "nightly" ]]; then
+            echo "test_image_tag_suffix=nightly" >> $GITHUB_OUTPUT
+          elif [[ "${{ inputs.mode }}" == "release" ]]; then
+            echo "test_image_tag_suffix=${{ needs.build.outputs.djl_version }}-${GITHUB_RUN_ID}" >> $GITHUB_OUTPUT
+          else
+            echo "Invalid mode specified"
+            exit 1
           fi
-          echo "test_image_tag_suffix=$test_image_tag_suffix" >> $GITHUB_OUTPUT
   integration-test:
     needs: [get_image_tag_suffix]
     uses: ./.github/workflows/integration.yml
     secrets: inherit
     with:
-      tag-suffix:: ${{ needs.get_image_tag_suffix.outputs.test_image_tag_suffix }}
+      tag-suffix: ${{ needs.get_image_tag_suffix.outputs.test_image_tag_suffix }}
   publish:
     needs: [integration-test, get_image_tag_suffix]
     uses: ./.github/workflows/docker_publish.yml
     secrets: inherit
     with:
-      mode: ${{ inputs.mode }}
\ No newline at end of file
+      mode: ${{ inputs.mode }}
+      commit_sha: ${{ github.sha }}
diff --git a/serving/docker/scripts/pull_and_retag.sh b/serving/docker/scripts/pull_and_retag.sh
index 4a9010144..5bdcdcf59 100755
--- a/serving/docker/scripts/pull_and_retag.sh
+++ b/serving/docker/scripts/pull_and_retag.sh
@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+# for djl-serving/.github/workflows/nightly-docker-ecr-sync.yml
 
 version=$1
 repo=$2
diff --git a/serving/docker/scripts/push_image_from_ECR.sh b/serving/docker/scripts/push_image_from_ECR.sh
new file mode 100755
index 000000000..729348a6a
--- /dev/null
+++ b/serving/docker/scripts/push_image_from_ECR.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+# for docker_publish.yml
+#
+# Pulls each image staged in $AWS_TMP_ECR_REPO and publishes it to <to_repo>
+# under its public tag.
+
+set -euo pipefail
+# Validate required arguments
+if [ $# -lt 3 ]; then
+  echo "Usage: $0 <version> <to_repo> <mode> [commit_sha]" >&2
+  exit 1
+fi
+# Validate required environment variables
+# (":-" keeps "set -u" from aborting before the message below is printed)
+if [ -z "${AWS_TMP_ECR_REPO:-}" ]; then
+  echo "ERROR: AWS_TMP_ECR_REPO environment variable is not set" >&2
+  exit 1
+fi
+
+version=$1
+to_repo=$2
+mode=$3
+commit_sha=${4:-$GITHUB_SHA} # Use parameter expansion for default value
+
+# docker-nightly-publish.yml stages release images as <image>-<version>-<sha>
+# and nightly images as <image>-nightly-<sha>; reject any other mode up front.
+case "$mode" in
+  release) staged_label=$version ;;
+  nightly) staged_label=$mode ;;
+  *)
+    echo "ERROR: unsupported mode '$mode' (expected 'nightly' or 'release')" >&2
+    exit 1
+    ;;
+esac
+
+images=(cpu aarch64 cpu-full pytorch-inf2 pytorch-gpu lmi tensorrt-llm)
+
+from_repo=$AWS_TMP_ECR_REPO
+
+set -x
+for image in "${images[@]}"; do
+
+  if [[ "$mode" == "release" ]]; then
+    if [[ "$image" == "cpu" ]]; then
+      tag=$version
+    else
+      tag="$version-$image"
+    fi
+  else
+    tag="$image-nightly"
+  fi
+
+  staged_image="$from_repo:$image-$staged_label-$commit_sha"
+  docker pull "$staged_image"
+  docker tag "$staged_image" "$to_repo:$tag"
+  docker push "$to_repo:$tag"
+done
diff --git a/tests/integration/launch_container.sh b/tests/integration/launch_container.sh
index 94d668754..860f50acd 100755
--- a/tests/integration/launch_container.sh
+++ b/tests/integration/launch_container.sh
@@ -11,6 +11,8 @@ model_path=$2 #required
 platform=$3 #required
 args=${@:4} #optional
 
+echo launch_container.sh: using docker image: $docker_image
+
 is_sm_neo_context=false
 if [[ $4 == "sm_neo_context" ]]; then
   is_sm_neo_context=true
diff --git a/tests/integration/tests.py b/tests/integration/tests.py
index 2840123d0..8176814d3 100644
--- a/tests/integration/tests.py
+++ b/tests/integration/tests.py
@@ -10,6 +10,9 @@ import test_client
 
 
 djl_version = os.environ.get('TEST_DJL_VERSION', '').strip()
+override_image_tag_suffix = os.environ.get('OVERRIDE_IMAGE_TAG_SUFFIX',
+                                           '').strip()
+image_repo = os.environ.get('IMAGE_REPO', '').strip()
 
 
 def is_applicable_cuda_capability(arch: int) -> bool:
@@ -29,19 +32,17 @@ def __init__(self, container, test_name=None, download=False):
         self.client_file_handler = None
 
         # Compute flavor and repo
-        repo = "deepjavalibrary/djl-serving"
+        repo = image_repo
         if djl_version is None or len(
                 djl_version) == 0 or djl_version == "nightly":
             flavor = f"{container}-nightly"
+        elif djl_version == "temp":
+            flavor = f"{container}-temp-{os.environ['GITHUB_SHA']}"
         else:
-            if djl_version == "temp":
-                repo = "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp"
-                flavor = f"{container}-{os.environ['GITHUB_SHA']}"
-            else:
-                if container == "cpu":
-                    flavor = djl_version
-                else:
-                    flavor = f"{djl_version}-{container}"
+            flavor = f"{container}-{djl_version}-{os.environ['GITHUB_SHA']}"
+
+        if override_image_tag_suffix:
+            flavor = f"{container}-{override_image_tag_suffix}"
 
         self.image = f"{repo}:{flavor}"
 
@@ -50,6 +51,7 @@ def __init__(self, container, test_name=None, download=False):
 
         if download:
             os.system(f"./download_models.sh {self.container}")
+        logging.info(f"Using the following image for tests: {self.image}")
 
     def __enter__(self):
         return self