[TensorFlow][Inference][EC2] TensorFlow 2.18.0 Currency Release #4493

Open · wants to merge 5 commits into master
7 changes: 4 additions & 3 deletions dlc_developer_config.toml
@@ -37,10 +37,11 @@ deep_canary_mode = false
[build]
# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
# available frameworks - ["autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
-build_frameworks = []
+build_frameworks = ["tensorflow"]


# By default we build both training and inference containers. Set true/false values to determine which to build.
-build_training = true
+build_training = false
build_inference = true

# Set do_build to "false" to skip builds and test the latest image built by this PR
@@ -132,7 +133,7 @@ dlc-pr-tensorflow-2-habana-training = ""

# Standard Framework Inference
dlc-pr-pytorch-inference = ""
dlc-pr-tensorflow-2-inference = ""
dlc-pr-tensorflow-2-inference = "tensorflow/inference/buildspec-2-18-ec2.yml"
dlc-pr-autogluon-inference = ""

# Neuron Inference
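Net effect of the dlc_developer_config.toml changes above, as a condensed view (only the flipped settings are shown; everything else keeps its defaults):

build_frameworks = ["tensorflow"]
build_training = false
build_inference = true
dlc-pr-tensorflow-2-inference = "tensorflow/inference/buildspec-2-18-ec2.yml"

Together these limit this PR's builds to the TensorFlow inference images driven by the new EC2 buildspec.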
72 changes: 72 additions & 0 deletions tensorflow/inference/buildspec-2-18-ec2.yml
@@ -0,0 +1,72 @@
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
prod_account_id: &PROD_ACCOUNT_ID 763104351884
region: &REGION <set-$REGION-in-environment>
framework: &FRAMEWORK tensorflow
version: &VERSION 2.18.0
short_version: &SHORT_VERSION 2.18
arch_type: x86
# autopatch_build: "True"

repository_info:
inference_repository: &INFERENCE_REPOSITORY
image_type: &INFERENCE_IMAGE_TYPE inference
root: !join [ *FRAMEWORK, "/", *INFERENCE_IMAGE_TYPE ]
repository_name: &REPOSITORY_NAME !join [pr, "-", *FRAMEWORK, "-", *INFERENCE_IMAGE_TYPE]
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK, "-", *INFERENCE_IMAGE_TYPE ]
release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/,
*RELEASE_REPOSITORY_NAME ]

context:
inference_context: &INFERENCE_CONTEXT
sagemaker_package_name:
source: docker/build_artifacts/sagemaker
target: sagemaker
init:
source: docker/build_artifacts/__init__.py
target: __init__.py
dockerd-entrypoint:
source: docker/build_artifacts/dockerd-entrypoint.py
target: dockerd-entrypoint.py
deep_learning_container:
source: ../../src/deep_learning_container.py
target: deep_learning_container.py

images:
  BuildEC2TensorflowCPUInferencePy3DockerImage:
    <<: *INFERENCE_REPOSITORY
    build: &TENSORFLOW_CPU_INFERENCE_PY3 false
    image_size_baseline: &IMAGE_SIZE_BASELINE 4899
    framework_version: &FRAMEWORK_VERSION 2.18.0
    device_type: &DEVICE_TYPE cpu
    python_version: &DOCKER_PYTHON_VERSION py3
    tag_python_version: &TAG_PYTHON_VERSION py310
    os_version: &OS_VERSION ubuntu20.04
    tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-ec2" ]
    latest_release_tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION,
                                "-ec2" ]
    docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ]
    target: ec2
    # build_tag_override: "beta:2.16.1-cpu-py310-ubuntu20.04-ec2"
    context:
      <<: *INFERENCE_CONTEXT
  BuildEC2TensorflowGPUInferencePy3DockerImage:
    <<: *INFERENCE_REPOSITORY
    build: &TENSORFLOW_GPU_INFERENCE_PY3 false
    image_size_baseline: &IMAGE_SIZE_BASELINE 13100
    framework_version: &FRAMEWORK_VERSION 2.18.0
    device_type: &DEVICE_TYPE gpu
    python_version: &DOCKER_PYTHON_VERSION py3
    tag_python_version: &TAG_PYTHON_VERSION py310
    cuda_version: &CUDA_VERSION cu122
    os_version: &OS_VERSION ubuntu20.04
    tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION,
                 "-ec2" ]
    latest_release_tag: !join [ *FRAMEWORK_VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION,
                                "-", *OS_VERSION, "-ec2" ]
    docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile.,
                         *DEVICE_TYPE ]
    target: ec2
    # build_tag_override: "beta:2.16.1-gpu-py310-cu122-ubuntu20.04-ec2"
    context:
      <<: *INFERENCE_CONTEXT
2 changes: 1 addition & 1 deletion tensorflow/inference/buildspec.yml
@@ -1 +1 @@
-buildspec_pointer: buildspec-2-16-sm.yml
+buildspec_pointer: buildspec-2-18-ec2.yml
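With the pointer switched, a local build picks up the new buildspec through the default path; a minimal sketch, assuming the repository's src/main.py entry point and that ACCOUNT_ID/REGION are exported as the buildspec requires (values below are examples):

export ACCOUNT_ID=123456789012
export REGION=us-west-2
python src/main.py --buildspec tensorflow/inference/buildspec.yml --framework tensorflow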
206 changes: 206 additions & 0 deletions tensorflow/inference/docker/2.18/py3/Dockerfile.cpu
@@ -0,0 +1,206 @@
########################################################
# _____ ____ ____ ___
# | ____/ ___|___ \ |_ _|_ __ ___ __ _ __ _ ___
# | _|| | __) | | || '_ ` _ \ / _` |/ _` |/ _ \
# | |__| |___ / __/ | || | | | | | (_| | (_| | __/
# |_____\____|_____| |___|_| |_| |_|\__,_|\__, |\___|
# |___/
# ____ _
# | _ \ ___ ___(_)_ __ ___
# | |_) / _ \/ __| | '_ \ / _ \
# | _ < __/ (__| | |_) | __/
# |_| \_\___|\___|_| .__/ \___|
# |_|
########################################################

FROM ubuntu:20.04 AS base_image

ENV DEBIAN_FRONTEND=noninteractive \
LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib"

RUN apt-get update \
&& apt-get upgrade -y \
&& apt-get autoremove -y \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

FROM base_image AS ec2

LABEL maintainer="Amazon AI"
LABEL dlc_major_version="1"

ARG PYTHON=python3.10
ARG PYTHON_PIP=python3-pip
ARG PIP=pip3
ARG PYTHON_VERSION=3.10.14
ARG TFS_URL=https://framework-binaries.s3.us-west-2.amazonaws.com/tensorflow_serving/r2.18_aws/cpu/2025-01-15-19-48/tensorflow_model_server
ARG TFS_SHORT_VERSION=2.18

# ENV variable to be passed to SageMaker stage
ENV PIP=${PIP}
ENV PYTHON=${PYTHON}

# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# Python won’t try to write .pyc or .pyo files on the import of source modules
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV LD_LIBRARY_PATH='/usr/local/lib:$LD_LIBRARY_PATH'
ENV MODEL_BASE_PATH=/models
# The only required piece is the model name in order to differentiate endpoints
ENV MODEL_NAME=model
ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update \
&& apt-get -y install --no-install-recommends \
curl \
gnupg2 \
ca-certificates \
emacs \
git \
unzip \
wget \
vim \
libbz2-dev \
liblzma-dev \
libffi-dev \
build-essential \
zlib1g-dev \
openssl \
libssl1.1 \
libreadline-gplv2-dev \
libncursesw5-dev \
libssl-dev \
libsqlite3-dev \
tk-dev \
libgdbm-dev \
libc6-dev \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Install python3.10
RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \
&& tar -xvf Python-$PYTHON_VERSION.tgz \
&& cd Python-$PYTHON_VERSION \
&& ./configure && make && make install \
&& rm -rf ../Python-$PYTHON_VERSION*

RUN ${PIP} --no-cache-dir install --upgrade \
pip \
setuptools

RUN ${PIP} install --no-cache-dir \
"awscli<2" \
boto3 \
"cython<3.0" \
gevent \
requests \
grpcio \
"protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3" \
packaging \
# using --no-dependencies to avoid installing tensorflow binary
&& ${PIP} install --no-dependencies --no-cache-dir \
tensorflow-serving-api=="2.18.0"

# Some TF tools expect a "python" binary
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \
&& ln -s $(which ${PIP}) /usr/bin/pip


RUN curl $TFS_URL -o /usr/bin/tensorflow_model_server \
&& chmod 555 /usr/bin/tensorflow_model_server

# Expose ports
# gRPC and REST
EXPOSE 8500 8501

# Set where models should be stored in the container
RUN mkdir -p ${MODEL_BASE_PATH}

# Create a script that runs the model server so we can use environment variables
# while also passing in arguments from the docker command line
RUN echo '#!/bin/bash \n\n' > /usr/bin/tf_serving_entrypoint.sh \
&& echo '/usr/bin/tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' >> /usr/bin/tf_serving_entrypoint.sh \
&& chmod +x /usr/bin/tf_serving_entrypoint.sh

COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py

RUN chmod +x /usr/local/bin/deep_learning_container.py

RUN HOME_DIR=/root \
&& curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
&& unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
&& cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
&& chmod +x /usr/local/bin/testOSSCompliance \
&& chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
&& ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
&& rm -rf ${HOME_DIR}/oss_compliance*

RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-${TFS_SHORT_VERSION}/license.txt -o /license.txt

RUN rm -rf /tmp/*

CMD ["/usr/bin/tf_serving_entrypoint.sh"]

#################################################################
# ____ __ __ _
# / ___| __ _ __ _ ___| \/ | __ _| | _____ _ __
# \___ \ / _` |/ _` |/ _ \ |\/| |/ _` | |/ / _ \ '__|
# ___) | (_| | (_| | __/ | | | (_| | < __/ |
# |____/ \__,_|\__, |\___|_| |_|\__,_|_|\_\___|_|
# |___/
# ___ ____ _
# |_ _|_ __ ___ __ _ __ _ ___ | _ \ ___ ___(_)_ __ ___
# | || '_ ` _ \ / _` |/ _` |/ _ \ | |_) / _ \/ __| | '_ \ / _ \
# | || | | | | | (_| | (_| | __/ | _ < __/ (__| | |_) | __/
# |___|_| |_| |_|\__,_|\__, |\___| |_| \_\___|\___|_| .__/ \___|
# |___/ |_|
#################################################################

FROM ec2 AS sagemaker

LABEL maintainer="Amazon AI"
LABEL dlc_major_version="1"

# Specify accept-bind-to-port LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT
# https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true

ARG TFS_SHORT_VERSION=2.18
ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}"
ENV PATH="$PATH:/sagemaker"

# nginx + njs
RUN curl -s http://nginx.org/keys/nginx_signing.key | apt-key add - \
&& echo 'deb http://nginx.org/packages/ubuntu/ focal nginx' >> /etc/apt/sources.list \
&& apt-get update \
&& apt-get -y install --no-install-recommends \
nginx \
nginx-module-njs \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# These pins are for the TFS SageMaker Toolkit
RUN ${PIP} install --no-cache-dir \
falcon==3.1.0 \
"gunicorn>=22.0.0"

COPY ./sagemaker /sagemaker

# Expose ports
# gRPC and REST
EXPOSE 8500 8501

RUN HOME_DIR=/root \
&& curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
&& unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
&& cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
&& chmod +x /usr/local/bin/testOSSCompliance \
&& chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
&& ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
&& rm -rf ${HOME_DIR}/oss_compliance*

RUN rm -rf /tmp/*

CMD ["/usr/bin/tf_serving_entrypoint.sh"]