Skip to content

Commit

Permalink
Add python sitecustomize file (#4486)
Browse files Browse the repository at this point in the history
  • Loading branch information
arjkesh authored Jan 15, 2025
1 parent 7aab149 commit d18d984
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 27 deletions.
14 changes: 14 additions & 0 deletions miscellaneous_scripts/dlc_template.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Best-effort telemetry hook. The image build renders this template by
# textually substituting the brace-delimited anchors in the command string
# below, and ships the rendered file as sitecustomize.py.
import os

try:
    # Tracking stays on unless OPT_OUT_TRACKING is set to "true" (any casing);
    # an unset variable falls back to "" and therefore does not opt out.
    if (
        os.path.exists("/usr/local/bin/deep_learning_container.py")
        and os.getenv("OPT_OUT_TRACKING", "").lower() != "true"
    ):
        import threading

        # Use the POSIX redirection form: os.system hands the command to
        # /bin/sh, and shells such as dash parse the bash-only "&>" as "&"
        # (run in background) followed by ">" instead of silencing both streams.
        cmd = "python /usr/local/bin/deep_learning_container.py --framework {FRAMEWORK} --framework-version {FRAMEWORK_VERSION} --container-type {CONTAINER_TYPE} >/dev/null 2>&1"
        # daemon=True replaces Thread.setDaemon(), which is deprecated since
        # Python 3.10; a daemon thread does not keep the interpreter alive.
        x = threading.Thread(target=lambda: os.system(cmd), daemon=True)
        x.start()
except Exception:
    # Failures are swallowed on purpose so this hook can never raise into the
    # interpreter that is importing it.
    pass
2 changes: 2 additions & 0 deletions pytorch/inference/docker/2.4/py3/Dockerfile.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.4/l

COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py

COPY sitecustomize.py /usr/local/lib/${PYTHON_SHORT_VERSION}/sitecustomize.py

RUN chmod +x /usr/local/bin/deep_learning_container.py

RUN HOME_DIR=/root \
Expand Down
2 changes: 2 additions & 0 deletions pytorch/inference/docker/2.4/py3/cu124/Dockerfile.gpu
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.4/l

COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py

COPY sitecustomize.py /usr/local/lib/${PYTHON_SHORT_VERSION}/sitecustomize.py

RUN chmod +x /usr/local/bin/deep_learning_container.py

RUN HOME_DIR=/root \
Expand Down
44 changes: 33 additions & 11 deletions src/image_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,39 @@ def image_builder(buildspec, image_types=[], device_types=[]):
}
}
)
# job_type will be either inference or training, based on the repo URI
if "training" in image_repo_uri:
label_job_type = "training"
elif "inference" in image_repo_uri:
label_job_type = "inference"
else:
raise RuntimeError(
f"Cannot find inference or training job type in {image_repo_uri}. "
f"This is required to set job_type label."
)

template_file = os.path.join(
os.sep, get_cloned_folder_path(), "miscellaneous_scripts", "dlc_template.py"
)

template_fw_version = (
str(image_config["framework_version"])
if image_config.get("framework_version")
else str(BUILDSPEC["version"])
)
template_fw = str(BUILDSPEC["framework"])
post_template_file = utils.generate_dlc_cmd(
template_path=template_file,
output_path=os.path.join(image_config["root"], "out.py"),
framework=template_fw,
framework_version=template_fw_version,
container_type=label_job_type,
)

ARTIFACTS.update(
{"customize": {"source": post_template_file, "target": "sitecustomize.py"}}
)

context = Context(ARTIFACTS, f"build/{image_name}.tar.gz", image_config["root"])

if "labels" in image_config:
Expand All @@ -265,17 +298,6 @@ def image_builder(buildspec, image_types=[], device_types=[]):
label_contributor = str(BUILDSPEC.get("contributor"))
label_transformers_version = str(transformers_version).replace(".", "-")

# job_type will be either inference or training, based on the repo URI
if "training" in image_repo_uri:
label_job_type = "training"
elif "inference" in image_repo_uri:
label_job_type = "inference"
else:
raise RuntimeError(
f"Cannot find inference or training job type in {image_repo_uri}. "
f"This is required to set job_type label."
)

if cx_type == "sagemaker":
# Adding standard labels to all images
labels[
Expand Down
20 changes: 20 additions & 0 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,3 +642,23 @@ def verify_if_child_image_is_built_on_top_of_base_image(base_image_uri, child_im
if base_layer_sha != child_layer_sha:
return False
return True


def generate_dlc_cmd(template_path, output_path, framework, framework_version, container_type):
    """
    Render a template file by filling in its brace-delimited anchors.

    Every occurrence of "{FRAMEWORK}", "{FRAMEWORK_VERSION}" and "{CONTAINER_TYPE}"
    in the template text is replaced with the corresponding argument, and the
    result is written to output_path (overwriting any existing file).

    :param template_path: str, path of the template file to read
    :param output_path: str, path the rendered file is written to
    :param framework: value substituted for "{FRAMEWORK}"
    :param framework_version: value substituted for "{FRAMEWORK_VERSION}"
    :param container_type: value substituted for "{CONTAINER_TYPE}"
    :return: str, base name of output_path (not the full path)
    """
    with open(template_path, "r", encoding="utf-8") as tf:
        content = tf.read()

    replacements = {
        "FRAMEWORK": framework,
        "FRAMEWORK_VERSION": framework_version,
        "CONTAINER_TYPE": container_type,
    }

    for anchor, value in replacements.items():
        # str() guards against non-string values (e.g. a version parsed from
        # YAML as a float), which str.replace would reject with a TypeError.
        content = content.replace(f"{{{anchor}}}", str(value))

    with open(output_path, "w", encoding="utf-8") as out_f:
        out_f.write(content)

    # Return base path and set as artifact
    return os.path.basename(output_path)
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ def _clean_up_reports():
os.system("rm /tmp/test_tag_request.txt")


def opt_in_opt_out_test():
def opt_in_opt_out_test(exec_cmd):
os.environ["TEST_MODE"] = "1"

for opt_out_value in ["True", "TRUE", "true"]:
_clean_up_reports()
os.environ["OPT_OUT_TRACKING"] = opt_out_value
cmd = "python -c 'import torch'"
cmd = f"python -c '{exec_cmd}'"
os.system(cmd)
time.sleep(5)
assert not os.path.exists(
Expand All @@ -29,7 +29,7 @@ def opt_in_opt_out_test():
for opt_out_value in ["False", "XYgg"]:
_clean_up_reports()
os.environ["OPT_OUT_TRACKING"] = opt_out_value
cmd = "python -c 'import torch'"
cmd = f"python -c '{exec_cmd}'"
os.system(cmd)
time.sleep(5)
assert os.path.exists(
Expand All @@ -43,23 +43,23 @@ def opt_in_opt_out_test():
print("Opt-In/Opt-Out Test passed")


def perf_test():
def perf_test(exec_cmd):
os.environ["TEST_MODE"] = "0"
os.environ["OPT_OUT_TRACKING"] = "False"
NUM_ITERATIONS = 5

for itr in range(NUM_ITERATIONS):
total_time_in = 0
for x in range(NUM_ITERATIONS):
cmd = "python -c 'import torch'"
cmd = f"python -c '{exec_cmd}'"
start = time.time()
os.system(cmd)
total_time_in += time.time() - start
print("avg out time: ", total_time_in / NUM_ITERATIONS)

total_time_out = 0
for x in range(NUM_ITERATIONS):
cmd = "export OPT_OUT_TRACKING='true' && python -c 'import torch'"
cmd = f"export OPT_OUT_TRACKING='true' && python -c '{exec_cmd}'"
start = time.time()
os.system(cmd)
total_time_out += time.time() - start
Expand All @@ -72,7 +72,11 @@ def perf_test():
print("DLC Telemetry performance test Passed")


perf_test()
opt_in_opt_out_test()
perf_test("import torch")
opt_in_opt_out_test("import torch")

# Disabling the `import os` tests until the sitecustomize file is added to all new images
# perf_test("import os")
# opt_in_opt_out_test("import os")

print("All DLC telemetry test passed")
21 changes: 13 additions & 8 deletions test/dlc_tests/container_tests/bin/test_tf_dlc_telemetry_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ def _clean_up_reports():
os.system("rm /tmp/test_tag_request.txt")


def opt_in_opt_out_test():
def opt_in_opt_out_test(exec_cmd):
os.environ["TEST_MODE"] = "1"

for opt_out_value in ["True", "TRUE", "true"]:
_clean_up_reports()
os.environ["OPT_OUT_TRACKING"] = opt_out_value
cmd = "python -c 'import tensorflow'"
cmd = f"python -c '{exec_cmd}'"
os.system(cmd)
time.sleep(5)
assert not os.path.exists(
Expand All @@ -29,7 +29,7 @@ def opt_in_opt_out_test():
for opt_out_value in ["False", "XYgg"]:
_clean_up_reports()
os.environ["OPT_OUT_TRACKING"] = opt_out_value
cmd = "python -c 'import tensorflow'"
cmd = f"python -c '{exec_cmd}'"
os.system(cmd)
time.sleep(5)
assert os.path.exists(
Expand All @@ -43,23 +43,23 @@ def opt_in_opt_out_test():
print("Opt-In/Opt-Out Test passed")


def performance_test():
def performance_test(exec_cmd):
os.environ["TEST_MODE"] = "0"
os.environ["OPT_OUT_TRACKING"] = "False"
NUM_ITERATIONS = 5

for itr in range(NUM_ITERATIONS):
total_time_in = 0
for x in range(NUM_ITERATIONS):
cmd = "python -c 'import tensorflow'"
cmd = f"python -c '{exec_cmd}'"
start = time.time()
os.system(cmd)
total_time_in += time.time() - start
print("avg out time: ", total_time_in / NUM_ITERATIONS)

total_time_out = 0
for x in range(NUM_ITERATIONS):
cmd = "export OPT_OUT_TRACKING='true' && python -c 'import tensorflow'"
cmd = f"export OPT_OUT_TRACKING='true' && python -c '{exec_cmd}'"
start = time.time()
os.system(cmd)
total_time_out += time.time() - start
Expand All @@ -72,7 +72,12 @@ def performance_test():
print("DLC Telemetry performance test Passed")


performance_test()
opt_in_opt_out_test()
# test framework functionality
performance_test("import tensorflow")
opt_in_opt_out_test("import tensorflow")

# Disabling the `import os` tests until the sitecustomize file is added to all new images
# performance_test("import os")
# opt_in_opt_out_test("import os")

print("All DLC telemetry test passed")

0 comments on commit d18d984

Please sign in to comment.