From 932c2e0262a2a908ad1d7d3b09114f74a45fffb9 Mon Sep 17 00:00:00 2001 From: Ben Zhang Date: Sat, 29 Jun 2024 16:04:39 -0700 Subject: [PATCH] Use off-perm bucket (#2) This PR introduces the off-perm bucket to store objects retired from perm. This bucket is needed because the retired objects from perm may not fit within the temporary bucket's quota. Requires #1, https://github.com/WATonomous/infra-config/pull/2886 --- .env.example | 2 ++ docker-compose.yml | 8 ++++++++ src/agent.py | 40 +++++++++++++++++++++++++++++----------- 3 files changed, 39 insertions(+), 11 deletions(-) diff --git a/.env.example b/.env.example index e330963..afe5107 100644 --- a/.env.example +++ b/.env.example @@ -3,3 +3,5 @@ S3_TEMP_ACCESS_KEY=your_access_key S3_TEMP_SECRET_KEY=your_secret_key S3_PERM_ACCESS_KEY=your_access_key S3_PERM_SECRET_KEY=your_secret_key +S3_OFF_PERM_ACCESS_KEY=your_access_key +S3_OFF_PERM_SECRET_KEY=your_secret_key \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 017cc0d..1427f54 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -26,6 +26,12 @@ services: "bucket": "asset-perm", "access_key_env_var": "S3_PERM_ACCESS_KEY", "secret_key_env_var": "S3_PERM_SECRET_KEY" + }, + "off-perm": { + "endpoint": "https://rgw.watonomous.ca", + "bucket": "asset-off-perm", + "access_key_env_var": "S3_OFF_PERM_ACCESS_KEY", + "secret_key_env_var": "S3_OFF_PERM_SECRET_KEY" } } } @@ -34,3 +40,5 @@ services: - S3_TEMP_SECRET_KEY=${S3_TEMP_SECRET_KEY:?} - S3_PERM_ACCESS_KEY=${S3_PERM_ACCESS_KEY:?} - S3_PERM_SECRET_KEY=${S3_PERM_SECRET_KEY:?} + - S3_OFF_PERM_ACCESS_KEY=${S3_OFF_PERM_ACCESS_KEY:?} + - S3_OFF_PERM_SECRET_KEY=${S3_OFF_PERM_SECRET_KEY:?} diff --git a/src/agent.py b/src/agent.py index c2f1a1c..78060e8 100644 --- a/src/agent.py +++ b/src/agent.py @@ -27,29 +27,38 @@ def run_agent(): temp_bucket = get_bucket("temp") perm_bucket = get_bucket("perm") + off_perm_bucket = get_bucket("off-perm") temp_objects = set(obj.key for obj in temp_bucket.objects.all()) perm_objects = set(obj.key for obj in perm_bucket.objects.all()) + off_perm_objects = set(obj.key for obj in off_perm_bucket.objects.all()) + all_objects = temp_objects | perm_objects | off_perm_objects logging.info(f"Found {len(temp_objects)} objects in temp bucket") logging.info(f"Found {len(perm_objects)} objects in perm bucket") + logging.info(f"Found {len(off_perm_objects)} objects in off-perm bucket") + + import pdb; pdb.set_trace() errors = [] - if not desired_perm_objects.issubset(temp_objects | perm_objects): + if not desired_perm_objects.issubset(all_objects): errors.append( ValueError( - f"Cannot find the following objects in any bucket: {desired_perm_objects - temp_objects - perm_objects}" + f"Cannot find the following objects in any bucket: {desired_perm_objects - all_objects}" ) ) # Objects that need to be copied to perm bucket to_perm = desired_perm_objects - perm_objects - # Objects that need to be copied from temp bucket to perm bucket temp_to_perm = to_perm & temp_objects - # Objects that need to be deleted from perm bucket - perm_to_temp = perm_objects - desired_perm_objects - # Objects that need to be deleted from the temp bucket (already exists in another bucket) + off_perm_to_perm = to_perm & off_perm_objects + + # Objects that need to be retired from the perm bucket + perm_to_off_perm = perm_objects - desired_perm_objects + + # Objects that need to be deleted from the temp bucket (already exists in the perm bucket) + # We don't exclude objects from off-perm because the object in temp may exipre later than the object in off-perm delete_from_temp = desired_perm_objects & temp_objects - temp_to_perm logging.info( @@ -58,8 +67,11 @@ def run_agent(): logging.info(f"Copying {len(temp_to_perm)} object(s) from temp to perm bucket:") for obj_key in temp_to_perm: logging.info(obj_key) - logging.info(f"Copying {len(perm_to_temp)} object(s) from perm to temp bucket:") - for obj_key in perm_to_temp: + logging.info(f"Copying {len(off_perm_to_perm)} object(s) from off-perm to perm bucket:") + for obj_key in off_perm_to_perm: + logging.info(obj_key) + logging.info(f"Copying {len(perm_to_off_perm)} object(s) from perm to off-perm bucket:") + for obj_key in perm_to_off_perm: logging.info(obj_key) logging.info(f"Deleting {len(delete_from_temp)} redundant object(s) from temp bucket:") for obj_key in delete_from_temp: @@ -68,7 +80,8 @@ def run_agent(): with TemporaryDirectory() as temp_dir: for obj_key in temp_to_perm: temp_bucket.download_file(obj_key, os.path.join(temp_dir, obj_key)) - # Verify checksum + # Verify checksum because we can't trust that the objects in the temp bucket has correct checksums + # i.e. attackers can simply use a custom client to upload objects with arbitrary names with open(os.path.join(temp_dir, obj_key), "rb") as f: checksum = sha256(f.read()).hexdigest() if checksum != obj_key: @@ -82,9 +95,14 @@ def run_agent(): perm_bucket.upload_file(os.path.join(temp_dir, obj_key), obj_key) temp_bucket.delete_objects(Delete={"Objects": [{"Key": obj_key}]}) - for obj_key in perm_to_temp: + for obj_key in off_perm_to_perm: + off_perm_bucket.download_file(obj_key, os.path.join(temp_dir, obj_key)) + perm_bucket.upload_file(os.path.join(temp_dir, obj_key), obj_key) + off_perm_bucket.delete_objects(Delete={"Objects": [{"Key": obj_key}]}) + + for obj_key in perm_to_off_perm: perm_bucket.download_file(obj_key, os.path.join(temp_dir, obj_key)) - temp_bucket.upload_file(os.path.join(temp_dir, obj_key), obj_key) + off_perm_bucket.upload_file(os.path.join(temp_dir, obj_key), obj_key) perm_bucket.delete_objects(Delete={"Objects": [{"Key": obj_key}]}) for obj_key in delete_from_temp: