Skip to content

Commit

Permalink
Detach and archive old partitions (#4138)
Browse files Browse the repository at this point in the history
Every month, create and attach a new partition for the next month and attempt to detach and archive partitions that are 3 to 6 months old.
  • Loading branch information
conbrad authored Nov 27, 2024
1 parent 69ba46e commit 06d8512
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 19 deletions.
2 changes: 1 addition & 1 deletion openshift/pgslice/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Base image: Ubuntu 24.04, pulled via artifacts.developer.gov.bc.ca/docker-remote.
FROM artifacts.developer.gov.bc.ca/docker-remote/ubuntu:24.04

# Install build-essential, libpq-dev, the PostgreSQL 16 client, Python 3 and Ruby,
# then install the pgslice gem.
RUN apt-get update && \
apt-get install -y build-essential libpq-dev postgresql-client-16 ruby-full && \
apt-get install -y build-essential libpq-dev postgresql-client-16 python3 ruby-full && \
gem install pgslice

# Download the AWS CLI installer.
Expand Down
58 changes: 40 additions & 18 deletions openshift/pgslice/docker/partition_and_archive.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,24 +51,46 @@ fi

# Percent-encode a string so it can be embedded in the userinfo part of a
# connection URI.
# Arguments: $1 - raw string to encode
# Outputs:   the encoded string on stdout
# The value is handed to Python through the environment instead of being pasted
# into the Python source, so quotes or backslashes in it cannot break (or inject
# code into) the snippet. safe="" makes "/" get encoded too; a raw "/" would
# otherwise terminate the authority section of the URI.
_urlencode() {
  _URLENCODE_INPUT="$1" python3 -c 'import os, urllib.parse; print(urllib.parse.quote(os.environ["_URLENCODE_INPUT"], safe=""))'
}

ENCODED_PASS=$(_urlencode "${PG_PASSWORD}")
PGSLICE_URL="postgresql://${PG_USER}:${ENCODED_PASS}@${PG_HOSTNAME}:${PG_PORT}/${PG_DATABASE}"
# All expansions are quoted so that a table name or connection URL containing
# whitespace or glob characters (e.g. "?" in URL query parameters) is passed to
# pgslice as a single, unmodified argument.

# Add partitions to the intermediate table (assumes it already exists)
pgslice add_partitions "$TABLE" --intermediate --future 1 --url "$PGSLICE_URL"
# Fill the partitions with data from the original table
pgslice fill "$TABLE" --url "$PGSLICE_URL"
# Analyze for query planner
pgslice analyze "$TABLE" --url "$PGSLICE_URL"
# Swap the intermediate table with the original table
pgslice swap "$TABLE" --url "$PGSLICE_URL"
# Fill the rest (rows inserted between the first fill and the swap)
pgslice fill "$TABLE" --swapped --url "$PGSLICE_URL"

# Work out next month's partition suffix (YYYYMM) and its first and last calendar
# days. Everything is derived from the first day of the current month.
_month_start=$(date +%Y-%m-01)
NEXT_MONTH_DATE=$(date -d "${_month_start} next month" +%Y%m)
FIRST_DAY_NEXT_MONTH=$(date -d "${_month_start} next month" +%Y-%m-%d)
LAST_DAY_NEXT_MONTH=$(date -d "${_month_start} next month +1 month -1 day" +%Y-%m-%d)
echo "Creating new partition for dates: $FIRST_DAY_NEXT_MONTH to $LAST_DAY_NEXT_MONTH"

# Naming for the dump of the retired table that is uploaded to S3 before the
# table is dropped.
# Approach adapted from https://github.com/BCDevOps/backup-container
_timestamp=$(date +%Y-%m-%d_%H-%M-%S)
_datestamp=$(date +%Y/%m)
# Folder is keyed by host/database and the current year/month.
_target_folder="${PG_HOSTNAME}_${PG_DATABASE}/${_datestamp}"
# File name carries the host, table and a to-the-second timestamp.
_target_filename="${PG_HOSTNAME}_${TABLE}_retired_${_timestamp}.sql.gz"
# Create next month's partition and attach it to the parent table.
# PostgreSQL treats the TO bound of a range partition as exclusive, so the
# partition has to end at the first day of the month AFTER next month. Ending it
# at the last day of next month would leave that final day without a partition.
PARTITION_UPPER_BOUND=$(date -d "$(date +%Y-%m-01) +2 months" +%Y-%m-%d)
NEW_PARTITION_COMMAND="CREATE TABLE ${TABLE}_${NEXT_MONTH_DATE} PARTITION OF $TABLE FOR VALUES FROM ('$FIRST_DAY_NEXT_MONTH') TO ('$PARTITION_UPPER_BOUND');"
psql -c "$NEW_PARTITION_COMMAND" "$PGSLICE_URL"

# Stream a dump of the retired table to S3 and drop the table only if every
# stage of the pipeline succeeded. pipefail is enabled inside a subshell so the
# rest of the script keeps its shell options; without it only the exit status
# of `aws` would be visible and a failed pg_dump would still lead to the DROP.
if (
  set -o pipefail
  pg_dump -c -Fc -t "${TABLE}_retired" "$PGSLICE_URL" \
    | gzip \
    | AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY AWS_SECRET_ACCESS_KEY=$AWS_SECRET_KEY \
      aws --endpoint="https://${AWS_HOSTNAME}" s3 cp - "s3://${AWS_BUCKET}/retired/${_target_folder}/${_target_filename}"
); then
  psql -c "DROP TABLE ${TABLE}_retired" "$PGSLICE_URL"
else
  echo "Archiving ${TABLE}_retired failed; the table has NOT been dropped" >&2
fi
# Retire partitions that are 3 to 6 months old: detach each one from the parent
# table if it is still attached, stream a dump of it to object storage, and drop
# it only once the upload has succeeded.
# Borrowing a lot from https://github.com/BCDevOps/backup-container

# Dump one table and upload the compressed dump to S3.
# Globals:   PGSLICE_URL, AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_HOSTNAME (read)
# Arguments: $1 - table to dump, $2 - destination s3:// URI
# Returns:   non-zero if pg_dump, gzip or the upload fails
_archive_partition() {
  local table=$1 s3_uri=$2
  # pipefail is enabled in a subshell so the caller's shell options are left
  # alone; without it only the exit status of `aws` would be seen and a failed
  # pg_dump would look like a successful archive.
  (
    set -o pipefail
    pg_dump -c -Fc -t "$table" "$PGSLICE_URL" \
      | gzip \
      | AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY AWS_SECRET_ACCESS_KEY=$AWS_SECRET_KEY \
        aws --endpoint="https://${AWS_HOSTNAME}" s3 cp - "$s3_uri"
  )
}

# Detach, archive and drop the monthly partitions from 3 to 6 months ago.
# Globals: PGSLICE_URL, PG_HOSTNAME, PG_DATABASE, AWS_BUCKET (read)
# NOTE(review): the parent table name is hard-coded here while the rest of the
# script uses $TABLE - confirm whether this should follow $TABLE as well.
_retire_old_partitions() {
  local month_start datestamp target_folder
  local i partition_month partition_table detach_command target_filename exists

  month_start=$(date +%Y-%m-01)
  datestamp=$(date +%Y/%m)
  target_folder="${PG_HOSTNAME}_${PG_DATABASE}/${datestamp}"

  for i in {3..6}; do
    partition_month=$(date -d "${month_start} -$i months" +%Y%m)
    partition_table="weather_station_model_predictions_${partition_month}"

    # Detach only when the table is currently a partition of the parent, so
    # re-running after a partial failure does not error out.
    detach_command="
    DO \$BODY\$
    BEGIN
      IF EXISTS (
        SELECT 1
        FROM pg_inherits
        JOIN pg_class parent ON pg_inherits.inhparent = parent.oid
        JOIN pg_class child ON pg_inherits.inhrelid = child.oid
        JOIN pg_namespace ns_parent ON parent.relnamespace = ns_parent.oid
        JOIN pg_namespace ns_child ON child.relnamespace = ns_child.oid
        WHERE ns_child.nspname = 'public'
        AND child.relname = '${partition_table}'
        AND parent.relname = 'weather_station_model_predictions'
      ) THEN
        EXECUTE 'ALTER TABLE weather_station_model_predictions DETACH PARTITION ${partition_table}';
      END IF;
    END \$BODY\$;"

    echo "Detaching partition: ${partition_table}"
    psql -c "$detach_command" "$PGSLICE_URL"

    # Skip months that have no table at all; otherwise pg_dump would fail and
    # an empty archive would still be uploaded.
    exists=$(psql -tA -c "SELECT 1 FROM pg_tables WHERE schemaname = 'public' AND tablename = '${partition_table}'" "$PGSLICE_URL")
    if [[ "$exists" != "1" ]]; then
      echo "Partition ${partition_table} not found; nothing to archive"
      continue
    fi

    echo "Dumping partition: ${partition_table}"
    target_filename="${PG_HOSTNAME}_${partition_table}_retired_${partition_month}.sql.gz"
    if _archive_partition "$partition_table" "s3://${AWS_BUCKET}/retired/${target_folder}/${target_filename}"; then
      # The data is safely in object storage; the table can go.
      psql -c "DROP TABLE IF EXISTS ${partition_table};" "$PGSLICE_URL"
    else
      echo "Archiving ${partition_table} failed; the table has NOT been dropped" >&2
    fi
  done
}

_retire_old_partitions

0 comments on commit 06d8512

Please sign in to comment.