diff --git a/.github/workflows/mariadb-logical-backup.yml b/.github/workflows/mariadb-logical-backup.yml
new file mode 100644
index 00000000..5f957fa3
--- /dev/null
+++ b/.github/workflows/mariadb-logical-backup.yml
@@ -0,0 +1,54 @@
+# Builds the mariadb-logical-backup image and pushes it to GHCR whenever the
+# image sources or this workflow change on the mariadb-logical-backup branch.
+name: Build Logical backup
+
+on:
+  push:
+    branches: ["mariadb-logical-backup"]
+    paths:
+      - "mariadb-logical-backup/**"
+      - ".github/workflows/mariadb-logical-backup.yml"
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      # The arm64 variant is built on the amd64 runner; register the QEMU
+      # emulators first so the RUN steps of the foreign platform can execute.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v1
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+
+      - name: Generate container metadata
+        id: meta
+        uses: docker/metadata-action@v3
+        with:
+          images: ghcr.io/obmondo/mariadb-logical-backup
+          tags: |
+            type=raw,value=latest
+            type=semver,pattern={{version}},value=v3.1.8
+          flavor: |
+            latest=false
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v1
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build & push container image
+        id: docker_build
+        uses: docker/build-push-action@v2
+        with:
+          file: "./mariadb-logical-backup/Dockerfile"
+          context: .
+          labels: ${{ steps.meta.outputs.labels }}
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          platforms: linux/amd64,linux/arm64
diff --git a/.github/workflows/postgres-logical-backup.yml b/.github/workflows/postgres-logical-backup.yml
index aa89030c..9c4c6beb 100644
--- a/.github/workflows/postgres-logical-backup.yml
+++ b/.github/workflows/postgres-logical-backup.yml
@@ -25,7 +25,7 @@ jobs:
           images: ghcr.io/obmondo/postgres-logical-backup
           tags: |
             type=raw,value=latest
-            type=semver,pattern={{version}},value=v3.1.6
+            type=semver,pattern={{version}},value=v3.1.8
           flavor: |
             latest=false
       - name: Login to GitHub Container Registry
diff --git a/mariadb-logical-backup/Dockerfile b/mariadb-logical-backup/Dockerfile
new file mode 100644
index 00000000..1ed496aa
--- /dev/null
+++ b/mariadb-logical-backup/Dockerfile
@@ -0,0 +1,36 @@
+FROM ubuntu:jammy
+LABEL maintainer="Anantharam R U anantharam@obmondo.com"
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Tooling used by mariadb-dump.sh: Azure CLI, gsutil, the MinIO client (mc) and
+# the MariaDB client. mc is downloaded for the architecture being built (dpkg
+# reports amd64/arm64) because the workflow publishes both platforms; curl -f
+# makes an HTTP error fail the build instead of saving an error page.
+RUN apt-get update \
+    && apt-get install --no-install-recommends -y \
+        apt-utils \
+        ca-certificates \
+        lsb-release \
+        pigz \
+        python3-pip \
+        python3-setuptools \
+        curl \
+        jq \
+        gnupg \
+        gcc \
+        libffi-dev \
+    && curl -fsSL https://aka.ms/InstallAzureCLIDeb | bash \
+    && pip3 install --upgrade pip \
+    && pip3 install --no-cache-dir gsutil --upgrade \
+    && curl -fsSL "https://dl.min.io/client/mc/release/linux-$(dpkg --print-architecture)/mc" -o /usr/local/bin/mc \
+    && chmod +x /usr/local/bin/mc \
+    && apt-get update \
+    && apt-get install --no-install-recommends -y \
+        mariadb-client \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY ./* ./
+
+ENTRYPOINT ["/mariadb-dump.sh"]
diff --git a/mariadb-logical-backup/mariadb-dump.sh b/mariadb-logical-backup/mariadb-dump.sh
new file mode 100755
index 00000000..0f3d8cfd
--- /dev/null
+++ b/mariadb-logical-backup/mariadb-dump.sh
@@ -0,0 +1,194 @@
+#! /usr/bin/env bash
+#
+# Logical backup of one MariaDB database: dump, compress, then upload to
+# S3/MinIO (via mc) or Azure Blob Storage (via az).
+
+set -x
+set -eou pipefail
+IFS=$'\n\t'
+
+## Required Env passed from CronJob:
+# MARIADB_HOST, MARIADB_USER, MARIADB_PASSWORD, MARIADB_DATABASE
+# LOGICAL_BACKUP_PROVIDER, LOGICAL_BACKUP_S3_BUCKET, etc.
+## Optional Env:
+# MARIADB_PORT (defaults to 3306)
+
+# MariaDB query to get total size of all databases in bytes
+ALL_DB_SIZE_QUERY="SELECT SUM(data_length + index_length) FROM information_schema.TABLES;"
+DUMP_SIZE_COEFF=5
+CLUSTER_NAME=${CLUSTER_NAME_LABEL:-"mariadb-cluster"}
+MARIADB_PORT=${MARIADB_PORT:-3306}
+LOGICAL_BACKUP_PROVIDER=${LOGICAL_BACKUP_PROVIDER:="s3"}
+LOGICAL_BACKUP_S3_RETENTION_TIME=${LOGICAL_BACKUP_S3_RETENTION_TIME:=""}
+LOGICAL_BACKUP_S3_ENDPOINT=${LOGICAL_BACKUP_S3_ENDPOINT:-}
+LOGICAL_BACKUP_S3_REGION=${LOGICAL_BACKUP_S3_REGION:-"us-west-1"}
+
+function estimate_size {
+    # Prints the summed data + index size of all tables, in bytes.
+    # The client runs in a subshell with xtrace off and gets the password via
+    # MYSQL_PWD, so the secret reaches neither the job log nor the argv.
+    (
+        set +x
+        MYSQL_PWD="$MARIADB_PASSWORD" mariadb -h "$MARIADB_HOST" -P "$MARIADB_PORT" -u "$MARIADB_USER" \
+            --skip-ssl -Nsr -e "${ALL_DB_SIZE_QUERY}" < /dev/null
+    )
+}
+
+function dump {
+    echo "Taking dump from ${MARIADB_HOST} using mariadb-dump for database ${MARIADB_DATABASE}" >&2
+
+    # Dumps only $MARIADB_DATABASE to stdout.
+    # --single-transaction: Ensure consistency for InnoDB without locking
+    # --quick: Stream output to save memory
+    # --routines / --events: Include stored routines and scheduled events
+    # --insert-ignore: Write INSERT IGNORE statements
+    # xtrace is off and the password goes through MYSQL_PWD for the same
+    # reason as in estimate_size: keep it out of the job log and the argv.
+    (
+        set +x
+        MYSQL_PWD="$MARIADB_PASSWORD" mariadb-dump -h "$MARIADB_HOST" -P "$MARIADB_PORT" -u "$MARIADB_USER" \
+            --single-transaction \
+            --quick \
+            --routines \
+            --events \
+            --insert-ignore \
+            --verbose \
+            "$MARIADB_DATABASE"
+    )
+}
+
+function compress {
+    # Use pigz for multi-threaded compression if available, else gzip.
+    # Written as if/else because `a && b || c` would also start gzip after a
+    # pigz failure and thereby mask the error.
+    if command -v pigz >/dev/null 2>&1; then
+        pigz
+    else
+        gzip
+    fi
+}
+
+function generate_checksum {
+    local FILE_PATH="${1}"
+    local CHECKSUM_FILE="/tmp/checksum.sha1"
+
+    echo "Generating SHA1 checksum for ${FILE_PATH}..."
+    sha1sum "${FILE_PATH}" | tee "${CHECKSUM_FILE}"
+}
+
+function az_upload {
+    local FILE_PATH="${1}"
+    # Path: cluster-name/scope/logical_backups/timestamp.sql.gz
+    PATH_TO_BACKUP="${CLUSTER_NAME}/${LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX}/logical_backups/$(date +%s).sql.gz"
+
+    echo "Uploading to Azure Blob Storage..."
+    # Subshell with xtrace off: the account key must not be traced into the log.
+    (
+        set +x
+        az storage blob upload \
+            --file "${FILE_PATH}" \
+            --account-name "${LOGICAL_BACKUP_AZURE_STORAGE_ACCOUNT_NAME}" \
+            --account-key "${LOGICAL_BACKUP_AZURE_STORAGE_ACCOUNT_KEY}" \
+            --container-name "${LOGICAL_BACKUP_AZURE_STORAGE_CONTAINER}" \
+            --name "${PATH_TO_BACKUP}"
+    )
+}
+
+function setup_mc_alias {
+    local endpoint="${LOGICAL_BACKUP_S3_ENDPOINT:-https://s3.amazonaws.com}"
+    echo "Setting up MinIO Client alias..."
+    # Subshell with xtrace off keeps the secret key out of the log; stdin is
+    # detached so mc can never consume the backup stream meant for `mc pipe`.
+    (
+        set +x
+        mc alias set minio_dest "$endpoint" "${AWS_ACCESS_KEY_ID}" "${AWS_SECRET_ACCESS_KEY}" < /dev/null
+    )
+}
+
+function mc_delete_outdated {
+    local cutoff_timestamp prefix bucket_path most_recent count backup
+
+    if [[ -z "$LOGICAL_BACKUP_S3_RETENTION_TIME" ]] ; then
+        echo "No retention time configured; skipping cleanup."
+        return 0
+    fi
+
+    setup_mc_alias
+
+    cutoff_timestamp=$(date -d "$LOGICAL_BACKUP_S3_RETENTION_TIME ago" +%s)
+    prefix="${CLUSTER_NAME}/${LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX}/logical_backups/"
+    bucket_path="minio_dest/${LOGICAL_BACKUP_S3_BUCKET}/${prefix}"
+
+    # Backup object names are <epoch>.sql.gz, so a numeric sort is chronological.
+    # `|| true`: grep exits 1 on no match, which pipefail + errexit would turn
+    # into a failed backup run even though there is simply nothing to clean up.
+    mc ls --json "$bucket_path" | jq -r '.key' | awk -F/ '{print $NF}' | { grep '\.sql\.gz$' || true; } | sort -n > /tmp/all-backups
+
+    if [[ $(wc -l < /tmp/all-backups) -le 1 ]]; then
+        echo "1 or fewer backups found, skipping cleanup."
+        return 0
+    fi
+
+    # Outdated = older than the cutoff, except the newest backup, which is always
+    # kept. The exact string comparison avoids treating the dots of the file
+    # name as regex wildcards.
+    most_recent=$(tail -n 1 /tmp/all-backups)
+    awk -v cutoff="$cutoff_timestamp" -v newest="$most_recent" -F. \
+        '$1 < cutoff && $0 != newest' /tmp/all-backups > /tmp/outdated-backups
+
+    count=$(wc -l < /tmp/outdated-backups)
+    if [[ $count -gt 0 ]]; then
+        echo "Deleting $count outdated backups created before $cutoff_timestamp"
+        # Read line by line; mc gets /dev/null so it cannot eat the list on stdin.
+        while IFS= read -r backup; do
+            mc rm "$bucket_path$backup" < /dev/null
+        done < /tmp/outdated-backups
+    fi
+}
+
+function mc_upload {
+    # $1 is the estimated compressed size; it is accepted for the caller's sake
+    # but not passed on to mc at the moment.
+    local EXPECTED_SIZE="$1"
+    PATH_TO_BACKUP="minio_dest/${LOGICAL_BACKUP_S3_BUCKET}/${CLUSTER_NAME}/${LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX}/logical_backups/$(date +%s).sql.gz"
+
+    setup_mc_alias
+
+    echo "Uploading dump to MinIO: ${PATH_TO_BACKUP}"
+    # The compressed dump arrives on stdin and is handed to mc pipe.
+    mc pipe "$PATH_TO_BACKUP"
+}
+
+function upload {
+    case $LOGICAL_BACKUP_PROVIDER in
+        "s3")
+            mc_upload $(($(estimate_size) / DUMP_SIZE_COEFF))
+            mc_delete_outdated
+            ;;
+        "az")
+            # Azure requires a physical file for 'az storage blob upload' in this context
+            dump | compress > /tmp/mariadb-backup.sql.gz
+            generate_checksum /tmp/mariadb-backup.sql.gz
+            az_upload /tmp/mariadb-backup.sql.gz
+            rm /tmp/mariadb-backup.sql.gz
+            ;;
+        *)
+            # Fail loudly: an unknown provider must not look like a successful backup.
+            echo "Unsupported LOGICAL_BACKUP_PROVIDER: ${LOGICAL_BACKUP_PROVIDER}" >&2
+            return 1
+            ;;
+    esac
+}
+
+if [ "$LOGICAL_BACKUP_PROVIDER" == "az" ]; then
+    upload
+else
+    # Only the compressed dump is staged on disk (it is needed for the checksum).
+    # errexit + pipefail stop the script with a non-zero status as soon as the
+    # dump, the compression or the upload fails, so no status bookkeeping is needed.
+    BACKUP_FILE=/tmp/final_upload.sql.gz
+    dump | compress > "$BACKUP_FILE"
+    generate_checksum "$BACKUP_FILE"
+    upload < "$BACKUP_FILE"
+    rm -f "$BACKUP_FILE"
+fi
diff --git a/postgres-logical-backup/Dockerfile b/postgres-logical-backup/Dockerfile
index 7f8fd7ae..2d2cc5fd 100644
--- a/postgres-logical-backup/Dockerfile
+++ b/postgres-logical-backup/Dockerfile
@@ -25,6 +25,7 @@ RUN apt-get update \
     && curl --silent https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - \
     && apt-get update \
     && apt-get install --no-install-recommends -y \
+        postgresql-client-18 \
         postgresql-client-17 \
         postgresql-client-16 \
         postgresql-client-15 \
diff --git a/postgres-logical-backup/dump.sh b/postgres-logical-backup/dump.sh
index 82d473b9..4660a93b 100755
--- a/postgres-logical-backup/dump.sh
+++ b/postgres-logical-backup/dump.sh
@@ -11,19 +11,26 @@ PG_BIN=/usr/lib/postgresql/$PG_VERSION/bin
 DUMP_SIZE_COEFF=5
 ERRORCOUNT=0
 POSTGRES_OPERATOR=spilo
+PGDATABASE=${PGDATABASE:-}
 LOGICAL_BACKUP_PROVIDER=${LOGICAL_BACKUP_PROVIDER:="s3"}
 LOGICAL_BACKUP_S3_RETENTION_TIME=${LOGICAL_BACKUP_S3_RETENTION_TIME:=""}
 LOGICAL_BACKUP_S3_ENDPOINT=${LOGICAL_BACKUP_S3_ENDPOINT:-}
 
+if [[ "${USE_PG_DUMP:-}" == "true" ]]; then
+    BACKUP_EXTENSION="dump.gz"
+else
+    BACKUP_EXTENSION="sql.gz"
+fi
+
 function estimate_size {
     "$PG_BIN"/psql -tqAc "${ALL_DB_SIZE_QUERY}"
 }
 
 function dump {
-    echo "Taking dump from ${PGHOST} using ${USE_PG_DUMP:-pg_dumpall}"
+    echo "Taking dump from ${PGHOST} using ${USE_PG_DUMP:-pg_dumpall}" >&2
 
     if [[ "${USE_PG_DUMP:-}" == "true" ]]; then
-        "$PG_BIN"/pg_dump
+        "$PG_BIN"/pg_dump -Fc --dbname="$PGDATABASE"
     else
         "$PG_BIN"/pg_dumpall --exclude-database='postgres'
     fi
@@ -34,7 +41,7 @@ function compress {
 }
 
 function az_upload {
-    PATH_TO_BACKUP="${LOGICAL_BACKUP_S3_BUCKET}/${POSTGRES_OPERATOR}/${LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX}/logical_backups/$(date +%s).sql.gz"
+    PATH_TO_BACKUP="${LOGICAL_BACKUP_S3_BUCKET}/${POSTGRES_OPERATOR}/${LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX}/logical_backups/$(date +%s).${BACKUP_EXTENSION}"
 
     az storage blob upload --file "${1}" --account-name "${LOGICAL_BACKUP_AZURE_STORAGE_ACCOUNT_NAME}" --account-key "${LOGICAL_BACKUP_AZURE_STORAGE_ACCOUNT_KEY}" -c "${LOGICAL_BACKUP_AZURE_STORAGE_CONTAINER}" -n "${PATH_TO_BACKUP}"
 }
@@ -96,7 +103,7 @@ function aws_upload {
     # mimic bucket setup from Spilo
     # to keep logical backups at the same path as WAL
     # NB: $LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX already contains the leading "/" when set by the Postgres Operator
-    PATH_TO_BACKUP=s3://${LOGICAL_BACKUP_S3_BUCKET}"/"${POSTGRES_OPERATOR}"/"${LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX}"/logical_backups/"$(date +%s).sql.gz
+    PATH_TO_BACKUP=s3://${LOGICAL_BACKUP_S3_BUCKET}"/"${POSTGRES_OPERATOR}"/"${LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX}"/logical_backups/"$(date +%s).${BACKUP_EXTENSION}
 
     args=()
 
@@ -109,7 +116,7 @@ function aws_upload {
 }
 
 function gcs_upload {
-    PATH_TO_BACKUP=gs://${LOGICAL_BACKUP_S3_BUCKET}"/"${POSTGRES_OPERATOR}"/"${LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX}"/logical_backups/"$(date +%s).sql.gz
+    PATH_TO_BACKUP=gs://${LOGICAL_BACKUP_S3_BUCKET}"/"${POSTGRES_OPERATOR}"/"${LOGICAL_BACKUP_S3_BUCKET_SCOPE_SUFFIX}"/logical_backups/"$(date +%s).${BACKUP_EXTENSION}
 
     gsutil -o Credentials:gs_service_key_file=${LOGICAL_BACKUP_GOOGLE_APPLICATION_CREDENTIALS} cp - "${PATH_TO_BACKUP}"
 }
@@ -126,12 +133,13 @@ function upload {
     esac
 }
 
-if [ "$LOGICAL_BACKUP_PROVIDER" == "az" ]; then
-    dump | compress > /tmp/azure-backup.sql.gz
-    az_upload /tmp/azure-backup.sql.gz
+if [[ "$LOGICAL_BACKUP_PROVIDER" == "az" ]]; then
+    dump | compress > /tmp/azure-backup.${BACKUP_EXTENSION}
+    az_upload /tmp/azure-backup.${BACKUP_EXTENSION}
+
 else
     dump | compress | upload
     [[ ${PIPESTATUS[0]} != 0 || ${PIPESTATUS[1]} != 0 || ${PIPESTATUS[2]} != 0 ]] && (( ERRORCOUNT += 1 ))
     set +x
     exit $ERRORCOUNT
-fi
\ No newline at end of file
+fi