From f67021bcfce8c70406895f9d02547522073e4e3f Mon Sep 17 00:00:00 2001 From: Kyle Ferriter Date: Tue, 2 Sep 2025 20:53:19 -0400 Subject: [PATCH 1/7] First revision of github action builds for vrs-python images with slim seqrepo builds --- .github/workflows/build-seqrepo-slim.yaml | 85 +++++++++++++++++++++ misc/containers/Dockerfile | 92 +++++++++++++++++++++++ misc/containers/build-GRCh37.bash | 12 +++ misc/containers/build-GRCh38.bash | 12 +++ misc/containers/build-with-tar.sh | 62 +++++++++++++++ misc/containers/entrypoint.sh | 2 + 6 files changed, 265 insertions(+) create mode 100644 .github/workflows/build-seqrepo-slim.yaml create mode 100644 misc/containers/Dockerfile create mode 100644 misc/containers/build-GRCh37.bash create mode 100644 misc/containers/build-GRCh38.bash create mode 100755 misc/containers/build-with-tar.sh create mode 100644 misc/containers/entrypoint.sh diff --git a/.github/workflows/build-seqrepo-slim.yaml b/.github/workflows/build-seqrepo-slim.yaml new file mode 100644 index 00000000..c36907ce --- /dev/null +++ b/.github/workflows/build-seqrepo-slim.yaml @@ -0,0 +1,85 @@ +name: Build SeqRepo Slim Container + +on: + workflow_dispatch: + inputs: + assembly: + description: 'Assembly version to build' + required: false + default: 'GRCh38' + type: string + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }}/seqrepo-slim + +jobs: + build-and-push: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Checkout vrs-python repository + uses: actions/checkout@v4 + with: + repository: ga4gh/vrs-python + fetch-depth: 1 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + driver-opts: | + image=moby/buildkit:latest + + - name: Log in to Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push data stage + run: | + ASSEMBLY=${{ github.event.inputs.assembly || 'GRCh38' }} + cd misc/containers + docker build \ + --build-arg ASSEMBLY=${ASSEMBLY} \ + --target data \ + --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data \ + --cache-to type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data-cache,mode=max \ + -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data \ + -f Dockerfile . + docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data + + - name: Build and push build stage + run: | + ASSEMBLY=${{ github.event.inputs.assembly || 'GRCh38' }} + cd misc/containers + docker build \ + --build-arg ASSEMBLY=${ASSEMBLY} \ + --target build \ + --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data \ + --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-build \ + --cache-to type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-build-cache,mode=max \ + -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-build \ + -f Dockerfile . + docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-build + + - name: Build and push final stage + run: | + ASSEMBLY=${{ github.event.inputs.assembly || 'GRCh38' }} + cd misc/containers + docker build \ + --build-arg ASSEMBLY=${ASSEMBLY} \ + --target vrs-python \ + --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data \ + --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-build \ + --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY} \ + --cache-to type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-cache,mode=max \ + -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY} \ + -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest \ + -f Dockerfile . + docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY} + docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest diff --git a/misc/containers/Dockerfile b/misc/containers/Dockerfile new file mode 100644 index 00000000..ab73fa4c --- /dev/null +++ b/misc/containers/Dockerfile @@ -0,0 +1,92 @@ +### +# podman build --arch linux/amd64,linux/arm64 --build-arg ASSEMBLY=GRCh38 -t docker.io/ga4gh/vrs-python:GRCh38 -f ./Dockerfile . +# podman build --arch linux/arm64 --build-arg ASSEMBLY=GRCh38 --target build -t docker.io/ga4gh/vrs-python:GRCh38-build -f ./Dockerfile . +### +# Data layer - downloads genomic reference files +FROM python:3.12-slim AS data + +# Either 'GRCh38' or 'GRCh37' +ARG ASSEMBLY="GRCh38" + +# Tell build-seqrepo where to put the data +ENV SEQREPO_ROOT_DIR=/seqrepo-${ASSEMBLY} + +# Install curl for downloading +RUN apt-get update && apt-get install -y curl + +WORKDIR /data + +# Download the appropriate genomic reference file based on assembly +RUN if [ "$ASSEMBLY" = "GRCh38" ]; then \ + curl -O https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.26_GRCh38/GCF_000001405.26_GRCh38_genomic.fna.gz; \ + elif [ "$ASSEMBLY" = "GRCh37" ]; then \ + curl -O https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.13_GRCh37/GCF_000001405.13_GRCh37_genomic.fna.gz; \ + else \ + echo "Unknown assembly: $ASSEMBLY" && exit 1; \ + fi + +# Builder image +FROM python:3.12-slim AS build + +# Either 'GRCh38' or 'GRCh37' +ARG ASSEMBLY="GRCh38" + +# Install packages needed for the build +RUN apt-get update && apt-get upgrade -y && apt-get install -y \ + curl \ + git \ + libpq-dev \ + python3-pip \ + python3-venv \ + tabix \ + rsync \ + zlib1g-dev \ + postgresql \ + unzip \ + libhts3 \ + ; + +WORKDIR /vrs-python + +# Copy downloaded genomic files from data layer +COPY --from=data /data/*.fna.gz /vrs-python/ + +# Setup the virtual env for vrs-python +RUN python3 -m venv /vrs-python/venv +ENV PATH=/vrs-python/venv/bin:$PATH + +# Tell build-seqrepo where to put the data +ENV SEQREPO_ROOT_DIR=/seqrepo-${ASSEMBLY} + +# Install vrs-python +RUN /vrs-python/venv/bin/python3 -m pip install -U setuptools +RUN /vrs-python/venv/bin/python3 -m pip install 'ga4gh.vrs[extras]' biocommons.seqrepo + +COPY build-${ASSEMBLY}.bash /vrs-python/build-seqrepo.bash +RUN bash /vrs-python/build-seqrepo.bash + +# Final image +FROM python:3.12-slim AS vrs-python +ARG ASSEMBLY +ENV ASSEMBLY=${ASSEMBLY} + +# Install runtime required packages +RUN apt-get update && apt-get install -y libpq-dev + +# Copy over artifacts from the builder +COPY --from=build /vrs-python /vrs-python +COPY --from=build /seqrepo-${ASSEMBLY} /seqrepo-${ASSEMBLY} + +# Copy over run script +COPY ./entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +# Set environment variables +ENV GA4GH_VRS_DATAPROXY_URI="seqrepo+file:///seqrepo-${ASSEMBLY}/master" +ENV SEQREPO_ROOT_DIR=/seqrepo-${ASSEMBLY} +ENV VIRTUAL_ENV=/vrs-python/venv +ENV PATH=/vrs-python/venv/bin:$PATH + +WORKDIR / + +ENTRYPOINT [ "/entrypoint.sh" ] diff --git a/misc/containers/build-GRCh37.bash b/misc/containers/build-GRCh37.bash new file mode 100644 index 00000000..41cec7a8 --- /dev/null +++ b/misc/containers/build-GRCh37.bash @@ -0,0 +1,12 @@ +#!/bin/bash + +if [ -z $SEQREPO_ROOT_DIR ]; then + echo "Must set SEQREPO_ROOT_DIR" + exit 1 +fi + +# Load reference genome from pre-downloaded file +# File should already be present from Docker data layer +seqrepo -r $SEQREPO_ROOT_DIR init +seqrepo -r $SEQREPO_ROOT_DIR load -n NCBI GCF_000001405.13_GRCh37_genomic.fna.gz +seqrepo -r $SEQREPO_ROOT_DIR add-assembly-names \ No newline at end of file diff --git a/misc/containers/build-GRCh38.bash b/misc/containers/build-GRCh38.bash new file mode 100644 index 00000000..d2150232 --- /dev/null +++ b/misc/containers/build-GRCh38.bash @@ -0,0 +1,12 @@ +#!/bin/bash + +if [ -z $SEQREPO_ROOT_DIR ]; then + echo "Must set SEQREPO_ROOT_DIR" + exit 1 +fi + +# Load reference genome from pre-downloaded file +# File should already be present from Docker data layer +seqrepo -r $SEQREPO_ROOT_DIR init +seqrepo -r $SEQREPO_ROOT_DIR load -n NCBI GCF_000001405.26_GRCh38_genomic.fna.gz +seqrepo -r $SEQREPO_ROOT_DIR add-assembly-names diff --git a/misc/containers/build-with-tar.sh b/misc/containers/build-with-tar.sh new file mode 100755 index 00000000..ab42b0f1 --- /dev/null +++ b/misc/containers/build-with-tar.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +# Build script that creates a tar.gz with only necessary files for container build +# Usage: ./build-with-tar.sh [ASSEMBLY] + +set -e + +ASSEMBLY=${1:-GRCh38} +TAR_NAME="build-context.tar.gz" +BUILD_DIR="build-context" + +echo "Building container with assembly: $ASSEMBLY" +echo "Creating build context tar.gz..." + +# Clean up any existing build context +rm -rf "$BUILD_DIR" "$TAR_NAME" + +# Create build directory +mkdir -p "$BUILD_DIR/misc/containers" + +# Copy necessary files for the container build +echo "Copying files to build context..." + +# Container-specific files +cp misc/containers/Dockerfile "$BUILD_DIR/misc/containers/" +cp misc/containers/entrypoint.sh "$BUILD_DIR/misc/containers/" +cp misc/containers/build-${ASSEMBLY}.bash "$BUILD_DIR/misc/containers/" + +# Create the tar.gz +echo "Creating tar.gz..." +rm -rf "$TAR_NAME" +tar -czf "$TAR_NAME" -C "$BUILD_DIR" . + +# Clean up build directory +rm -rf "$BUILD_DIR" + +echo "Build context created: $TAR_NAME" + +# Detect container runtime +if command -v docker >/dev/null 2>&1; then + CONTAINER_CMD="docker" + echo "Using Docker for build..." +elif command -v podman >/dev/null 2>&1; then + CONTAINER_CMD="podman" + echo "Using Podman for build..." +else + echo "Error: Neither docker nor podman found in PATH" + exit 1 +fi + +# Run container build with the tar.gz as context +cat "$TAR_NAME" | $CONTAINER_CMD build \ + --arch linux/arm64,linux/amd64 \ + --build-arg ASSEMBLY="$ASSEMBLY" \ + --target build \ + -t docker.io/ga4gh/vrs-python:${ASSEMBLY}-build \ + -f ./misc/containers/Dockerfile + +# Clean up tar file +# rm -f "$TAR_NAME" + +echo "Build completed successfully!" diff --git a/misc/containers/entrypoint.sh b/misc/containers/entrypoint.sh new file mode 100644 index 00000000..1a745408 --- /dev/null +++ b/misc/containers/entrypoint.sh @@ -0,0 +1,2 @@ +#!/bin/sh +/vrs-python/venv/bin/vrs-annotate vcf --assembly ${ASSEMBLY} $@ From 6866994d184cdcf9f0879ba3f99ecb68f6720731 Mon Sep 17 00:00:00 2001 From: Kyle Ferriter Date: Tue, 2 Sep 2025 21:03:40 -0400 Subject: [PATCH 2/7] Remove explicit repo line --- .github/workflows/build-seqrepo-slim.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build-seqrepo-slim.yaml b/.github/workflows/build-seqrepo-slim.yaml index c36907ce..dfff328e 100644 --- a/.github/workflows/build-seqrepo-slim.yaml +++ b/.github/workflows/build-seqrepo-slim.yaml @@ -24,7 +24,6 @@ jobs: - name: Checkout vrs-python repository uses: actions/checkout@v4 with: - repository: ga4gh/vrs-python fetch-depth: 1 - name: Set up Docker Buildx From fc67ffa40132e7cc11364d84461d4c8c495b0589 Mon Sep 17 00:00:00 2001 From: Kyle Ferriter Date: Tue, 2 Sep 2025 21:09:33 -0400 Subject: [PATCH 3/7] Remove cache-to --- .github/workflows/build-seqrepo-slim.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/build-seqrepo-slim.yaml b/.github/workflows/build-seqrepo-slim.yaml index dfff328e..d677115f 100644 --- a/.github/workflows/build-seqrepo-slim.yaml +++ b/.github/workflows/build-seqrepo-slim.yaml @@ -47,7 +47,6 @@ jobs: --build-arg ASSEMBLY=${ASSEMBLY} \ --target data \ --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data \ - --cache-to type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data-cache,mode=max \ -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data \ -f Dockerfile . docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data @@ -61,7 +60,6 @@ jobs: --target build \ --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data \ --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-build \ - --cache-to type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-build-cache,mode=max \ -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-build \ -f Dockerfile . docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-build @@ -76,7 +74,6 @@ jobs: --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data \ --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-build \ --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY} \ - --cache-to type=registry,ref=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-cache,mode=max \ -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY} \ -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest \ -f Dockerfile . From d9627dd70f1b61866d46f020c298aa3f8e2a2940 Mon Sep 17 00:00:00 2001 From: Kyle Ferriter Date: Mon, 13 Oct 2025 22:48:39 -0400 Subject: [PATCH 4/7] Use the docker build action runner. Set some global vars in the action. Make the Dockerfile more generic. --- .github/workflows/build-seqrepo-slim.yaml | 79 +++++++++++++---------- docker-compose.yml | 2 +- misc/containers/Dockerfile | 33 +++++----- misc/containers/build-GRCh37.bash | 21 ++++-- misc/containers/build-GRCh38.bash | 23 +++++-- misc/containers/build-with-tar.sh | 16 ++++- 6 files changed, 110 insertions(+), 64 deletions(-) diff --git a/.github/workflows/build-seqrepo-slim.yaml b/.github/workflows/build-seqrepo-slim.yaml index d677115f..1196d745 100644 --- a/.github/workflows/build-seqrepo-slim.yaml +++ b/.github/workflows/build-seqrepo-slim.yaml @@ -20,6 +20,10 @@ jobs: contents: read packages: write + env: + ASSEMBLY: ${{ github.event.inputs.assembly || 'GRCh38' }} + BASE_TAG: ghcr.io/${{ github.repository }}/seqrepo-slim + steps: - name: Checkout vrs-python repository uses: actions/checkout@v4 @@ -40,42 +44,47 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Build and push data stage - run: | - ASSEMBLY=${{ github.event.inputs.assembly || 'GRCh38' }} - cd misc/containers - docker build \ - --build-arg ASSEMBLY=${ASSEMBLY} \ - --target data \ - --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data \ - -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data \ - -f Dockerfile . - docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data + uses: docker/build-push-action@v5 + with: + context: misc/containers + file: misc/containers/Dockerfile + platforms: linux/amd64,linux/arm64 + target: data + build-args: | + ASSEMBLY=${{ env.ASSEMBLY }} + cache-from: type=registry,ref=${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}-data + tags: ${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}-data + push: true - name: Build and push build stage - run: | - ASSEMBLY=${{ github.event.inputs.assembly || 'GRCh38' }} - cd misc/containers - docker build \ - --build-arg ASSEMBLY=${ASSEMBLY} \ - --target build \ - --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data \ - --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-build \ - -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-build \ - -f Dockerfile . - docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-build + uses: docker/build-push-action@v5 + with: + context: misc/containers + file: misc/containers/Dockerfile + platforms: linux/amd64,linux/arm64 + target: build + build-args: | + ASSEMBLY=${{ env.ASSEMBLY }} + cache-from: | + type=registry,ref=${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}-data + type=registry,ref=${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}-build + tags: ${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}-build + push: true - name: Build and push final stage - run: | - ASSEMBLY=${{ github.event.inputs.assembly || 'GRCh38' }} - cd misc/containers - docker build \ - --build-arg ASSEMBLY=${ASSEMBLY} \ - --target vrs-python \ - --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-data \ - --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY}-build \ - --cache-from ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY} \ - -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY} \ - -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest \ - -f Dockerfile . - docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${ASSEMBLY} - docker push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest + uses: docker/build-push-action@v5 + with: + context: misc/containers + file: misc/containers/Dockerfile + platforms: linux/amd64,linux/arm64 + target: vrs-python + build-args: | + ASSEMBLY=${{ env.ASSEMBLY }} + cache-from: | + type=registry,ref=${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}-data + type=registry,ref=${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}-build + type=registry,ref=${{ env.BASE_TAG }}:${{ env.ASSEMBLY }} + tags: | + ${{ env.BASE_TAG }}:${{ env.ASSEMBLY }} + ${{ env.BASE_TAG }}:latest + push: true diff --git a/docker-compose.yml b/docker-compose.yml index 43d4348e..b11ceebc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -26,7 +26,7 @@ services: volumes: - uta_vol:/var/lib/postgresql/data ports: - - 5432:5432 + - 5433:5432 volumes: seqrepo_vol: diff --git a/misc/containers/Dockerfile b/misc/containers/Dockerfile index ab73fa4c..6c1c3150 100644 --- a/misc/containers/Dockerfile +++ b/misc/containers/Dockerfile @@ -12,18 +12,15 @@ ARG ASSEMBLY="GRCh38" ENV SEQREPO_ROOT_DIR=/seqrepo-${ASSEMBLY} # Install curl for downloading -RUN apt-get update && apt-get install -y curl +RUN apt-get update && apt-get install -y curl \ + && rm -rf /var/lib/apt/lists/* WORKDIR /data # Download the appropriate genomic reference file based on assembly -RUN if [ "$ASSEMBLY" = "GRCh38" ]; then \ - curl -O https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.26_GRCh38/GCF_000001405.26_GRCh38_genomic.fna.gz; \ - elif [ "$ASSEMBLY" = "GRCh37" ]; then \ - curl -O https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.13_GRCh37/GCF_000001405.13_GRCh37_genomic.fna.gz; \ - else \ - echo "Unknown assembly: $ASSEMBLY" && exit 1; \ - fi +COPY build-${ASSEMBLY}.bash /data/ +RUN . /data/build-${ASSEMBLY}.bash \ + && download_reference # Builder image FROM python:3.12-slim AS build @@ -41,15 +38,15 @@ RUN apt-get update && apt-get upgrade -y && apt-get install -y \ tabix \ rsync \ zlib1g-dev \ - postgresql \ + postgresql-client \ unzip \ libhts3 \ - ; + && rm -rf /var/lib/apt/lists/* WORKDIR /vrs-python # Copy downloaded genomic files from data layer -COPY --from=data /data/*.fna.gz /vrs-python/ +COPY --from=data /data/* /data/ # Setup the virtual env for vrs-python RUN python3 -m venv /vrs-python/venv @@ -59,19 +56,21 @@ ENV PATH=/vrs-python/venv/bin:$PATH ENV SEQREPO_ROOT_DIR=/seqrepo-${ASSEMBLY} # Install vrs-python -RUN /vrs-python/venv/bin/python3 -m pip install -U setuptools -RUN /vrs-python/venv/bin/python3 -m pip install 'ga4gh.vrs[extras]' biocommons.seqrepo +RUN /vrs-python/venv/bin/python3 -m pip install -U setuptools 'ga4gh.vrs[extras]' biocommons.seqrepo -COPY build-${ASSEMBLY}.bash /vrs-python/build-seqrepo.bash -RUN bash /vrs-python/build-seqrepo.bash +# Build the seqrepo data using provided function +COPY build-${ASSEMBLY}.bash /tmp/build-${ASSEMBLY}.bash +RUN cd /data && . /tmp/build-${ASSEMBLY}.bash \ + && build_seqrepo # Final image FROM python:3.12-slim AS vrs-python -ARG ASSEMBLY +ARG ASSEMBLY="GRCh38" ENV ASSEMBLY=${ASSEMBLY} # Install runtime required packages -RUN apt-get update && apt-get install -y libpq-dev +RUN apt-get update && apt-get install -y libpq-dev \ + && rm -rf /var/lib/apt/lists/* # Copy over artifacts from the builder COPY --from=build /vrs-python /vrs-python diff --git a/misc/containers/build-GRCh37.bash b/misc/containers/build-GRCh37.bash index 41cec7a8..830af257 100644 --- a/misc/containers/build-GRCh37.bash +++ b/misc/containers/build-GRCh37.bash @@ -1,12 +1,23 @@ #!/bin/bash +set -xeuo pipefail if [ -z $SEQREPO_ROOT_DIR ]; then echo "Must set SEQREPO_ROOT_DIR" exit 1 fi -# Load reference genome from pre-downloaded file -# File should already be present from Docker data layer -seqrepo -r $SEQREPO_ROOT_DIR init -seqrepo -r $SEQREPO_ROOT_DIR load -n NCBI GCF_000001405.13_GRCh37_genomic.fna.gz -seqrepo -r $SEQREPO_ROOT_DIR add-assembly-names \ No newline at end of file +reference_url=https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.13_GRCh37/GCF_000001405.13_GRCh37_genomic.fna.gz +reference_fname=$(basename $reference_url) + +download_reference() { + curl -O $reference_url + echo "$reference_url" +} + +build_seqrepo() { + # Load reference genome from pre-downloaded file + # File should already be present from Docker data layer + seqrepo -r $SEQREPO_ROOT_DIR init + seqrepo -r $SEQREPO_ROOT_DIR load -n NCBI $reference_fname + seqrepo -r $SEQREPO_ROOT_DIR add-assembly-names +} diff --git a/misc/containers/build-GRCh38.bash b/misc/containers/build-GRCh38.bash index d2150232..6f52ff14 100644 --- a/misc/containers/build-GRCh38.bash +++ b/misc/containers/build-GRCh38.bash @@ -1,12 +1,25 @@ #!/bin/bash +set -xeuo pipefail if [ -z $SEQREPO_ROOT_DIR ]; then echo "Must set SEQREPO_ROOT_DIR" exit 1 fi -# Load reference genome from pre-downloaded file -# File should already be present from Docker data layer -seqrepo -r $SEQREPO_ROOT_DIR init -seqrepo -r $SEQREPO_ROOT_DIR load -n NCBI GCF_000001405.26_GRCh38_genomic.fna.gz -seqrepo -r $SEQREPO_ROOT_DIR add-assembly-names +reference_url=https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.26_GRCh38/GCF_000001405.26_GRCh38_genomic.fna.gz +reference_fname=$(basename $reference_url) + +echo "PATH: $PATH" + +download_reference() { + curl -O $reference_url + echo "$reference_url" +} + +build_seqrepo() { + # Load reference genome from pre-downloaded file + # File should already be present from Docker data layer + seqrepo -r $SEQREPO_ROOT_DIR init + seqrepo -r $SEQREPO_ROOT_DIR load -n NCBI $reference_fname + seqrepo -r $SEQREPO_ROOT_DIR add-assembly-names +} diff --git a/misc/containers/build-with-tar.sh b/misc/containers/build-with-tar.sh index ab42b0f1..3d304585 100755 --- a/misc/containers/build-with-tar.sh +++ b/misc/containers/build-with-tar.sh @@ -49,11 +49,25 @@ else fi # Run container build with the tar.gz as context +cat "$TAR_NAME" | $CONTAINER_CMD build \ + --arch linux/arm64,linux/amd64 \ + --build-arg ASSEMBLY="$ASSEMBLY" \ + --target data \ + -t ghcr.io/theferrit32/vrs-python:${ASSEMBLY}-data \ + -f ./misc/containers/Dockerfile + +cat "$TAR_NAME" | $CONTAINER_CMD build \ + --arch linux/arm64,linux/amd64 \ + --build-arg ASSEMBLY="$ASSEMBLY" \ + --target build \ + -t ghcr.io/theferrit32/vrs-python:${ASSEMBLY}-build \ + -f ./misc/containers/Dockerfile + cat "$TAR_NAME" | $CONTAINER_CMD build \ --arch linux/arm64,linux/amd64 \ --build-arg ASSEMBLY="$ASSEMBLY" \ --target build \ - -t docker.io/ga4gh/vrs-python:${ASSEMBLY}-build \ + -t ghcr.io/theferrit32/vrs-python:${ASSEMBLY} \ -f ./misc/containers/Dockerfile # Clean up tar file From c2fec741ac13eca3cc4e5de5e587e81285347c65 Mon Sep 17 00:00:00 2001 From: Kyle Ferriter Date: Wed, 22 Oct 2025 14:41:43 -0400 Subject: [PATCH 5/7] add trigger to run on every release --- .github/workflows/build-seqrepo-slim.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build-seqrepo-slim.yaml b/.github/workflows/build-seqrepo-slim.yaml index 1196d745..ae42b6c0 100644 --- a/.github/workflows/build-seqrepo-slim.yaml +++ b/.github/workflows/build-seqrepo-slim.yaml @@ -1,6 +1,8 @@ name: Build SeqRepo Slim Container on: + release: + types: [published, created] workflow_dispatch: inputs: assembly: From 75a381bad7bd875b5fd749254aed29b4523531d9 Mon Sep 17 00:00:00 2001 From: Kyle Ferriter Date: Wed, 22 Oct 2025 15:03:39 -0400 Subject: [PATCH 6/7] Only run build on release publish --- .github/workflows/build-seqrepo-slim.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-seqrepo-slim.yaml b/.github/workflows/build-seqrepo-slim.yaml index ae42b6c0..c6fa5c7e 100644 --- a/.github/workflows/build-seqrepo-slim.yaml +++ b/.github/workflows/build-seqrepo-slim.yaml @@ -2,7 +2,7 @@ name: Build SeqRepo Slim Container on: release: - types: [published, created] + types: [published] workflow_dispatch: inputs: assembly: From 9647bf2db758d993b54a77d4ff9647bfc58e1e18 Mon Sep 17 00:00:00 2001 From: Kyle Ferriter Date: Wed, 22 Oct 2025 15:20:32 -0400 Subject: [PATCH 7/7] Add matrix for building both GRCh37 and GRCh38 --- .github/workflows/build-seqrepo-slim.yaml | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build-seqrepo-slim.yaml b/.github/workflows/build-seqrepo-slim.yaml index c6fa5c7e..dd62392a 100644 --- a/.github/workflows/build-seqrepo-slim.yaml +++ b/.github/workflows/build-seqrepo-slim.yaml @@ -3,13 +3,6 @@ name: Build SeqRepo Slim Container on: release: types: [published] - workflow_dispatch: - inputs: - assembly: - description: 'Assembly version to build' - required: false - default: 'GRCh38' - type: string env: REGISTRY: ghcr.io @@ -22,8 +15,12 @@ jobs: contents: read packages: write + strategy: + matrix: + assembly: [GRCh38, GRCh37] + env: - ASSEMBLY: ${{ github.event.inputs.assembly || 'GRCh38' }} + ASSEMBLY: ${{ matrix.assembly }} BASE_TAG: ghcr.io/${{ github.repository }}/seqrepo-slim steps: