Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions .github/workflows/build-seqrepo-slim.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Builds and publishes the SeqRepo-slim vrs-python container images to GHCR
# whenever a release is published. One job runs per reference assembly; each
# job builds the Dockerfile's three stages (data, build, vrs-python) in order
# and pushes an image per stage so later runs can reuse them as build cache.
name: Build SeqRepo Slim Container

on:
  release:
    types: [published]

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}/seqrepo-slim

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    strategy:
      matrix:
        assembly: [GRCh38, GRCh37]

    env:
      ASSEMBLY: ${{ matrix.assembly }}
      BASE_TAG: ghcr.io/${{ github.repository }}/seqrepo-slim

    steps:
      - name: Checkout vrs-python repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          driver-opts: |
            image=moby/buildkit:latest

      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Stage 1: downloads the reference genome for this assembly.
      - name: Build and push data stage
        uses: docker/build-push-action@v5
        with:
          context: misc/containers
          file: misc/containers/Dockerfile
          platforms: linux/amd64,linux/arm64
          target: data
          build-args: |
            ASSEMBLY=${{ env.ASSEMBLY }}
          cache-from: type=registry,ref=${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}-data
          # Embed cache metadata in the pushed image; without it the
          # cache-from registry refs have nothing to import.
          cache-to: type=inline
          tags: ${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}-data
          push: true

      # Stage 2: installs vrs-python and loads the reference into SeqRepo.
      - name: Build and push build stage
        uses: docker/build-push-action@v5
        with:
          context: misc/containers
          file: misc/containers/Dockerfile
          platforms: linux/amd64,linux/arm64
          target: build
          build-args: |
            ASSEMBLY=${{ env.ASSEMBLY }}
          cache-from: |
            type=registry,ref=${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}-data
            type=registry,ref=${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}-build
          cache-to: type=inline
          tags: ${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}-build
          push: true

      # Stage 3: the runtime image.
      - name: Build and push final stage
        uses: docker/build-push-action@v5
        with:
          context: misc/containers
          file: misc/containers/Dockerfile
          platforms: linux/amd64,linux/arm64
          target: vrs-python
          build-args: |
            ASSEMBLY=${{ env.ASSEMBLY }}
          cache-from: |
            type=registry,ref=${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}-data
            type=registry,ref=${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}-build
            type=registry,ref=${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}
          cache-to: type=inline
          # Only the GRCh38 leg publishes `latest`. Both matrix legs used to
          # push it, so `latest` pointed at whichever job finished last. For
          # other assemblies the expression yields an empty line, which the
          # action's list parsing drops.
          tags: |
            ${{ env.BASE_TAG }}:${{ env.ASSEMBLY }}
            ${{ matrix.assembly == 'GRCh38' && format('{0}:latest', env.BASE_TAG) || '' }}
          push: true
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ services:
volumes:
- uta_vol:/var/lib/postgresql/data
ports:
- 5432:5432
- 5433:5432

volumes:
seqrepo_vol:
Expand Down
91 changes: 91 additions & 0 deletions misc/containers/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
###
# podman build --platform linux/amd64,linux/arm64 --build-arg ASSEMBLY=GRCh38 -t docker.io/ga4gh/vrs-python:GRCh38 -f ./Dockerfile .
# podman build --platform linux/arm64 --build-arg ASSEMBLY=GRCh38 --target build -t docker.io/ga4gh/vrs-python:GRCh38-build -f ./Dockerfile .
###
# Data layer - downloads genomic reference files
FROM python:3.12-slim AS data

# The build-*.bash helpers are bash scripts (they run `set -xeuo pipefail`),
# so RUN steps that source them must use bash, not the default /bin/sh.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Either 'GRCh38' or 'GRCh37'
ARG ASSEMBLY="GRCh38"

# Tell build-seqrepo where to put the data
# (the helper script refuses to load unless this is set)
ENV SEQREPO_ROOT_DIR=/seqrepo-${ASSEMBLY}

# Install curl for downloading
RUN apt-get update && apt-get install -y curl \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /data

# Download the appropriate genomic reference file based on assembly.
# The helper is sourced so its download_reference function runs in this
# shell, with /data as the working directory.
COPY build-${ASSEMBLY}.bash /data/
RUN . /data/build-${ASSEMBLY}.bash \
    && download_reference

# Builder image - installs vrs-python into a virtualenv and loads the
# downloaded reference genome into a SeqRepo instance
FROM python:3.12-slim AS build

# The build-*.bash helpers are bash scripts (they run `set -xeuo pipefail`),
# so RUN steps that source them must use bash, not the default /bin/sh.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Either 'GRCh38' or 'GRCh37'
ARG ASSEMBLY="GRCh38"

# Install packages needed for the build
RUN apt-get update && apt-get upgrade -y && apt-get install -y \
    curl \
    git \
    libpq-dev \
    python3-pip \
    python3-venv \
    tabix \
    rsync \
    zlib1g-dev \
    postgresql-client \
    unzip \
    libhts3 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /vrs-python

# Copy downloaded genomic files from data layer
COPY --from=data /data/* /data/

# Setup the virtual env for vrs-python
RUN python3 -m venv /vrs-python/venv
ENV PATH=/vrs-python/venv/bin:$PATH

# Tell build-seqrepo where to put the data
ENV SEQREPO_ROOT_DIR=/seqrepo-${ASSEMBLY}

# Install vrs-python
RUN /vrs-python/venv/bin/python3 -m pip install -U setuptools 'ga4gh.vrs[extras]' biocommons.seqrepo

# Build the seqrepo data using provided function.
# Runs from /data because the helper loads the reference file by bare name.
COPY build-${ASSEMBLY}.bash /tmp/build-${ASSEMBLY}.bash
RUN cd /data && . /tmp/build-${ASSEMBLY}.bash \
    && build_seqrepo

# Final image - runtime only: the virtualenv and the built SeqRepo
FROM python:3.12-slim AS vrs-python

# Assembly chosen at build time, also exported into the runtime environment
# where entrypoint.sh reads it
ARG ASSEMBLY="GRCh38"
ENV ASSEMBLY=${ASSEMBLY}

# System packages required at runtime
RUN apt-get update \
    && apt-get install -y libpq-dev \
    && rm -rf /var/lib/apt/lists/*

# Artifacts produced by the builder stage
COPY --from=build /vrs-python /vrs-python
COPY --from=build /seqrepo-${ASSEMBLY} /seqrepo-${ASSEMBLY}

# Run script, made executable
COPY ./entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# Runtime environment: activate the virtualenv and point the VRS data proxy
# at the bundled SeqRepo
ENV VIRTUAL_ENV=/vrs-python/venv \
    PATH=/vrs-python/venv/bin:$PATH \
    SEQREPO_ROOT_DIR=/seqrepo-${ASSEMBLY} \
    GA4GH_VRS_DATAPROXY_URI="seqrepo+file:///seqrepo-${ASSEMBLY}/master"

WORKDIR /

ENTRYPOINT ["/entrypoint.sh"]
23 changes: 23 additions & 0 deletions misc/containers/build-GRCh37.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
# Helper library for the GRCh37 container build. The Dockerfile sources this
# file and then calls download_reference or build_seqrepo.
set -xeuo pipefail

# ${VAR:-} lets the check run under `set -u`; a bare $SEQREPO_ROOT_DIR would
# abort with "unbound variable" before the message below could be printed.
if [ -z "${SEQREPO_ROOT_DIR:-}" ]; then
    echo "Must set SEQREPO_ROOT_DIR" >&2
    exit 1
fi

# NCBI RefSeq genomic FASTA for GRCh37, and the local file name curl -O
# will save it under.
reference_url=https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.13_GRCh37/GCF_000001405.13_GRCh37_genomic.fna.gz
reference_fname=$(basename "$reference_url")

# Download the reference genome into the current directory, keeping the
# remote file name, then print the URL that was fetched.
download_reference() {
    # --fail makes curl exit non-zero on an HTTP error instead of saving the
    # server's error page as the reference file and reporting success.
    curl --fail -O "$reference_url"
    echo "$reference_url"
}

# Initialise a SeqRepo instance under $SEQREPO_ROOT_DIR and load the reference
# genome into it under the NCBI namespace.
build_seqrepo() {
    # Load reference genome from pre-downloaded file
    # File should already be present from Docker data layer
    # (it is looked up by bare name, so run this from the directory holding it)
    seqrepo -r "$SEQREPO_ROOT_DIR" init
    seqrepo -r "$SEQREPO_ROOT_DIR" load -n NCBI "$reference_fname"
    seqrepo -r "$SEQREPO_ROOT_DIR" add-assembly-names
}
25 changes: 25 additions & 0 deletions misc/containers/build-GRCh38.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash
# Helper library for the GRCh38 container build. The Dockerfile sources this
# file and then calls download_reference or build_seqrepo.
set -xeuo pipefail

# ${VAR:-} lets the check run under `set -u`; a bare $SEQREPO_ROOT_DIR would
# abort with "unbound variable" before the message below could be printed.
if [ -z "${SEQREPO_ROOT_DIR:-}" ]; then
    echo "Must set SEQREPO_ROOT_DIR" >&2
    exit 1
fi

# NCBI RefSeq genomic FASTA for GRCh38, and the local file name curl -O
# will save it under.
reference_url=https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.26_GRCh38/GCF_000001405.26_GRCh38_genomic.fna.gz
reference_fname=$(basename "$reference_url")

# Debug aid: record the PATH in effect when the script is sourced.
echo "PATH: $PATH"

# Download the reference genome into the current directory, keeping the
# remote file name, then print the URL that was fetched.
download_reference() {
    # --fail makes curl exit non-zero on an HTTP error instead of saving the
    # server's error page as the reference file and reporting success.
    curl --fail -O "$reference_url"
    echo "$reference_url"
}

# Initialise a SeqRepo instance under $SEQREPO_ROOT_DIR and load the reference
# genome into it under the NCBI namespace.
build_seqrepo() {
    # Load reference genome from pre-downloaded file
    # File should already be present from Docker data layer
    # (it is looked up by bare name, so run this from the directory holding it)
    seqrepo -r "$SEQREPO_ROOT_DIR" init
    seqrepo -r "$SEQREPO_ROOT_DIR" load -n NCBI "$reference_fname"
    seqrepo -r "$SEQREPO_ROOT_DIR" add-assembly-names
}
76 changes: 76 additions & 0 deletions misc/containers/build-with-tar.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/bin/bash

# Build script that creates a tar.gz with only necessary files for container build
# and streams it to the container runtime as the build context.
#
# Usage: ./build-with-tar.sh [ASSEMBLY]
#   ASSEMBLY    GRCh38 (default) or GRCh37
#   IMAGE_REPO  optional environment variable: image repository to tag
#               (default: ghcr.io/theferrit32/vrs-python)
#
# Run from the repository root; source paths below are relative to it.

set -euo pipefail

ASSEMBLY=${1:-GRCh38}
IMAGE_REPO=${IMAGE_REPO:-ghcr.io/theferrit32/vrs-python}
TAR_NAME="build-context.tar.gz"
BUILD_DIR="build-context"

echo "Building container with assembly: $ASSEMBLY"
echo "Creating build context tar.gz..."

# Clean up any existing build context
rm -rf "$BUILD_DIR" "$TAR_NAME"

# Create build directory
mkdir -p "$BUILD_DIR"

# Copy necessary files for the container build
echo "Copying files to build context..."

# Container-specific files. They go at the root of the context because the
# Dockerfile's COPY instructions (build-${ASSEMBLY}.bash, ./entrypoint.sh)
# are resolved relative to the context root.
cp misc/containers/Dockerfile "$BUILD_DIR/"
cp misc/containers/entrypoint.sh "$BUILD_DIR/"
cp "misc/containers/build-${ASSEMBLY}.bash" "$BUILD_DIR/"

# Create the tar.gz
echo "Creating tar.gz..."
tar -czf "$TAR_NAME" -C "$BUILD_DIR" .

# Clean up build directory
rm -rf "$BUILD_DIR"

echo "Build context created: $TAR_NAME"

# Detect container runtime
if command -v docker >/dev/null 2>&1; then
    CONTAINER_CMD="docker"
    echo "Using Docker for build..."
elif command -v podman >/dev/null 2>&1; then
    CONTAINER_CMD="podman"
    echo "Using Podman for build..."
else
    echo "Error: Neither docker nor podman found in PATH"
    exit 1
fi

# Build one Dockerfile stage from the tarball context.
#   $1  Dockerfile target stage
#   $2  image tag to apply
build_stage() {
    local target=$1
    local tag=$2
    # `-` tells the runtime to read the build context from stdin, and -f is
    # then a path inside that tarball. --platform takes OS/ARCH lists;
    # --arch does not.
    # NOTE(review): tarball contexts on stdin are documented for docker;
    # confirm podman accepts them before relying on the podman branch.
    "$CONTAINER_CMD" build \
        --platform linux/arm64,linux/amd64 \
        --build-arg ASSEMBLY="$ASSEMBLY" \
        --target "$target" \
        -t "$tag" \
        -f Dockerfile \
        - < "$TAR_NAME"
}

# Run container build with the tar.gz as context, one stage at a time
build_stage data "${IMAGE_REPO}:${ASSEMBLY}-data"
build_stage build "${IMAGE_REPO}:${ASSEMBLY}-build"
# The untagged-suffix image is the final runtime stage, not the builder.
build_stage vrs-python "${IMAGE_REPO}:${ASSEMBLY}"

# Clean up tar file
# rm -f "$TAR_NAME"

echo "Build completed successfully!"
2 changes: 2 additions & 0 deletions misc/containers/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/sh
# Container entrypoint: runs `vrs-annotate vcf` for the assembly named in the
# ASSEMBLY environment variable, forwarding all container arguments to it.
#
# "$@" is quoted so arguments containing spaces or glob characters reach the
# tool unchanged; exec replaces this shell so the tool receives signals
# directly.
exec /vrs-python/venv/bin/vrs-annotate vcf --assembly "${ASSEMBLY}" "$@"
Loading