-
Notifications
You must be signed in to change notification settings - Fork 95
AWS maintenance #1810
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
AWS maintenance #1810
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,252 @@ | ||||||||
| # Manually dispatched workflow that validates one release asset against the | ||||||||
| # stdpopsim catalog and uploads it to the stdpopsim S3 bucket. | ||||||||
| name: Upload to S3 | ||||||||
|
|
||||||||
| on: | ||||||||
|   workflow_dispatch: | ||||||||
|     inputs: | ||||||||
|       release_tag: | ||||||||
|         description: "Tag of the draft release holding the file to upload" | ||||||||
|         required: true | ||||||||
|         type: string | ||||||||
|       s3_destination: | ||||||||
|         description: "S3 path within the stdpopsim bucket (e.g. annotations/HomSap/file.tar.gz)" | ||||||||
|         required: true | ||||||||
|         type: string | ||||||||
|       expected_sha256: | ||||||||
|         description: "Expected SHA256 checksum of the file" | ||||||||
|         required: true | ||||||||
|         type: string | ||||||||
|       species_id: | ||||||||
|         description: "Species ID (e.g. HomSap)" | ||||||||
|         required: true | ||||||||
|         type: string | ||||||||
|       resource_type: | ||||||||
|         description: "Resource type: genetic_map or annotation" | ||||||||
|         required: true | ||||||||
|         # Closed set of values: let the dispatch form enforce it. The job | ||||||||
|         # still re-validates the value, so API-triggered runs are covered too. | ||||||||
|         type: choice | ||||||||
|         options: | ||||||||
|           - genetic_map | ||||||||
|           - annotation | ||||||||
|       resource_id: | ||||||||
|         description: "Resource ID as defined in the catalog (e.g. HapMapII_GRCh38)" | ||||||||
|         required: true | ||||||||
|         type: string | ||||||||
|       dry_run: | ||||||||
|         description: "If true, run all validation but skip the actual S3 upload" | ||||||||
|         required: false | ||||||||
|         type: boolean | ||||||||
|         default: false | ||||||||
|
|
||||||||
| # All runs share one concurrency group, so only one upload executes at a time; | ||||||||
| # a run that is already in progress is never cancelled by a newer one. | ||||||||
| concurrency: | ||||||||
|   group: s3-upload | ||||||||
|   cancel-in-progress: false | ||||||||
|
|
||||||||
| jobs: | ||||||||
| upload: | ||||||||
| runs-on: ubuntu-latest | ||||||||
| environment: s3-upload | ||||||||
| permissions: | ||||||||
| contents: write # needed to download release assets and delete releases | ||||||||
| steps: | ||||||||
| - name: Validate inputs | ||||||||
|   # Cheap syntactic checks on the dispatch inputs, before anything is installed. | ||||||||
|   # Inputs are handed to the script through env rather than expanded into the | ||||||||
|   # script text, so their contents can never be parsed as shell syntax. | ||||||||
|   env: | ||||||||
|     DEST: ${{ inputs.s3_destination }} | ||||||||
|     TYPE: ${{ inputs.resource_type }} | ||||||||
|     SPECIES_ID: ${{ inputs.species_id }} | ||||||||
|     RESOURCE_ID: ${{ inputs.resource_id }} | ||||||||
|     EXPECTED_SHA256: ${{ inputs.expected_sha256 }} | ||||||||
|   run: | | ||||||||
|     # Resource type must be genetic_map or annotation | ||||||||
|     if [[ "$TYPE" != "genetic_map" && "$TYPE" != "annotation" ]]; then | ||||||||
|       echo "ERROR: resource_type must be 'genetic_map' or 'annotation', got: $TYPE" | ||||||||
|       exit 1 | ||||||||
|     fi | ||||||||
|
|
||||||||
|     # S3 destination must start with genetic_maps/ or annotations/ | ||||||||
|     if [[ ! "$DEST" =~ ^(genetic_maps|annotations)/ ]]; then | ||||||||
|       echo "ERROR: S3 destination must start with 'genetic_maps/' or 'annotations/'" | ||||||||
|       exit 1 | ||||||||
|     fi | ||||||||
|
|
||||||||
|     # S3 destination must end with .tar.gz or .tgz | ||||||||
|     if [[ ! "$DEST" =~ \.(tar\.gz|tgz)$ ]]; then | ||||||||
|       echo "ERROR: S3 destination must end with .tar.gz or .tgz" | ||||||||
|       exit 1 | ||||||||
|     fi | ||||||||
|
|
||||||||
|     # The checksum is later compared verbatim with sha256sum output, which is | ||||||||
|     # 64 lowercase hex digits; reject anything else up front. | ||||||||
|     if [[ ! "$EXPECTED_SHA256" =~ ^[0-9a-f]{64}$ ]]; then | ||||||||
|       echo "ERROR: expected_sha256 must be 64 lowercase hexadecimal characters" | ||||||||
|       exit 1 | ||||||||
|     fi | ||||||||
|
|
||||||||
|     echo "Input validation passed." | ||||||||
|     echo " Species: $SPECIES_ID" | ||||||||
|     echo " Type: $TYPE" | ||||||||
|     echo " Resource: $RESOURCE_ID" | ||||||||
|     echo " S3 dest: $DEST" | ||||||||
|
|
||||||||
| - name: Checkout repository | ||||||||
|   uses: actions/checkout@v4 | ||||||||
|   with: | ||||||||
|     # Do not leave the write-scoped token in .git/config: later steps run | ||||||||
|     # repository code (pip install -e .) and only use the token via GH_TOKEN. | ||||||||
|     persist-credentials: false | ||||||||
|
|
||||||||
| - name: Set up Python | ||||||||
|   uses: actions/setup-python@v5 | ||||||||
|   with: | ||||||||
|     python-version: "3.12" | ||||||||
|
|
||||||||
| # Editable install of the checked-out tree, so the catalog validation below | ||||||||
| # runs against the catalog as it exists on this ref. | ||||||||
| - name: Install stdpopsim | ||||||||
|   run: | | ||||||||
|     python -m pip install -e . | ||||||||
|
|
||||||||
| - name: Validate against catalog | ||||||||
|   # Checks that the species/resource exist in the stdpopsim catalog and that the | ||||||||
|   # requested S3 key and checksum are exactly the ones the catalog declares. | ||||||||
|   # Inputs reach Python through the environment; expanding them into the source | ||||||||
|   # text would let a quote character in an input inject arbitrary code. | ||||||||
|   env: | ||||||||
|     SPECIES_ID: ${{ inputs.species_id }} | ||||||||
|     RESOURCE_TYPE: ${{ inputs.resource_type }} | ||||||||
|     RESOURCE_ID: ${{ inputs.resource_id }} | ||||||||
|     S3_DEST: ${{ inputs.s3_destination }} | ||||||||
|     EXPECTED_SHA256: ${{ inputs.expected_sha256 }} | ||||||||
|   run: | | ||||||||
|     python3 << 'PYEOF' | ||||||||
|     import os | ||||||||
|     import re | ||||||||
|     import sys | ||||||||
|
|
||||||||
|     import stdpopsim | ||||||||
|
|
||||||||
|     species_id = os.environ["SPECIES_ID"] | ||||||||
|     resource_type = os.environ["RESOURCE_TYPE"] | ||||||||
|     resource_id = os.environ["RESOURCE_ID"] | ||||||||
|     s3_dest = os.environ["S3_DEST"] | ||||||||
|     expected_sha256 = os.environ["EXPECTED_SHA256"] | ||||||||
|
|
||||||||
|     # Verify species exists | ||||||||
|     try: | ||||||||
|         species = stdpopsim.get_species(species_id) | ||||||||
|     except (ValueError, KeyError): | ||||||||
|         available = [s.id for s in stdpopsim.all_species()] | ||||||||
|         print(f"ERROR: Species '{species_id}' not found in catalog.") | ||||||||
|         print(f"Available species: {available}") | ||||||||
|         sys.exit(1) | ||||||||
|
|
||||||||
|     # Verify resource exists and extract expected URL/SHA256 | ||||||||
|     if resource_type == "genetic_map": | ||||||||
|         resources = {gm.id: gm for gm in species.genetic_maps} | ||||||||
|         if resource_id not in resources: | ||||||||
|             print(f"ERROR: Genetic map '{resource_id}' not found for {species_id}.") | ||||||||
|             print(f"Available genetic maps: {list(resources)}") | ||||||||
|             sys.exit(1) | ||||||||
|         resource = resources[resource_id] | ||||||||
|         catalog_url = resource.url | ||||||||
|         catalog_sha256 = resource.sha256 | ||||||||
|
|
||||||||
|     elif resource_type == "annotation": | ||||||||
|         resources = {a.id: a for a in species.annotations} | ||||||||
|         if resource_id not in resources: | ||||||||
|             print(f"ERROR: Annotation '{resource_id}' not found for {species_id}.") | ||||||||
|             print(f"Available annotations: {list(resources)}") | ||||||||
|             sys.exit(1) | ||||||||
|         resource = resources[resource_id] | ||||||||
|         catalog_url = resource.intervals_url | ||||||||
|         catalog_sha256 = resource.intervals_sha256 | ||||||||
|
|
||||||||
|     else: | ||||||||
|         # Fail with a clear message instead of a NameError on catalog_url below. | ||||||||
|         print(f"ERROR: Unknown resource type '{resource_type}'.") | ||||||||
|         sys.exit(1) | ||||||||
|
|
||||||||
|     # Verify the S3 destination matches what the catalog expects | ||||||||
|     m = re.match(r"https://stdpopsim\.s3[.-]us-west-2\.amazonaws\.com/(.*)", catalog_url) | ||||||||
|     if not m: | ||||||||
|         print(f"ERROR: Could not parse S3 URL from catalog: {catalog_url}") | ||||||||
|         sys.exit(1) | ||||||||
|
|
||||||||
|     catalog_s3_dest = m.group(1) | ||||||||
|     if catalog_s3_dest != s3_dest: | ||||||||
|         print("ERROR: S3 destination mismatch!") | ||||||||
|         print(f" Provided: {s3_dest}") | ||||||||
|         print(f" Catalog expects: {catalog_s3_dest}") | ||||||||
|         sys.exit(1) | ||||||||
|
|
||||||||
|     # Verify SHA256 matches what the catalog expects | ||||||||
|     if catalog_sha256 != expected_sha256: | ||||||||
|         print("ERROR: SHA256 mismatch with catalog!") | ||||||||
|         print(f" Provided: {expected_sha256}") | ||||||||
|         print(f" Catalog expects: {catalog_sha256}") | ||||||||
|         sys.exit(1) | ||||||||
|
|
||||||||
|     print("Catalog validation passed!") | ||||||||
|     print(f" Species: {species_id} ({species.name})") | ||||||||
|     print(f" Type: {resource_type}") | ||||||||
|     print(f" Resource: {resource_id}") | ||||||||
|     print(f" URL: {catalog_url}") | ||||||||
|     print(f" SHA256: {catalog_sha256}") | ||||||||
|     PYEOF | ||||||||
|
|
||||||||
| - name: Download asset from draft release | ||||||||
|   # Fetches the release's assets into ./download and exports the single file's | ||||||||
|   # path as DOWNLOADED_FILE for the following steps. | ||||||||
|   env: | ||||||||
|     GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||||||
|     # Passed via env so the tag is never parsed as shell syntax. | ||||||||
|     RELEASE_TAG: ${{ inputs.release_tag }} | ||||||||
|     REPO: ${{ github.repository }} | ||||||||
|   run: | | ||||||||
|     echo "Downloading asset from release: $RELEASE_TAG" | ||||||||
|     gh release download "$RELEASE_TAG" \ | ||||||||
|       --repo "$REPO" \ | ||||||||
|       --dir ./download | ||||||||
|
|
||||||||
|     # Expect exactly one file | ||||||||
|     FILE_COUNT=$(find ./download -type f | wc -l) | ||||||||
|     if [ "$FILE_COUNT" -ne 1 ]; then | ||||||||
|       echo "ERROR: Expected exactly 1 asset, found $FILE_COUNT" | ||||||||
|       exit 1 | ||||||||
|     fi | ||||||||
|
|
||||||||
|     DOWNLOADED_FILE=$(find ./download -type f) | ||||||||
|     echo "Downloaded: $DOWNLOADED_FILE" | ||||||||
|     echo "DOWNLOADED_FILE=$DOWNLOADED_FILE" >> "$GITHUB_ENV" | ||||||||
|
|
||||||||
| - name: Verify SHA256 | ||||||||
|   # Compares the checksum of the downloaded file with the one supplied at | ||||||||
|   # dispatch time. DOWNLOADED_FILE comes from the previous step via GITHUB_ENV. | ||||||||
|   env: | ||||||||
|     # Passed via env so the value is never parsed as shell syntax. | ||||||||
|     EXPECTED: ${{ inputs.expected_sha256 }} | ||||||||
|   run: | | ||||||||
|     ACTUAL_SHA256=$(sha256sum "$DOWNLOADED_FILE" | awk '{print $1}') | ||||||||
|
|
||||||||
|     echo "Expected SHA256: $EXPECTED" | ||||||||
|     echo "Actual SHA256: $ACTUAL_SHA256" | ||||||||
|
|
||||||||
|     if [ "$ACTUAL_SHA256" != "$EXPECTED" ]; then | ||||||||
|       echo "ERROR: SHA256 mismatch!" | ||||||||
|       exit 1 | ||||||||
|     fi | ||||||||
|
|
||||||||
|     echo "SHA256 verified." | ||||||||
|
|
||||||||
| - name: Check if S3 object already exists | ||||||||
|   # Refuses to continue when the destination key is already present, and also | ||||||||
|   # when its presence cannot be determined. | ||||||||
|   if: ${{ inputs.dry_run != true }} | ||||||||
|   env: | ||||||||
|     AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} | ||||||||
|     AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | ||||||||
|     AWS_DEFAULT_REGION: us-west-2 | ||||||||
|     # Passed via env so the value is never parsed as shell syntax. | ||||||||
|     DEST: ${{ inputs.s3_destination }} | ||||||||
|   run: | | ||||||||
|     if OUTPUT=$(aws s3api head-object --bucket stdpopsim --key "$DEST" 2>&1); then | ||||||||
|       echo "ERROR: s3://stdpopsim/$DEST already exists. Refusing to overwrite." | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
|       echo "If you need to replace this file, delete it from S3 first." | ||||||||
|       exit 1 | ||||||||
|     fi | ||||||||
|     # Only a definite 404 proves the key is absent. Any other failure | ||||||||
|     # (permissions, credentials, network) must not be read as "safe to upload". | ||||||||
|     if [[ "$OUTPUT" != *"(404)"* && "$OUTPUT" != *"Not Found"* ]]; then | ||||||||
|       echo "ERROR: could not determine whether s3://stdpopsim/$DEST exists:" | ||||||||
|       echo "$OUTPUT" | ||||||||
|       exit 1 | ||||||||
|     fi | ||||||||
|     echo "Confirmed: s3://stdpopsim/$DEST does not exist yet." | ||||||||
|
|
||||||||
| - name: Upload to S3 | ||||||||
|   # Copies the verified file to its catalog-declared key. Skipped on dry runs. | ||||||||
|   if: ${{ inputs.dry_run != true }} | ||||||||
|   env: | ||||||||
|     AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} | ||||||||
|     AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | ||||||||
|     AWS_DEFAULT_REGION: us-west-2 | ||||||||
|     # Passed via env so the values are never parsed as shell syntax. | ||||||||
|     DEST: ${{ inputs.s3_destination }} | ||||||||
|     EXPECTED_SHA256: ${{ inputs.expected_sha256 }} | ||||||||
|   run: | | ||||||||
|     echo "Uploading to s3://stdpopsim/$DEST" | ||||||||
|     aws s3 cp "$DOWNLOADED_FILE" "s3://stdpopsim/$DEST" | ||||||||
|     echo "" | ||||||||
|     echo "Upload complete." | ||||||||
|     echo "S3 URL: https://stdpopsim.s3-us-west-2.amazonaws.com/$DEST" | ||||||||
|     echo "SHA256: $EXPECTED_SHA256" | ||||||||
|
|
||||||||
| - name: Dry run summary | ||||||||
|   if: ${{ inputs.dry_run == true }} | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If someone runs |
||||||||
|   # Inputs are echoed from env so they are never parsed as shell syntax. | ||||||||
|   env: | ||||||||
|     SPECIES_ID: ${{ inputs.species_id }} | ||||||||
|     RESOURCE_TYPE: ${{ inputs.resource_type }} | ||||||||
|     RESOURCE_ID: ${{ inputs.resource_id }} | ||||||||
|     DEST: ${{ inputs.s3_destination }} | ||||||||
|     EXPECTED_SHA256: ${{ inputs.expected_sha256 }} | ||||||||
|   run: | | ||||||||
|     echo "=== DRY RUN COMPLETE ===" | ||||||||
|     echo "All validation passed. Skipped S3 upload." | ||||||||
|     echo "" | ||||||||
|     echo " Species: $SPECIES_ID" | ||||||||
|     echo " Type: $RESOURCE_TYPE" | ||||||||
|     echo " Resource: $RESOURCE_ID" | ||||||||
|     echo " S3 dest: s3://stdpopsim/$DEST" | ||||||||
|     echo " SHA256: $EXPECTED_SHA256" | ||||||||
|     echo "" | ||||||||
|     echo "To perform the actual upload, re-run without --dry-run." | ||||||||
|
|
||||||||
| - name: Clean up draft release | ||||||||
|   # Runs after success or failure of a real upload attempt, but not after a dry | ||||||||
|   # run: the dry-run summary tells the user to re-run, which needs the draft | ||||||||
|   # release and its asset to still exist. | ||||||||
|   if: ${{ always() && inputs.dry_run != true }} | ||||||||
|   env: | ||||||||
|     GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||||||||
|     # Passed via env so the tag is never parsed as shell syntax. | ||||||||
|     TAG: ${{ inputs.release_tag }} | ||||||||
|     REPO: ${{ github.repository }} | ||||||||
|   run: | | ||||||||
|     echo "Cleaning up draft release: $TAG" | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should at least validate the form of |
||||||||
|
|
||||||||
|     # The tag is user-supplied: only ever delete a release that really is a | ||||||||
|     # draft, so a mistyped tag cannot remove a published release and its tag. | ||||||||
|     IS_DRAFT=$(gh release view "$TAG" --repo "$REPO" --json isDraft --jq '.isDraft' 2>/dev/null || true) | ||||||||
|     if [ "$IS_DRAFT" != "true" ]; then | ||||||||
|       echo "Warning: release $TAG is not a draft (or was not found); not deleting it." | ||||||||
|       exit 0 | ||||||||
|     fi | ||||||||
|
|
||||||||
|     # Delete the release (best effort: a failure here must not fail the job) | ||||||||
|     gh release delete "$TAG" \ | ||||||||
|       --repo "$REPO" \ | ||||||||
|       --yes \ | ||||||||
|       --cleanup-tag || echo "Warning: could not delete release $TAG" | ||||||||
|
|
||||||||
|     echo "Cleanup complete." | ||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about we also validate that it's of the form `_vN.tar.gz` (as required)?