diff --git a/pipeline_versions.txt b/pipeline_versions.txt index 0232abd068..d87ec0f22d 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -1,4 +1,4 @@ -ArrayImputationQC 1.2.2 2025-10-07 +ArrayImputationQC 1.2.3 2025-10-17 ArrayImputationQuotaConsumed 1.1.0 2025-09-29 BuildIndices 5.0.0 2025-09-30 CramToUnmappedBams 1.1.3 2024-08-02 @@ -6,7 +6,7 @@ ExomeGermlineSingleSample 3.2.6 2025-10-09 ExomeReprocessing 3.3.6 2025-10-09 IlluminaGenotypingArray 1.12.26 2025-10-09 Imputation 1.1.23 2025-10-03 -ImputationBeagle 2.2.3 2025-10-07 +ImputationBeagle 2.2.4 2025-10-17 JointGenotyping 1.7.3 2025-08-11 MultiSampleSmartSeq2SingleNucleus 2.2.2 2025-06-20 Multiome 6.1.3 2025-08-15 @@ -17,7 +17,7 @@ Pipeline Name Version Date of Last Commit RNAWithUMIsPipeline 1.0.19 2025-08-11 ReblockGVCF 2.4.3 2025-10-09 SlideSeq 3.6.3 2025-06-20 -SlideTags 1.0.4 2025-10-03 +SlideTags 1.0.5 2025-10-24 UltimaGenomicsJointGenotyping 1.2.3 2025-08-11 UltimaGenomicsWholeGenomeCramOnly 1.1.2 2025-10-09 UltimaGenomicsWholeGenomeGermline 1.2.1 2025-10-09 diff --git a/pipelines/wdl/arrays/imputation_beagle/ImputationBeagle.changelog.md b/pipelines/wdl/arrays/imputation_beagle/ImputationBeagle.changelog.md index 6eecffc173..8f1b7d632d 100644 --- a/pipelines/wdl/arrays/imputation_beagle/ImputationBeagle.changelog.md +++ b/pipelines/wdl/arrays/imputation_beagle/ImputationBeagle.changelog.md @@ -1,3 +1,8 @@ +# 2.2.4 +2025-10-17 (Date of Last Commit) + +* Update input_qc_version to 1.2.3 to match latest changes in InputQC wdl + # 2.2.3 2025-10-07 (Date of Last Commit) diff --git a/pipelines/wdl/arrays/imputation_beagle/ImputationBeagle.wdl b/pipelines/wdl/arrays/imputation_beagle/ImputationBeagle.wdl index 76ce0e276e..a142002786 100644 --- a/pipelines/wdl/arrays/imputation_beagle/ImputationBeagle.wdl +++ b/pipelines/wdl/arrays/imputation_beagle/ImputationBeagle.wdl @@ -5,8 +5,8 @@ import "../../../../tasks/wdl/ImputationTasks.wdl" as tasks import 
"../../../../tasks/wdl/ImputationBeagleTasks.wdl" as beagleTasks workflow ImputationBeagle { - String pipeline_version = "2.2.3" - String input_qc_version = "1.2.2" + String pipeline_version = "2.2.4" + String input_qc_version = "1.2.3" String quota_consumed_version = "1.1.0" input { diff --git a/pipelines/wdl/arrays/imputation_beagle/input_qc/ArrayImputationQC.changelog.md b/pipelines/wdl/arrays/imputation_beagle/input_qc/ArrayImputationQC.changelog.md index 1fc5542b09..c6607262de 100644 --- a/pipelines/wdl/arrays/imputation_beagle/input_qc/ArrayImputationQC.changelog.md +++ b/pipelines/wdl/arrays/imputation_beagle/input_qc/ArrayImputationQC.changelog.md @@ -1,3 +1,8 @@ +# 1.2.3 +2025-10-17 (Date of Last Commit) + +* Update QcChecks task to improve error messages. + # 1.2.2 2025-10-07 (Date of Last Commit) @@ -6,7 +11,7 @@ # 1.2.1 2025-10-01 (Date of Last Commit) -* Add check for vcf being bgzipped and in sorted order +* Add check for vcf being bgzipped and in sorted order. # 1.2.0 2025-09-29 (Date of Last Commit) @@ -21,12 +26,12 @@ # 1.0.3 2025-09-15 (Date of Last Commit) -* Add instruction for updating ImputationBeagle wdl when this workflow's pipeline_version changes for improved version tracking across wdls +* Add instruction for updating ImputationBeagle wdl when this workflow's pipeline_version changes for improved version tracking across wdls. # 1.0.2 2025-09-03 (Date of Last Commit) -* Add optional pipeline_header_line input to match beagle imputation pipeline inputs +* Add optional pipeline_header_line input to match beagle imputation pipeline inputs. 
# 1.0.1 2025-08-26 (Date of Last Commit) diff --git a/pipelines/wdl/arrays/imputation_beagle/input_qc/ArrayImputationQC.wdl b/pipelines/wdl/arrays/imputation_beagle/input_qc/ArrayImputationQC.wdl index 2e0d55e5aa..e553d53e1d 100644 --- a/pipelines/wdl/arrays/imputation_beagle/input_qc/ArrayImputationQC.wdl +++ b/pipelines/wdl/arrays/imputation_beagle/input_qc/ArrayImputationQC.wdl @@ -4,7 +4,7 @@ import "../../../../../tasks/wdl/ImputationBeagleQcTasks.wdl" as tasks workflow InputQC { # if this changes, update the input_qc_version value in ImputationBeagle.wdl - String pipeline_version = "1.2.2" + String pipeline_version = "1.2.3" input { diff --git a/pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/NA12878_chr20_qc_fails_no_valid_contigs.json b/pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/HGDP1000G_500_chr20_qc_fails_no_valid_contigs.json similarity index 100% rename from pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/NA12878_chr20_qc_fails_no_valid_contigs.json rename to pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/HGDP1000G_500_chr20_qc_fails_no_valid_contigs.json diff --git a/pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/NA12878_qc_fails_not_bgzipped.json b/pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/HGDP1000G_500_qc_fails_not_bgzipped.json similarity index 100% rename from pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/NA12878_qc_fails_not_bgzipped.json rename to pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/HGDP1000G_500_qc_fails_not_bgzipped.json diff --git a/pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/NA12878_qc_fails_v3.json b/pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/HGDP1000G_500_qc_fails_v3.json similarity index 100% rename from pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/NA12878_qc_fails_v3.json
rename to pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/HGDP1000G_500_qc_fails_v3.json diff --git a/pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/NA12878_chr20_qc_succeeds.json b/pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/HGDP1000G_500_chr20_qc_succeeds.json similarity index 100% rename from pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/NA12878_chr20_qc_succeeds.json rename to pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/HGDP1000G_500_chr20_qc_succeeds.json diff --git a/pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/NA12878_qc_fails_bad_contig_header_length.json b/pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/HGDP1000G_500_qc_fails_bad_contig_header_length.json similarity index 100% rename from pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/NA12878_qc_fails_bad_contig_header_length.json rename to pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/HGDP1000G_500_qc_fails_bad_contig_header_length.json diff --git a/pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/HGDP1000G_500_qc_fails_multiple_errors.json b/pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/HGDP1000G_500_qc_fails_multiple_errors.json new file mode 100644 index 0000000000..c3965ce0c7 --- /dev/null +++ b/pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/HGDP1000G_500_qc_fails_multiple_errors.json @@ -0,0 +1,8 @@ +{ + "ArrayImputationQC.multi_sample_vcf": "gs://broad-gotc-test-storage/imputation_beagle/input_qc/scientific/vcfs/test_qc_v3_no_data_not_bgzipped.vcf.gz", + "ArrayImputationQC.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ArrayImputationQC.reference_panel_path_prefix": 
"gs://broad-gotc-test-storage/imputation_beagle/scientific/1000G_HGDP_no_singletons_reference_panel/hgdp.tgp.gwaspy.AN_added.bcf.ac2", + "ArrayImputationQC.contigs": ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22"], + "ArrayImputationQC.genetic_maps_path": "gs://broad-gotc-test-storage/imputation_beagle/scientific/plink-genetic-maps/", + "ArrayImputationQC.output_basename": "plumbing_test_fail_multiple_errors" +} diff --git a/pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/NA12878_qc_fails_wrong_contig_name_header.json b/pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/HGDP1000G_500_qc_fails_wrong_contig_name_header.json similarity index 100% rename from pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/NA12878_qc_fails_wrong_contig_name_header.json rename to pipelines/wdl/arrays/imputation_beagle/input_qc/test_inputs/Plumbing/HGDP1000G_500_qc_fails_wrong_contig_name_header.json diff --git a/pipelines/wdl/slidetags/SlideTags.changelog.md b/pipelines/wdl/slidetags/SlideTags.changelog.md index e72fd6f896..5b48883710 100644 --- a/pipelines/wdl/slidetags/SlideTags.changelog.md +++ b/pipelines/wdl/slidetags/SlideTags.changelog.md @@ -1,3 +1,8 @@ +# 1.0.5 +2025-10-24 (Date of Last Commit) + +* Updated the positioning.wdl code to the latest version from the Macosko Lab repository to incorporate recent improvements + # 1.0.4 2025-10-03 (Date of Last Commit) diff --git a/pipelines/wdl/slidetags/SlideTags.wdl b/pipelines/wdl/slidetags/SlideTags.wdl index de2fdcd16f..579d117d00 100644 --- a/pipelines/wdl/slidetags/SlideTags.wdl +++ b/pipelines/wdl/slidetags/SlideTags.wdl @@ -6,7 +6,7 @@ import "../optimus/Optimus.wdl" as optimus workflow SlideTags { - String pipeline_version = "1.0.4" + String pipeline_version = "1.0.5" input { @@ -19,13 +19,13 @@ workflow SlideTags { # Optimus inputs Array[File] 
gex_r1_fastq Array[File] gex_r2_fastq - Array[File]? gex_i1_fastq + Array[File]? gex_i1_fastq File tar_star_reference File annotations_gtf File gex_whitelist String cloud_provider = "gcp" String input_id - Int expected_cells = 3000 + Int expected_cells = 3000 String counting_mode = "sn_rna" Int tenx_chemistry_version = 3 Int emptydrops_lower = 100 @@ -39,13 +39,13 @@ workflow SlideTags { String docker = "us.gcr.io/broad-gotc-prod/slide-tags:1.2.0" } - + parameter_meta { spatial_fastq: "Array of paths to spatial fastq files" pucks: "Array of paths to puck files" docker: "Docker image to use" } - + # Call the optimus workflow call optimus.Optimus as Optimus { input: @@ -69,8 +69,8 @@ workflow SlideTags { count_exons = count_exons, soloMultiMappers = soloMultiMappers, gex_expected_cells = expected_cells - } - + } + call SpatialCount.count as spatial_count { input: fastq_paths = spatial_fastq, @@ -92,13 +92,13 @@ workflow SlideTags { # Version of Optimus pipeline String optimus_pipeline_version_out = Optimus.pipeline_version_out File optimus_genomic_reference_version = Optimus.genomic_reference_version - + # Optimus Metrics outputs File optimus_cell_metrics = Optimus.cell_metrics File optimus_gene_metrics = Optimus.gene_metrics File? optimus_cell_calls = Optimus.cell_calls - - # Optimus Star outputs + + # Optimus Star outputs File optimus_library_metrics = Optimus.library_metrics File optimus_bam = Optimus.bam File optimus_matrix = Optimus.matrix @@ -111,10 +111,10 @@ workflow SlideTags { File? optimus_multimappers_Uniform_matrix = Optimus.multimappers_Uniform_matrix File? optimus_multimappers_Rescue_matrix = Optimus.multimappers_Rescue_matrix File? optimus_multimappers_PropUnique_matrix = Optimus.multimappers_PropUnique_matrix - + # Optimus H5ad File optimus_h5ad_output_file = Optimus.h5ad_output_file - + # Optimus Cellbender outputs File? cb_cell_barcodes_csv = Optimus.cell_barcodes_csv File? 
cb_checkpoint_file = Optimus.checkpoint_file @@ -134,6 +134,7 @@ workflow SlideTags { File positioning_summary_pdf = positioning.summary_pdf File positioning_intermediates = positioning.intermediates_file File positioning_positioning_log = positioning.positioning_log + } } diff --git a/tasks/wdl/ImputationBeagleQcTasks.wdl b/tasks/wdl/ImputationBeagleQcTasks.wdl index a3b8314ae1..d1709b4181 100644 --- a/tasks/wdl/ImputationBeagleQcTasks.wdl +++ b/tasks/wdl/ImputationBeagleQcTasks.wdl @@ -11,10 +11,6 @@ task QcChecks { Int memory_mb = 4000 Int disk_size_gb = ceil(1.1*size(vcf_input, "GiB")) + 10 } - Int command_mem = memory_mb - 1500 - Int max_heap = memory_mb - 1000 - - String vcf_basename = basename(vcf_input) String ref_dict_basename = basename(ref_dict) @@ -25,7 +21,7 @@ task QcChecks { # check for a large number of variants in input vcf and exit if greater than 10 million line_count=$(bcftools stats ~{vcf_input} | grep "number of records:" | awk '{print $6}') if [ "$line_count" -gt 10000000 ]; then - echo "Greater than 10 million variants found in input VCF." >> qc_messages.txt + echo "Greater than 10 million variants found in the input VCF." >> qc_messages.txt echo "false" > passes_qc.txt exit 0 else @@ -56,19 +52,38 @@ task QcChecks { done if [ ${#filtered_chromosomes[@]} -eq 0 ]; then - echo "Input must include data for at least one chromosome in the allowed contigs (${allowed_chromosomes[*]})." >> qc_messages.txt + echo "No variant data found for any chromosome in the supported contigs: (${allowed_chromosomes[*]})." >> qc_messages.txt else - echo "Found data for chromosomes: ${filtered_chromosomes[*]}." + echo "Found variants for chromosomes: ${filtered_chromosomes[*]}." fi # check for sorted or non bgzf compressed vcf bcftools index -t ~{vcf_input} 2> index_stderr.txt + + # note: if the input VCF is both unsorted and not BGZF compressed, only the BGZF error will be reported because indexing stops after that error + NOT_SORTED_MESSAGE="Input VCF is not sorted." 
if grep -qiE "unsorted positions|not continuous" index_stderr.txt; then - echo "Input VCF is not sorted." >> qc_messages.txt; + echo "${NOT_SORTED_MESSAGE}" >> qc_messages.txt; fi + NOT_BGZF_MESSAGE="Input VCF is not BGZF compressed." if grep -q "not BGZF compressed" index_stderr.txt; then - echo "Input VCF is not BGZF compressed." >> qc_messages.txt; + echo "${NOT_BGZF_MESSAGE}" >> qc_messages.txt; + fi + + if [ -f "~{vcf_input}.tbi" ]; then + echo "Input VCF indexed successfully. It therefore is sorted and bgzf-compressed." + else + # only add a message if there are not index-related errors already + if ! grep -q "${NOT_SORTED_MESSAGE}" qc_messages.txt && ! grep -q "${NOT_BGZF_MESSAGE}" qc_messages.txt; then + echo "Failed to index input VCF for an unknown reason." >> qc_messages.txt + # echo index stderr to logs for debugging + echo "Contents of index_stderr.txt:" + cat index_stderr.txt + fi + echo "false" > passes_qc.txt + # exit now if indexing failed, since ValidateVariants requires an index + exit 0 fi # check reference header lines if they exist @@ -78,9 +93,8 @@ task QcChecks { --validation-type-to-exclude ALL \ 2> gatk_output.txt - ref_dict_basename="~{ref_dict_basename}" if grep -q "incompatible contigs" gatk_output.txt; then - echo "Found incompatible contigs (against reference dictionary $ref_dict_basename) in VCF header." >> qc_messages.txt; + echo "VCF header contains none of the expected contigs." >> qc_messages.txt else echo "No incompatible contigs found in VCF header." fi diff --git a/tasks/wdl/slidetags_utils/positioning.wdl b/tasks/wdl/slidetags_utils/positioning.wdl index 3b34dc3732..23b6e80f57 100644 --- a/tasks/wdl/slidetags_utils/positioning.wdl +++ b/tasks/wdl/slidetags_utils/positioning.wdl @@ -20,10 +20,10 @@ task generate_positioning { gcloud config set storage/thread_count 2 # is this set by user? 
# Download the scripts -- these need to be changed -- also need to add to docker - wget https://raw.githubusercontent.com/MacoskoLab/Macosko-Pipelines/e7a4fe892acb47e8e83c1ee585109c99c946e94a/slide-tags/run-positioning.R - wget https://raw.githubusercontent.com/MacoskoLab/Macosko-Pipelines/e7a4fe892acb47e8e83c1ee585109c99c946e94a/slide-tags/positioning.R - wget https://raw.githubusercontent.com/MacoskoLab/Macosko-Pipelines/e7a4fe892acb47e8e83c1ee585109c99c946e94a/slide-tags/helpers.R - wget https://raw.githubusercontent.com/MacoskoLab/Macosko-Pipelines/e7a4fe892acb47e8e83c1ee585109c99c946e94a/slide-tags/plots.R + wget https://raw.githubusercontent.com/MacoskoLab/Macosko-Pipelines/261750c2e15da5ff17533150b62578dda629da08/slide-tags/run-positioning.R + wget https://raw.githubusercontent.com/MacoskoLab/Macosko-Pipelines/261750c2e15da5ff17533150b62578dda629da08/slide-tags/positioning.R + wget https://raw.githubusercontent.com/MacoskoLab/Macosko-Pipelines/261750c2e15da5ff17533150b62578dda629da08/slide-tags/helpers.R + wget https://raw.githubusercontent.com/MacoskoLab/Macosko-Pipelines/261750c2e15da5ff17533150b62578dda629da08/slide-tags/plots.R echo "RNA: ~{sep=' ' rna_paths}" echo "SB: ~{sb_path}" diff --git a/website/docs/Pipelines/SlideTags_Pipeline/README.md b/website/docs/Pipelines/SlideTags_Pipeline/README.md index c5fc29c623..2f3bcf46bd 100644 --- a/website/docs/Pipelines/SlideTags_Pipeline/README.md +++ b/website/docs/Pipelines/SlideTags_Pipeline/README.md @@ -158,9 +158,8 @@ All releases of the pipeline are documented in the repository’s changelog. 
## Citing the Slide-tags Pipeline -If you use the Slide-tags Pipeline in your research, please cite the original sources: - -- **Macosko Lab Pipelines:** https://github.com/MacoskoLab/Macosko-Pipelines/tree/main/slide-tags +If you use the Slide-tags Pipeline in your research, please identify the pipeline in your methods section using the [Slide-tags SciCrunch resource identifier](https://scicrunch.org/resources/data/record/nlx_144509-1/SCR_027567/resolver?q=SCR_027567&l=SCR_027567&i=rrid:SCR_027567). +* Ex: *Slide-tags Pipeline (RRID:SCR_027567)* Please also consider citing our preprint: @@ -168,7 +167,7 @@ Degatano, K., Awdeh, A., Cox III, R.S., Dingman, W., Grant, G., Khajouei, F., Ki ## Acknowledgements -We are immensely grateful Matthew Shabet and the Macosko Lab for development of these analsyes, for their generous time making these scripts FAIR, and for the many hours working with the WARP team to incoporate the scripts into WDL. +We are immensely grateful to Matthew Shabet and the Macosko Lab for the development of these analyses, for their generous time in making these scripts FAIR, and for the many hours spent working with the WARP team to incorporate the scripts into WDL. ## Feedback