diff --git a/conf/modules.config b/conf/modules.config index 6fad45aa..97493a5e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -96,6 +96,16 @@ process { ] } + withName: '.*:WHATSHAP_STATS' { + ext.prefix = { "${meta.id}_whatshap_stats" } + ext.args = { "--sample ${meta.id}" } + publishDir = [ + path: { "${params.outdir}/${meta.id}/qc/whatshap_stats" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // // Preprocessing and other processes // @@ -209,7 +219,7 @@ process { // withName: '.*:LONGPHASE_PHASE' { - ext.prefix = { "somatic_smallvariants" } + ext.prefix = { "germline_smallvariants" } ext.args = { [ meta.platform == 'pb' ? '--pb' : '--ont', @@ -222,6 +232,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + withName: '.*:LONGPHASE_HAPLOTAG' { ext.prefix = { "${meta.id}_${meta.type}" } publishDir = [ @@ -262,6 +273,7 @@ process { ] } withName: '.*:CLAIRSTO' { + ext.args = { "--sample_name ${meta.id}" } publishDir = [ path: { "${params.outdir}/${meta.id}/variants/clairsto" }, mode: params.publish_dir_mode, @@ -279,6 +291,7 @@ process { } withName: '.*:CLAIR3' { + ext.args = { "--sample_name=${meta.id}" } publishDir = [ path: { "${params.outdir}/${meta.id}/variants/clair3" }, mode: params.publish_dir_mode, diff --git a/modules.json b/modules.json index 8947a202..5d114f9c 100644 --- a/modules.json +++ b/modules.json @@ -90,6 +90,11 @@ "git_sha": "2c73cc8fa92cf48de3da0b643fdf357a8a290b36", "installed_by": ["modules"] }, + "nanoplot": { + "branch": "master", + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c", + "installed_by": ["modules"] + }, "pigz/uncompress": { "branch": "master", "git_sha": "f84336b7fa91a65aa61d215b8c109fbb8e4b4ac6", @@ -146,6 +151,11 @@ "branch": "master", "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] + }, + "whatshap/stats": { + "branch": "master", + "git_sha": 
"bfab71f4d68c1aaff09335a3433e7b2836918b2a", + "installed_by": ["modules"] } } }, diff --git a/modules/nf-core/deepvariant/callvariants/main.nf b/modules/nf-core/deepvariant/callvariants/main.nf new file mode 100644 index 00000000..2fc656ee --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/main.nf @@ -0,0 +1,50 @@ + +process DEEPVARIANT_CALLVARIANTS { + tag "$meta.id" + label 'process_high' + + //Conda is not supported at the moment + container "docker.io/google/deepvariant:1.9.0" + + input: + tuple val(meta), path(make_examples_tfrecords) + + output: + tuple val(meta), path("${prefix}.call-*-of-*.tfrecord.gz"), emit: call_variants_tfrecords + tuple val("${task.process}"), val('deepvariant'), eval("/opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //'"), topic: versions, emit: versions_deepvariant + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def matcher = make_examples_tfrecords[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/ + if (!matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + make_examples_tfrecords[0].baseName + "' doesn't match the expected pattern") + } + def examples_tfrecord_name = matcher[0][1] + def shardCount = matcher[0][2] + // Reconstruct the logical name - ${tfrecord_name}.examples.tfrecord@${task.cpus}.gz + def examples_tfrecords_logical_name = "${examples_tfrecord_name}@${shardCount}.gz" + + """ + /opt/deepvariant/bin/call_variants \\ + ${args} \\ + --outfile "${prefix}.call.tfrecord.gz" \\ + --examples "${examples_tfrecords_logical_name}" + + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.call-00000-of-00001.tfrecord.gz + + """ +} diff --git a/modules/nf-core/deepvariant/callvariants/meta.yml b/modules/nf-core/deepvariant/callvariants/meta.yml new file mode 100644 index 00000000..fa1aaa42 --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/meta.yml @@ -0,0 +1,68 @@ +name: deepvariant_callvariants +description: Call variants from the examples produced by make_examples +keywords: + - variant calling + - machine learning + - neural network +tools: + - deepvariant: + description: DeepVariant is an analysis pipeline that uses a deep neural network + to call genetic variants from next-generation DNA sequencing data + homepage: https://github.com/google/deepvariant + documentation: https://github.com/google/deepvariant + tool_dev_url: https://github.com/google/deepvariant + doi: "10.1038/nbt.4235" + licence: ["BSD-3-clause"] + identifier: biotools:deepvariant +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - make_examples_tfrecords: + type: file + description: The actual sharded input files, from DEEPVARIANT_MAKEEXAMPLES process + pattern: "*.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format +output: + call_variants_tfrecords: + - - meta: + type: list + description: | + Each output contains: unique ID string from input channel, meta, tfrecord file with variant calls. + - ${prefix}.call-*-of-*.tfrecord.gz: + type: list + description: | + Each output contains: unique ID string from input channel, meta, tfrecord file with variant calls. + versions_deepvariant: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@abhi18av" + - "@ramprasadn" + - "@fa2k" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/deepvariant/callvariants/tests/main.nf.test b/modules/nf-core/deepvariant/callvariants/tests/main.nf.test new file mode 100644 index 00000000..d617650b --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process DEEPVARIANT_CALLVARIANTS" + script "../main.nf" + config "./nextflow.config" + process "DEEPVARIANT_CALLVARIANTS" + + tag "deepvariant/makeexamples" + tag "deepvariant/callvariants" + tag "deepvariant" + tag "modules" + tag "modules_nfcore" + + test("homo_sapiens - wgs") { + setup { + 
run("DEEPVARIANT_MAKEEXAMPLES") { + script "../../makeexamples/main.nf" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + } + when { + process { + """ + input[0] = DEEPVARIANT_MAKEEXAMPLES.out.examples + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.call_variants_tfrecords.get(0).get(0) == [ id:'test', single_end:false ] }, + // The tfrecord binary representation is not stable, but we check the name of the output. 
+ { assert snapshot(file(process.out.call_variants_tfrecords.get(0).get(1)).name).match("homo_sapiens-wgs-call_variants_tfrecords-filenames")}, + ) + } + } + + test("homo_sapiens - wgs - stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta + [] // No input paths are needed in stub mode + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/deepvariant/callvariants/tests/main.nf.test.snap b/modules/nf-core/deepvariant/callvariants/tests/main.nf.test.snap new file mode 100644 index 00000000..ce71dac2 --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/tests/main.nf.test.snap @@ -0,0 +1,55 @@ +{ + "homo_sapiens-wgs-call_variants_tfrecords-filenames": { + "content": [ + "test.call-00000-of-00001.tfrecord.gz" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-04T17:04:33.276938" + }, + "homo_sapiens - wgs - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.call-00000-of-00001.tfrecord.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + "DEEPVARIANT_CALLVARIANTS", + "deepvariant", + "1.9.0" + ] + ], + "call_variants_tfrecords": [ + [ + { + "id": "test", + "single_end": false + }, + "test.call-00000-of-00001.tfrecord.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_deepvariant": [ + [ + "DEEPVARIANT_CALLVARIANTS", + "deepvariant", + "1.9.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:02:54.403068431" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepvariant/callvariants/tests/nextflow.config b/modules/nf-core/deepvariant/callvariants/tests/nextflow.config new file mode 100644 index 00000000..68aec144 --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/tests/nextflow.config @@ -0,0 +1,11 @@ 
+process { + withName: "DEEPVARIANT_CALLVARIANTS" { + ext.args = '--checkpoint "/opt/models/wgs"' + cpus = 2 // Keep CPUs fixed so the number of output files is reproducible + } +} +process { + withName: "DEEPVARIANT_MAKEEXAMPLES" { + ext.args = '--checkpoint "/opt/models/wgs" --call_small_model_examples --small_model_indel_gq_threshold "30" --small_model_snp_gq_threshold "25" --small_model_vaf_context_window_size "51" --trained_small_model_path "/opt/smallmodels/wgs"' + } +} diff --git a/modules/nf-core/deepvariant/makeexamples/main.nf b/modules/nf-core/deepvariant/makeexamples/main.nf new file mode 100644 index 00000000..77d2f331 --- /dev/null +++ b/modules/nf-core/deepvariant/makeexamples/main.nf @@ -0,0 +1,58 @@ +process DEEPVARIANT_MAKEEXAMPLES { + tag "$meta.id" + label 'process_high' + + //Conda is not supported at the moment + container "docker.io/google/deepvariant:1.9.0" + + input: + tuple val(meta), path(input), path(index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(gzi) + tuple val(meta5), path(par_bed) + + output: + tuple val(meta), path("${prefix}.examples.tfrecord-*-of-*.gz{,.example_info.json}"), emit: examples + tuple val(meta), path("${prefix}.gvcf.tfrecord-*-of-*.gz"), emit: gvcf + tuple val(meta), path("${prefix}_call_variant_outputs.examples.tfrecord-*-of-*.gz", arity: "0..*"), emit: small_model_calls + tuple val("${task.process}"), val('deepvariant'), eval("/opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //'"), topic: versions, emit: versions_deepvariant + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def regions = intervals ? "--regions ${intervals}" : "" + def par_regions = par_bed ? "--par_regions_bed=${par_bed}" : "" + + """ + seq 0 ${task.cpus - 1} | parallel -q --halt 2 --line-buffer /opt/deepvariant/bin/make_examples \\ + --mode calling \\ + --ref "${fasta}" \\ + --reads "${input}" \\ + --examples "./${prefix}.examples.tfrecord@${task.cpus}.gz" \\ + --gvcf "./${prefix}.gvcf.tfrecord@${task.cpus}.gz" \\ + ${regions} \\ + ${par_regions} \\ + ${args} \\ + --task {} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + printf -v SHARD_COUNT "%04d" ${task.cpus} + for i in \$( seq -f "%04g" 0 ${task.cpus-1} ) + do + echo "" | gzip > ${prefix}.examples.tfrecord-\$i-of-\$SHARD_COUNT.tfrecord.gz + touch ${prefix}.examples.tfrecord-\$i-of-\$SHARD_COUNT.tfrecord.gz.example_info.json + echo "" | gzip > ${prefix}.gvcf.tfrecord-\$i-of-\$SHARD_COUNT.tfrecord.gz + done + """ +} diff --git a/modules/nf-core/deepvariant/makeexamples/meta.yml b/modules/nf-core/deepvariant/makeexamples/meta.yml new file mode 100644 index 00000000..12056fbd --- /dev/null +++ b/modules/nf-core/deepvariant/makeexamples/meta.yml @@ -0,0 +1,135 @@ +name: deepvariant_makeexamples +description: Transforms the input alignments to a format suitable for the deep neural + network variant caller +keywords: + - variant calling + - machine learning + - neural network +tools: + - deepvariant: + description: DeepVariant is an analysis pipeline that uses a deep neural network + to call genetic variants from next-generation DNA sequencing data + homepage: https://github.com/google/deepvariant + documentation: https://github.com/google/deepvariant + tool_dev_url: https://github.com/google/deepvariant + doi: "10.1038/nbt.4235" + licence: ["BSD-3-clause"] + identifier: biotools:deepvariant +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram}" + ontologies: [] + - index: + type: file + description: Index of BAM/CRAM file + pattern: "*.{bai,crai}" + ontologies: [] + - intervals: + type: file + description: Interval file for targeted regions + pattern: "*.bed" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - gzi: + type: file + description: GZI index of reference fasta file + pattern: "*.gzi" + ontologies: [] + - - meta5: + type: map + description: | + Groovy Map containing reference information + e.g.
[ id:'genome' ] + - par_bed: + type: file + description: BED file containing PAR regions + pattern: "*.bed" + ontologies: [] +output: + examples: + - - meta: + type: list + description: | + Tuple containing sample metadata and examples that can be used for calling + - ${prefix}.examples.tfrecord-*-of-*.gz{,.example_info.json}: + type: list + description: | + Tuple containing sample metadata and examples that can be used for calling + gvcf: + - - meta: + type: list + description: | + Tuple containing sample metadata and examples that can be used for calling + - ${prefix}.gvcf.tfrecord-*-of-*.gz: + type: list + description: | + Tuple containing sample metadata and the GVCF data in tfrecord format + small_model_calls: + - - meta: + type: list + description: | + Tuple containing sample metadata and examples that can be used for calling + - ${prefix}_call_variant_outputs.examples.tfrecord-*-of-*.gz: + type: list + description: | + Optional variant calls from the small model, if enabled, in tfrecord format + versions_deepvariant: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@abhi18av" + - "@ramprasadn" + - "@fa2k" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test b/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test new file mode 100644 index 00000000..cc06f780 ---
/dev/null +++ b/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test @@ -0,0 +1,204 @@ +nextflow_process { + + name "Test Process DEEPVARIANT_MAKEEXAMPLES" + script "../main.nf" + config "./nextflow.config" + process "DEEPVARIANT_MAKEEXAMPLES" + + tag "deepvariant/makeexamples" + tag "deepvariant" + tag "modules" + tag "modules_nfcore" + + test("homo_sapiens - [bam, bai] - fasta - fai") { + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + + { assert process.out.examples[0][0] == [id:'test', single_end:false] }, + { assert process.out.gvcf[0][0] == [id:'test', single_end:false] }, + { assert process.out.examples[0][1].size() == 4 }, + { assert snapshot( + process.out.examples[0][1].collect { file(it).name } + ).match("test1-examples-filenames") }, + { assert process.out.gvcf[0][1].size() == 2 }, + { assert snapshot(process.out.versions_deepvariant).match("test1-versions") }, + { assert snapshot( + process.out.gvcf[0][1].collect { file(it).name } + ).match("test1-gvcf-filenames") } + ) + } + } + + test("homo_sapiens - [cram, crai, genome_bed] - fasta - fai") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 
'/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.examples[0][0] == [id:'test', single_end:false] }, + { assert process.out.gvcf[0][0] == [id:'test', single_end:false] }, + { assert process.out.examples[0][1].size() == 4 }, + { assert snapshot( + process.out.examples[0][1].collect { file(it).name } + ).match("test2-examples-filenames") }, + { assert process.out.gvcf[0][1].size() == 2 }, + { assert snapshot(process.out.versions_deepvariant).match("test2-versions") }, + { assert snapshot( + process.out.gvcf[0][1].collect { file(it).name } + ).match("test2-gvcf-filenames") } + ) + } + } + + test("homo_sapiens - [bam, bai] - fasta_gz - fasta_gz_fai") { + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.gz', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + 
'/genomics/homo_sapiens/genome/genome.fasta.gz.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.gz.gzi', checkIfExists: true) + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.examples[0][0] == [id:'test', single_end:false] }, + { assert process.out.gvcf[0][0] == [id:'test', single_end:false] }, + { assert process.out.examples[0][1].size() == 4 }, + { assert snapshot( + process.out.examples[0][1].collect { file(it).name } + ).match("test3-examples-filenames") }, + { assert process.out.gvcf[0][1].size() == 2 }, + { assert snapshot(process.out.versions_deepvariant).match("test3-versions") }, + { assert snapshot( + process.out.gvcf[0][1].collect { file(it).name } + ).match("test3-gvcf-filenames") } + ) + } + } + + test("stub") { + + options "-stub" + + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.gvcf[0][0] == [id:'test', single_end:false] }, + { assert process.out.examples[0][1].size() == 4 }, + { assert snapshot( + process.out.examples[0][1].collect { file(it).name } + ).match("test4-examples-filenames") }, + { assert process.out.gvcf[0][1].size() 
== 2 }, + { assert snapshot(process.out.versions_deepvariant).match("test4-versions") }, + { assert snapshot( + process.out.gvcf[0][1].collect { file(it).name } + ).match("test4-gvcf-filenames") } + ) + } + } +} diff --git a/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test.snap b/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test.snap new file mode 100644 index 00000000..729f0dc5 --- /dev/null +++ b/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test.snap @@ -0,0 +1,178 @@ +{ + "test1-gvcf-filenames": { + "content": [ + [ + "test.gvcf.tfrecord-00000-of-00002.gz", + "test.gvcf.tfrecord-00001-of-00002.gz" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:12:57.93412258" + }, + "test3-versions": { + "content": [ + [ + [ + "DEEPVARIANT_MAKEEXAMPLES", + "deepvariant", + "1.9.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:09:55.034298895" + }, + "test2-examples-filenames": { + "content": [ + [ + "test.examples.tfrecord-00000-of-00002.gz", + "test.examples.tfrecord-00000-of-00002.gz.example_info.json", + "test.examples.tfrecord-00001-of-00002.gz", + "test.examples.tfrecord-00001-of-00002.gz.example_info.json" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:14:02.467533548" + }, + "test1-examples-filenames": { + "content": [ + [ + "test.examples.tfrecord-00000-of-00002.gz", + "test.examples.tfrecord-00000-of-00002.gz.example_info.json", + "test.examples.tfrecord-00001-of-00002.gz", + "test.examples.tfrecord-00001-of-00002.gz.example_info.json" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:12:57.790379812" + }, + "test2-versions": { + "content": [ + [ + [ + "DEEPVARIANT_MAKEEXAMPLES", + "deepvariant", + "1.9.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": 
"2026-02-02T17:07:52.050411549" + }, + "test4-versions": { + "content": [ + [ + [ + "DEEPVARIANT_MAKEEXAMPLES", + "deepvariant", + "1.9.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:12:07.012233232" + }, + "test4-examples-filenames": { + "content": [ + [ + "test.examples.tfrecord-0000-of-0002.tfrecord.gz", + "test.examples.tfrecord-0000-of-0002.tfrecord.gz.example_info.json", + "test.examples.tfrecord-0001-of-0002.tfrecord.gz", + "test.examples.tfrecord-0001-of-0002.tfrecord.gz.example_info.json" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:15:58.286077155" + }, + "test1-versions": { + "content": [ + [ + [ + "DEEPVARIANT_MAKEEXAMPLES", + "deepvariant", + "1.9.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:05:28.75651648" + }, + "test3-gvcf-filenames": { + "content": [ + [ + "test.gvcf.tfrecord-00000-of-00002.gz", + "test.gvcf.tfrecord-00001-of-00002.gz" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:15:03.780115231" + }, + "test3-examples-filenames": { + "content": [ + [ + "test.examples.tfrecord-00000-of-00002.gz", + "test.examples.tfrecord-00000-of-00002.gz.example_info.json", + "test.examples.tfrecord-00001-of-00002.gz", + "test.examples.tfrecord-00001-of-00002.gz.example_info.json" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:15:03.702565392" + }, + "test2-gvcf-filenames": { + "content": [ + [ + "test.gvcf.tfrecord-00000-of-00002.gz", + "test.gvcf.tfrecord-00001-of-00002.gz" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:14:02.550236324" + }, + "test4-gvcf-filenames": { + "content": [ + [ + "test.gvcf.tfrecord-0000-of-0002.tfrecord.gz", + "test.gvcf.tfrecord-0001-of-0002.tfrecord.gz" + ] + ], + "meta": { + "nf-test": 
"0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:15:58.412547051" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepvariant/makeexamples/tests/nextflow.config b/modules/nf-core/deepvariant/makeexamples/tests/nextflow.config new file mode 100644 index 00000000..6811fe48 --- /dev/null +++ b/modules/nf-core/deepvariant/makeexamples/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: "DEEPVARIANT_MAKEEXAMPLES" { + cpus = 2 // The number of output files is determined by cpus - keep it the same for tests + ext.args = '--checkpoint "/opt/models/wgs" --call_small_model_examples --small_model_indel_gq_threshold "30" --small_model_snp_gq_threshold "25" --small_model_vaf_context_window_size "51" --trained_small_model_path "/opt/smallmodels/wgs"' + } +} diff --git a/modules/nf-core/deepvariant/postprocessvariants/main.nf b/modules/nf-core/deepvariant/postprocessvariants/main.nf new file mode 100644 index 00000000..0830f9ac --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/main.nf @@ -0,0 +1,86 @@ +process DEEPVARIANT_POSTPROCESSVARIANTS { + tag "$meta.id" + label 'process_medium' + + //Conda is not supported at the moment + container "docker.io/google/deepvariant:1.9.0" + + input: + tuple val(meta), path(variant_calls_tfrecord_files), path(gvcf_tfrecords), path(small_model_calls), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(gzi) + + output: + tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.{tbi,csi}") , emit: vcf_index + tuple val(meta), path("${prefix}.g.vcf.gz") , emit: gvcf + tuple val(meta), path("${prefix}.g.vcf.gz.{tbi,csi}") , emit: gvcf_index + tuple val("${task.process}"), val('deepvariant'), eval("/opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //'"), topic: versions, emit: versions_deepvariant + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running 
this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def regions = intervals ? "--regions ${intervals}" : "" + def variant_calls_tfrecord_name = variant_calls_tfrecord_files[0].name.replaceFirst(/-\d{5}-of-\d{5}/, "") + + def gvcf_matcher = gvcf_tfrecords[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/ + if (!gvcf_matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + gvcf_tfrecords[0].baseName + "' doesn't match the expected pattern") + } + def gvcf_tfrecord_name = gvcf_matcher[0][1] + def gvcf_shardCount = gvcf_matcher[0][2] + // Reconstruct the sharded logical name from the first shard, e.g. test.gvcf.tfrecord@00002.gz + def gvcf_tfrecords_logical_name = "${gvcf_tfrecord_name}@${gvcf_shardCount}.gz" + + // The following block determines whether the small model was used, and if so, adds the variant calls from it + // to the argument --small_model_cvo_records. + def small_model_arg = "" + if (small_model_calls) { + def small_model_matcher = (small_model_calls[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/) + if (!small_model_matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + small_model_calls[0].baseName + "' doesn't match the expected pattern") + } + def small_model_tfrecord_name = small_model_matcher[0][1] + def small_model_shardCount = small_model_matcher[0][2] + // Reconstruct the logical name.
Example: test_call_variant_outputs.examples.tfrecord@00012.gz + def small_model_tfrecords_logical_name = "${small_model_tfrecord_name}@${small_model_shardCount}.gz" + small_model_arg = "--small_model_cvo_records ${small_model_tfrecords_logical_name}" + } + + """ + /opt/deepvariant/bin/postprocess_variants \\ + ${args} \\ + --ref "${fasta}" \\ + --infile "${variant_calls_tfrecord_name}" \\ + --outfile "${prefix}.vcf.gz" \\ + --nonvariant_site_tfrecord_path "${gvcf_tfrecords_logical_name}" \\ + --gvcf_outfile "${prefix}.g.vcf.gz" \\ + ${regions} \\ + ${small_model_arg} \\ + --cpus $task.cpus + + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + echo "" | gzip > ${prefix}.g.vcf.gz + touch ${prefix}.g.vcf.gz.tbi + + """ +} diff --git a/modules/nf-core/deepvariant/postprocessvariants/meta.yml b/modules/nf-core/deepvariant/postprocessvariants/meta.yml new file mode 100644 index 00000000..4a087011 --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/meta.yml @@ -0,0 +1,155 @@ +name: deepvariant_postprocessvariants +description: DeepVariant is an analysis pipeline that uses a deep neural network to + call genetic variants from next-generation DNA sequencing data +keywords: + - variant calling + - machine learning + - neural network +tools: + - deepvariant: + description: DeepVariant is an analysis pipeline that uses a deep neural network + to call genetic variants from next-generation DNA sequencing data + homepage: https://github.com/google/deepvariant + documentation: https://github.com/google/deepvariant + tool_dev_url: https://github.com/google/deepvariant + doi: "10.1038/nbt.4235" + licence: ["BSD-3-clause"] + identifier:
biotools:deepvariant +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - variant_calls_tfrecord_files: + type: file + description: | + One or more data files containing variant calls from DEEPVARIANT_CALLVARIANTS + pattern: "*.tfrecord.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - gvcf_tfrecords: + type: file + description: | + Sharded tfrecord file from DEEPVARIANT_MAKEEXAMPLES with the coverage information used for GVCF output + pattern: "*.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - small_model_calls: + type: file + description: | + Sharded tfrecord file from DEEPVARIANT_MAKEEXAMPLES with variant calls from the small model + pattern: "*.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - intervals: + type: file + description: Interval file for targeted regions + pattern: "*.bed" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - gzi: + type: file + description: GZI index of reference fasta file + pattern: "*.gzi" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.vcf.gz: + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + vcf_index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.vcf.gz.{tbi,csi}: + type: file + description: Index for VCF + pattern: "$*.vcf.gz.{tbi,csi}" + ontologies: [] + gvcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.g.vcf.gz: + type: file + description: Compressed GVCF file + pattern: "*.g.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + gvcf_index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.g.vcf.gz.{tbi,csi}: + type: file + description: Index for GVCF + pattern: "*.g.vcf.gz.{tbi,csi}" + ontologies: [] + versions_deepvariant: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@abhi18av" + - "@ramprasadn" + - "@fa2k" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test b/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test new file mode 100644 index 00000000..ef9110b0 --- 
/dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test @@ -0,0 +1,123 @@ +nextflow_process { + + name "Test Process DEEPVARIANT_POSTPROCESSVARIANTS" + script "../main.nf" + process "DEEPVARIANT_POSTPROCESSVARIANTS" + config "./nextflow.config" + + tag "deepvariant/makeexamples" + tag "deepvariant/callvariants" + tag "deepvariant/postprocessvariants" + tag "deepvariant" + tag "modules" + tag "modules_nfcore" + + test("homo_sapiens - wgs") { + setup { + run("DEEPVARIANT_MAKEEXAMPLES") { + script "../../makeexamples/main.nf" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [], + + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + run("DEEPVARIANT_CALLVARIANTS") { + script "../../callvariants/main.nf" + process { + """ + input[0] = DEEPVARIANT_MAKEEXAMPLES.out.examples + """ + } + } + } + when { + process { + """ + input[0] = DEEPVARIANT_CALLVARIANTS.out.call_variants_tfrecords.join( + DEEPVARIANT_MAKEEXAMPLES.out.gvcf, + failOnMismatch: true + ).join( + DEEPVARIANT_MAKEEXAMPLES.out.small_model_calls, + failOnMismatch: true + ).map { meta, tf, gvcf, small_model_calls -> [ meta, tf, gvcf, small_model_calls, [] ] } + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + 
'/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - wgs - stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [], + [], + [], + [], + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("stub") } + ) + } + } + +} diff --git a/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test.snap b/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test.snap new file mode 100644 index 00000000..a981cf84 --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test.snap @@ -0,0 +1,196 @@ +{ + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + "DEEPVARIANT_POSTPROCESSVARIANTS", + "deepvariant", + "1.9.0" + ] + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "gvcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.g.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "vcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_deepvariant": [ + [ + "DEEPVARIANT_POSTPROCESSVARIANTS", + "deepvariant", + "1.9.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:22:12.888323156" + }, + "homo_sapiens - wgs": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ] + ], + "4": [ + [ + "DEEPVARIANT_POSTPROCESSVARIANTS", + "deepvariant", + "1.9.0" + ] + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ] + ], + "gvcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ] + ], + "vcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ] + ], + "versions_deepvariant": [ + [ + "DEEPVARIANT_POSTPROCESSVARIANTS", + "deepvariant", + "1.9.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:19:32.037352523" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/deepvariant/postprocessvariants/tests/nextflow.config b/modules/nf-core/deepvariant/postprocessvariants/tests/nextflow.config new file mode 100644 index 00000000..b8f3f47a --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/tests/nextflow.config @@ -0,0 +1,10 @@ +process { + withName: "DEEPVARIANT_CALLVARIANTS" { + ext.args = '--checkpoint "/opt/models/wgs"' + } +} +process { + withName: "DEEPVARIANT_MAKEEXAMPLES" { + ext.args = '--checkpoint "/opt/models/wgs" --call_small_model_examples --small_model_indel_gq_threshold "30" --small_model_snp_gq_threshold "25" --small_model_vaf_context_window_size "51" --trained_small_model_path "/opt/smallmodels/wgs"' + } +} diff --git a/modules/nf-core/whatshap/stats/environment.yml b/modules/nf-core/whatshap/stats/environment.yml new file mode 100644 index 00000000..389d6871 --- /dev/null +++ b/modules/nf-core/whatshap/stats/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::whatshap=2.8" diff --git a/modules/nf-core/whatshap/stats/main.nf b/modules/nf-core/whatshap/stats/main.nf new file mode 100644 index 00000000..54abecd0 --- /dev/null +++ b/modules/nf-core/whatshap/stats/main.nf @@ -0,0 +1,59 @@ + +process WHATSHAP_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/whatshap:2.8--py39h2de1943_0': + 'biocontainers/whatshap:2.8--py39h2de1943_0' }" + + input: + tuple val(meta), path(vcf) // channel: [ val(meta), path(vcf) ] + val(include_tsv_output) // value: [ true | false ] + val(include_gtf_output) // value: [ true | false ] + val(inlude_block_output) // value: [ true | false ] + + output: + tuple val(meta), path("${prefix}.tsv"), emit: tsv, optional: true + tuple val(meta), path("${prefix}.gtf"), emit: gtf, optional: true + tuple val(meta), path("${prefix}.txt"), emit: block, optional: true + tuple val(meta), path("${prefix}.log"), emit: log + tuple val("${task.process}"), val('whatshap'), eval("whatshap --version"), emit: versions_whatshap, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def output_tsv = include_tsv_output ? "--tsv ${prefix}.tsv" : '' + def output_gtf = include_gtf_output ? "--gtf ${prefix}.gtf" : '' + def output_block = inlude_block_output ? "--block-list ${prefix}.txt" : '' + """ + whatshap stats \\ + $args \\ + $output_tsv \\ + $output_gtf \\ + $output_block \\ + $vcf \\ + | tee ${prefix}.log + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def tsv_touch_cmd = include_tsv_output ? "touch ${prefix}.tsv" : '' + def gtf_touch_cmd = include_gtf_output ? "touch ${prefix}.gtf" : '' + def block_touch_cmd = inlude_block_output ? 
"touch ${prefix}.txt" : '' + def log_touch_cmd = "touch ${prefix}.log" + """ + echo $args + + $tsv_touch_cmd + $gtf_touch_cmd + $block_touch_cmd + $log_touch_cmd + """ +} diff --git a/modules/nf-core/whatshap/stats/meta.yml b/modules/nf-core/whatshap/stats/meta.yml new file mode 100644 index 00000000..5db5a176 --- /dev/null +++ b/modules/nf-core/whatshap/stats/meta.yml @@ -0,0 +1,109 @@ +name: "whatshap_stats" +description: Compute statistics from phased variant file using Whatshap +keywords: + - vcf + - whatshap + - stats + - phasing + - phase +tools: + - "whatshap": + description: Phase genomic variants using DNA sequencing reads (haplotype + assembly). + args_id: "$args" + homepage: "https://whatshap.readthedocs.io" + documentation: "https://whatshap.readthedocs.io" + tool_dev_url: "https://github.com/whatshap/whatshap" + doi: "10.1101/085050" + licence: ["MIT"] + identifier: biotools:whatshap +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - vcf: + type: file + description: Phased variant vcf file + pattern: "*.vcf" + ontologies: + - edam: "http://edamontology.org/format_3016" # VCF + - include_tsv_output: + type: boolean + description: Whether to include TSV output file + default: false + - include_gtf_output: + type: boolean + description: Whether to include GTF output file + default: false + - inlude_block_output: + type: boolean + description: Whether to include block list output file + default: false +output: + tsv: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${prefix}.tsv: + type: file + description: Whatshap stats output in TSV format + pattern: "*.tsv" + gtf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample1' ] + - ${prefix}.gtf: + type: file + description: Whatshap stats output in GTF format + pattern: "*.gtf" + block: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${prefix}.txt: + type: file + description: Whatshap stats block list output + pattern: "*.txt" + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${prefix}.log: + type: file + description: Whatshap stats output in TXT format + pattern: "*.log" + versions_whatshap: + - - ${task.process}: + type: string + description: The name of the process + - whatshap: + type: string + description: The name of the tool + - whatshap --version: + type: string + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - whatshap: + type: string + description: The name of the tool + - whatshap --version: + type: eval + description: The expression to obtain the version of the tool +authors: + - "@eliottBo" +maintainers: + - "@eliottBo" diff --git a/modules/nf-core/whatshap/stats/tests/main.nf.test b/modules/nf-core/whatshap/stats/tests/main.nf.test new file mode 100644 index 00000000..12cc2f57 --- /dev/null +++ b/modules/nf-core/whatshap/stats/tests/main.nf.test @@ -0,0 +1,105 @@ +nextflow_process { + + name "Test Process WHATSHAP_STATS" + script "../main.nf" + process "WHATSHAP_STATS" + + tag "modules" + tag "modules_nfcore" + tag "whatshap" + tag "whatshap/stats" + + test("homo_sapiens - all_output - vcf") { + + when { + process { + """ + + input[0] = [ + [ id:'test_sample' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr21.vcf.gz', checkIfExists: true), + ] + input[1] = true + input[2] = true + input[3] = true + + """ + } + } + + then { + + assertAll( + { assert process.success }, + { assert snapshot(process.out.findAll { key, val -> 
key.startsWith("versions")}, + process.out).match()} + + ) + + } + + } + test("homo_sapiens - tsv_output - vcf") { + + when { + process { + """ + + input[0] = [ + [ id:'test_sample' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr21.vcf.gz', checkIfExists: true), + ] + input[1] = true + input[2] = false + input[3] = false + + """ + } + } + + then { + + assertAll( + { assert process.success }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions")}, process.out).match() }, + + ) + + } + + } + + test("homo_sapiens - vcf -stub") { + + options "-stub" + + when { + process { + """ + + input[0] = [ + [ id:'test_sample' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr21.vcf.gz', checkIfExists: true), + ] + input[1] = true + input[2] = true + input[3] = true + """ + } + } + + then { + + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions")}, + process.out + ).match() }, + ) + + } + + } + +} diff --git a/modules/nf-core/whatshap/stats/tests/main.nf.test.snap b/modules/nf-core/whatshap/stats/tests/main.nf.test.snap new file mode 100644 index 00000000..fea3adca --- /dev/null +++ b/modules/nf-core/whatshap/stats/tests/main.nf.test.snap @@ -0,0 +1,276 @@ +{ + "homo_sapiens - tsv_output - vcf": { + "content": [ + { + "versions_whatshap": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ] + }, + { + "0": [ + [ + { + "id": "test_sample" + }, + "test_sample.tsv:md5,647d19183ff8efb21e48bea633ca375c" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test_sample" + }, + "test_sample.log:md5,5f818f833f6f66d852b638e2327b7671" + ] + ], + "4": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ], + "block": [ + + ], + "gtf": [ + + ], + "log": [ + [ + { + "id": "test_sample" + }, + "test_sample.log:md5,5f818f833f6f66d852b638e2327b7671" + ] + ], + "tsv": [ + [ + { + "id": "test_sample" + }, + 
"test_sample.tsv:md5,647d19183ff8efb21e48bea633ca375c" + ] + ], + "versions_whatshap": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-09T10:56:19.643449202" + }, + "homo_sapiens - all_output - vcf": { + "content": [ + { + "versions_whatshap": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ] + }, + { + "0": [ + [ + { + "id": "test_sample" + }, + "test_sample.tsv:md5,647d19183ff8efb21e48bea633ca375c" + ] + ], + "1": [ + [ + { + "id": "test_sample" + }, + "test_sample.gtf:md5,4a2b521799cdccfc2a296f49df39e313" + ] + ], + "2": [ + [ + { + "id": "test_sample" + }, + "test_sample.txt:md5,aa64e268909459b49a82ebab3b8bde5f" + ] + ], + "3": [ + [ + { + "id": "test_sample" + }, + "test_sample.log:md5,5f818f833f6f66d852b638e2327b7671" + ] + ], + "4": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ], + "block": [ + [ + { + "id": "test_sample" + }, + "test_sample.txt:md5,aa64e268909459b49a82ebab3b8bde5f" + ] + ], + "gtf": [ + [ + { + "id": "test_sample" + }, + "test_sample.gtf:md5,4a2b521799cdccfc2a296f49df39e313" + ] + ], + "log": [ + [ + { + "id": "test_sample" + }, + "test_sample.log:md5,5f818f833f6f66d852b638e2327b7671" + ] + ], + "tsv": [ + [ + { + "id": "test_sample" + }, + "test_sample.tsv:md5,647d19183ff8efb21e48bea633ca375c" + ] + ], + "versions_whatshap": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-09T10:56:07.409933392" + }, + "homo_sapiens - vcf -stub": { + "content": [ + { + "versions_whatshap": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ] + }, + { + "0": [ + [ + { + "id": "test_sample" + }, + "test_sample.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test_sample" + }, + "test_sample.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test_sample" + }, + 
"test_sample.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test_sample" + }, + "test_sample.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ], + "block": [ + [ + { + "id": "test_sample" + }, + "test_sample.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gtf": [ + [ + { + "id": "test_sample" + }, + "test_sample.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test_sample" + }, + "test_sample.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tsv": [ + [ + { + "id": "test_sample" + }, + "test_sample.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_whatshap": [ + [ + "WHATSHAP_STATS", + "whatshap", + "2.8" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-09T10:56:23.489785877" + } +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index c1315f99..bed1fe49 100644 --- a/nextflow.config +++ b/nextflow.config @@ -40,6 +40,7 @@ params { skip_fiber = false skip_m6a = false skip_vep = false + skip_whatshapstats = false // minimap2 options minimap2_ont_model = null diff --git a/nextflow_schema.json b/nextflow_schema.json index b25ea2ee..b68ffd1f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -214,6 +214,10 @@ "skip_nanoplot": { "type": "boolean", "description": "Skip Nanoplot" + }, + "skip_whatshapstats": { + "type": "boolean", + "description": "Skip WhatsHap stats" } } }, diff --git a/subworkflows/local/tumor_normal_happhase.nf b/subworkflows/local/tumor_normal_happhase.nf index 0580c10e..b2cb135f 100644 --- a/subworkflows/local/tumor_normal_happhase.nf +++ b/subworkflows/local/tumor_normal_happhase.nf @@ -111,14 +111,6 @@ workflow TUMOR_NORMAL_HAPPHASE { .set{ normal_bams_germlinevcf } // [meta, bam, bai, germline_vcf, [], []] -- svs and mods are empty placeholders for LONGPHASE_PHASE input - CLAIR3.out.vcf - .map { meta, vcf -> - 
def extra = [] - return [meta, vcf, extra] - } - .set { germline_vep } - // [meta, clair3_vcf, []] -- germline small variants for VEP annotation - // // MODULE: LONGPHASE_PHASE // @@ -132,6 +124,14 @@ workflow TUMOR_NORMAL_HAPPHASE { ch_versions = ch_versions.mix(LONGPHASE_PHASE.out.versions) + LONGPHASE_PHASE.out.snv_vcf + .map { meta, vcf -> + def extra = [] + return [meta, vcf, extra] + } + .set { germline_vep } + // [meta, clair3_vcf, []] -- germline small variants for VEP annotation + // Add phased vcf to normal bams // Add type information back // both are needed for mixing with the tumor bams diff --git a/subworkflows/local/tumor_only_happhase.nf b/subworkflows/local/tumor_only_happhase.nf index 2b8b3b75..24095c56 100644 --- a/subworkflows/local/tumor_only_happhase.nf +++ b/subworkflows/local/tumor_only_happhase.nf @@ -80,14 +80,6 @@ workflow TUMOR_ONLY_HAPPHASE { .set { somatic_vep } // [meta, somatic_vcf, []] -- PASS (somatic) variants for VEP annotation - VCFSPLIT.out.germline_vcf - .map { meta, vcf -> - def extra = [] - return [meta,vcf, extra] - } - .set { germline_vep } - // [meta, germline_vcf, []] -- non-somatic variants (relabelled PASS) for VEP annotation - // // MODULES: LONGPHASE_PHASE // @@ -100,6 +92,15 @@ workflow TUMOR_ONLY_HAPPHASE { ch_versions = ch_versions.mix(LONGPHASE_PHASE.out.versions) + LONGPHASE_PHASE.out.snv_vcf + .map { meta, vcf -> + def extra = [] + return [meta,vcf, extra] + } + .set { germline_vep } + // [meta, germline_vcf, []] -- non-somatic variants (relabelled PASS) for VEP annotation + + // Add phased nonsomatic vcf info // remove model info tumor_bams diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index bd36bc15..8a214ec5 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -84,6 +84,9 @@ "WGET": { "wget": "1.21.4" }, + "WHATSHAP_STATS": { + "whatshap": 2.8 + }, "Workflow": { "IntGenomicsLab/lrsomatic": "v1.1.0dev" } @@ -107,6 +110,8 @@ 
"multiqc/multiqc_data/multiqc_samtools_stats.txt", "multiqc/multiqc_data/multiqc_software_versions.txt", "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/multiqc_whatshap_phased_bp_plot.txt", + "multiqc/multiqc_data/multiqc_whatshap_stats.txt", "multiqc/multiqc_data/samtools-flagstat-pct-table.txt", "multiqc/multiqc_data/samtools-flagstat-table.txt", "multiqc/multiqc_data/samtools-idxstats-mapped-reads-plot_Normalised_Counts.txt", @@ -114,11 +119,13 @@ "multiqc/multiqc_data/samtools-idxstats-mapped-reads-plot_Raw_Counts.txt", "multiqc/multiqc_data/samtools-stats-dp.txt", "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/whatshap-stats-table.txt", "multiqc/multiqc_plots", "multiqc/multiqc_plots/pdf", "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/multiqc_whatshap_phased_bp_plot.pdf", "multiqc/multiqc_plots/pdf/samtools-flagstat-pct-table.pdf", "multiqc/multiqc_plots/pdf/samtools-flagstat-table.pdf", "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Normalised_Counts-cnt.pdf", @@ -130,10 +137,12 @@ "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/whatshap-stats-table.pdf", "multiqc/multiqc_plots/png", "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/multiqc_whatshap_phased_bp_plot.png", "multiqc/multiqc_plots/png/samtools-flagstat-pct-table.png", "multiqc/multiqc_plots/png/samtools-flagstat-table.png", 
"multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Normalised_Counts-cnt.png", @@ -145,10 +154,12 @@ "multiqc/multiqc_plots/png/samtools-stats-dp.png", "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/png/whatshap-stats-table.png", "multiqc/multiqc_plots/svg", "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/multiqc_whatshap_phased_bp_plot.svg", "multiqc/multiqc_plots/svg/samtools-flagstat-pct-table.svg", "multiqc/multiqc_plots/svg/samtools-flagstat-table.svg", "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Normalised_Counts-cnt.svg", @@ -160,6 +171,7 @@ "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/whatshap-stats-table.svg", "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/lrsomatic_software_mqc_versions.yml", @@ -232,6 +244,10 @@ "sample1/qc/tumor/samtools/sample1.flagstat", "sample1/qc/tumor/samtools/sample1.idxstats", "sample1/qc/tumor/samtools/sample1.stats", + "sample1/qc/whatshap_stats", + "sample1/qc/whatshap_stats/sample1_whatshap_stats.gtf", + "sample1/qc/whatshap_stats/sample1_whatshap_stats.log", + "sample1/qc/whatshap_stats/sample1_whatshap_stats.tsv", "sample1/variants", "sample1/variants/clair3", "sample1/variants/clair3/merge_output.vcf.gz", @@ -242,8 +258,8 @@ "sample1/variants/clairs/snvs.vcf.gz", "sample1/variants/clairs/snvs.vcf.gz.tbi", "sample1/variants/phased", - "sample1/variants/phased/somatic_smallvariants.vcf.gz", - "sample1/variants/phased/somatic_smallvariants.vcf.gz.tbi", + "sample1/variants/phased/germline_smallvariants.vcf.gz", + 
"sample1/variants/phased/germline_smallvariants.vcf.gz.tbi", "sample1/variants/severus", "sample1/variants/severus/all_SVs", "sample1/variants/severus/all_SVs/severus_all.vcf.gz", @@ -335,6 +351,10 @@ "sample2/qc/tumor/samtools/sample2.flagstat", "sample2/qc/tumor/samtools/sample2.idxstats", "sample2/qc/tumor/samtools/sample2.stats", + "sample2/qc/whatshap_stats", + "sample2/qc/whatshap_stats/sample2_whatshap_stats.gtf", + "sample2/qc/whatshap_stats/sample2_whatshap_stats.log", + "sample2/qc/whatshap_stats/sample2_whatshap_stats.tsv", "sample2/variants", "sample2/variants/clair3", "sample2/variants/clair3/merge_output.vcf.gz", @@ -345,8 +365,8 @@ "sample2/variants/clairs/snvs.vcf.gz", "sample2/variants/clairs/snvs.vcf.gz.tbi", "sample2/variants/phased", - "sample2/variants/phased/somatic_smallvariants.vcf.gz", - "sample2/variants/phased/somatic_smallvariants.vcf.gz.tbi", + "sample2/variants/phased/germline_smallvariants.vcf.gz", + "sample2/variants/phased/germline_smallvariants.vcf.gz.tbi", "sample2/variants/severus", "sample2/variants/severus/all_SVs", "sample2/variants/severus/all_SVs/severus_all.vcf.gz", @@ -405,6 +425,10 @@ "sample3/qc/tumor/samtools/sample3.flagstat", "sample3/qc/tumor/samtools/sample3.idxstats", "sample3/qc/tumor/samtools/sample3.stats", + "sample3/qc/whatshap_stats", + "sample3/qc/whatshap_stats/sample3_whatshap_stats.gtf", + "sample3/qc/whatshap_stats/sample3_whatshap_stats.log", + "sample3/qc/whatshap_stats/sample3_whatshap_stats.tsv", "sample3/variants", "sample3/variants/clairsto", "sample3/variants/clairsto/germline.vcf.gz", @@ -416,8 +440,8 @@ "sample3/variants/clairsto/somatic.vcf.gz", "sample3/variants/clairsto/somatic.vcf.gz.tbi", "sample3/variants/phased", - "sample3/variants/phased/somatic_smallvariants.vcf.gz", - "sample3/variants/phased/somatic_smallvariants.vcf.gz.tbi", + "sample3/variants/phased/germline_smallvariants.vcf.gz", + "sample3/variants/phased/germline_smallvariants.vcf.gz.tbi", "sample3/variants/severus", 
"sample3/variants/severus/all_SVs", "sample3/variants/severus/all_SVs/severus_all.vcf.gz", @@ -442,21 +466,24 @@ "sample3/vep/somatic/sample3_SOMATIC_VEP.vcf.gz_summary.html" ], [ - "sample1_normal.bam:md5,58854f8d427538288322ac20df574fe1", - "sample1_normal.bam.bai:md5,398a33445ef807ebc83c851c3f3c0df6", - "sample1_tumor.bam:md5,0e2ebc65d456e9aa614f5e4714ea2f97", - "sample1_tumor.bam.bai:md5,b92667a80d721b8d515ad099e4f97925", + "sample1_normal.bam:md5,c4eda86b77d9280bbcb922ab944ce68b", + "sample1_normal.bam.bai:md5,0d0e6451f37ddd209bc8804c50e29e85", + "sample1_tumor.bam:md5,57ad020a25e02fe94e9d6199c396aaf1", + "sample1_tumor.bam.bai:md5,28ffee78ed0ed13e0749e8d08bbb52c6", "sample1.flagstat:md5,1c41ea9923945501eb7e41f83a90502d", "sample1.idxstats:md5,902e503387799123ea59255e3fca172c", "sample1.stats:md5,5a76f92088d36f8e93d72351e521b59b", "sample1.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", "sample1.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", "sample1.stats:md5,8cec99bd9c1ba4ee22619b66d4fec02a", + "sample1_whatshap_stats.gtf:md5,eff050a68e36e778b06e0ec19435c569", + "sample1_whatshap_stats.log:md5,76b73731f74fe32ef2d11f6bb0a0f71a", + "sample1_whatshap_stats.tsv:md5,f566ae25b3c5a8f7e94b3d6c1b0417f8", "breakpoints_double.csv:md5,fd92fe40bc0ab3b836dedc395b80d6e2", "read_qual.txt:md5,78247dfa2ea336eac0e128eba5e9eef4", - "sample2_normal.bam:md5,c9b1ac8d2faec01ecb178f0b351af69c", - "sample2_normal.bam.bai:md5,b8f81d2703b1d06128e23b860a9fd635", - "sample2_tumor.bam:md5,2ba9456992d944b63d63e067d5e3bc56", + "sample2_normal.bam:md5,32cb1237503f716d788c8d49106d57cc", + "sample2_normal.bam.bai:md5,bda7beaa98b119d07bb61da781af2033", + "sample2_tumor.bam:md5,b9ec59ff00a6c561a614eeb7553c352c", "sample2_tumor.bam.bai:md5,8342f0fbbc8ce4f4c79aa42c7804df9d", "sample2.flagstat:md5,714d0cc0c213e2640e54a16f3d0e6e7e", "sample2.idxstats:md5,72eb83bb11748dc863fef1a0a5497e4b", @@ -464,18 +491,24 @@ "sample2.flagstat:md5,4344a8745efef9cc2a017024218d61c6", 
"sample2.idxstats:md5,69467fc02c83a30084736aeea8b785fb", "sample2.stats:md5,7cbbff1faaf2e030470a8c1e69434b48", + "sample2_whatshap_stats.gtf:md5,4d8f4393e3aebe4e945c0b8236cf3b3e", + "sample2_whatshap_stats.log:md5,10bba7bae6dd99b989ece5e5dac7a8f9", + "sample2_whatshap_stats.tsv:md5,bb46226e486af9026ab76e014624e903", "breakpoints_double.csv:md5,d3f0957887406fb79f9dcc3707324d8c", "read_qual.txt:md5,8b92ff7dc4536188be159b95525511cd", - "sample3_tumor.bam:md5,7107cfc84eafca8f1ae918e775111090", - "sample3_tumor.bam.bai:md5,d2855691846361e01999895250c835e4", + "sample3_tumor.bam:md5,72e067ec99d694f5e1549edca7196054", + "sample3_tumor.bam.bai:md5,4c7386eb59528e81bd48e6a77952f1c3", "sample3.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", "sample3.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", "sample3.stats:md5,6825d4e497aef80ed7160afbef5076d9", + "sample3_whatshap_stats.gtf:md5,46a97067376b06b476d180709bc9e3d8", + "sample3_whatshap_stats.log:md5,376254ec9c98f9ba204895e7085516ed", + "sample3_whatshap_stats.tsv:md5,f7cc79156f23e884ead18e50b8434dbf", "breakpoints_double.csv:md5,41bb00e81dd6c319c13e754fa853ca68", "read_qual.txt:md5,b918430d35354dad1d7f02f21e4cd4ed" ] ], - "timestamp": "2026-03-13T14:26:30.747596368", + "timestamp": "2026-03-13T17:15:05.714239372", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.0" diff --git a/workflows/lrsomatic.nf b/workflows/lrsomatic.nf index fc61e9c6..e34b6363 100644 --- a/workflows/lrsomatic.nf +++ b/workflows/lrsomatic.nf @@ -32,6 +32,8 @@ include { FIBERTOOLSRS_QC } from '../modules/local/fibertoolsr include { ENSEMBLVEP_VEP as SOMATIC_VEP } from '../modules/nf-core/ensemblvep/vep/main.nf' include { ENSEMBLVEP_VEP as GERMLINE_VEP } from '../modules/nf-core/ensemblvep/vep/main.nf' include { ENSEMBLVEP_VEP as SV_VEP } from '../modules/nf-core/ensemblvep/vep/main.nf' +include { WHATSHAP_STATS } from '../modules/nf-core/whatshap/stats/main' + // // IMPORT SUBWORKFLOWS // @@ -42,9 +44,6 @@ include { TUMOR_NORMAL_HAPPHASE } from 
'../subworkflows/local/tumor_normal_h include { TUMOR_ONLY_HAPPHASE } from '../subworkflows/local/tumor_only_happhase' - - - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -419,12 +418,42 @@ workflow LRSOMATIC { gnomad ) + // Set channel for phased germline variants germline_vep = TUMOR_NORMAL_HAPPHASE.out.germline_vep.mix(TUMOR_ONLY_HAPPHASE.out.germline_vep) // [meta, vcf, []] -- germline variants merged from T/N and tumor-only paths + + // Set channel for somatic variants somatic_vep = TUMOR_NORMAL_HAPPHASE.out.somatic_vep.mix(TUMOR_ONLY_HAPPHASE.out.somatic_vep) // [meta, vcf, []] -- somatic variants merged from T/N and tumor-only paths + + whatshap_stats_txt = channel.empty() + + if (!params.skip_qc && !params.skip_whatshapstats) { + + // Create channel for whatshap stats + germline_vep + .map { meta, vcf, _extra -> + return [meta, vcf] } + .set { ch_whatshap_stats } + + // + // Module: WHATSHAP_STATS + // + + WHATSHAP_STATS ( + ch_whatshap_stats, + true, + true, + false + ) + + whatshap_stats_txt = WHATSHAP_STATS.out.tsv + + } + if (!params.skip_vep) { + // // MODULE: GERMLINE_VEP // @@ -536,8 +565,6 @@ workflow LRSOMATIC { } - - // // Module: MOSDEPTH // @@ -690,6 +717,8 @@ workflow LRSOMATIC { ch_multiqc_files = ch_multiqc_files.mix(ch_nanoplot_pre_txt.collect{it -> it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_nanoplot_post_txt.collect{it -> it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(whatshap_stats_txt.collect{it -> it[1]}.ifEmpty([])) + MULTIQC ( ch_multiqc_files .collect()