diff --git a/src/sv-pipeline/scripts/vcf_qc/collectQC.perSample_benchmarking.sh b/src/sv-pipeline/scripts/vcf_qc/collectQC.perSample_benchmarking.sh index a2fc5b5d6..b4f77028c 100755 --- a/src/sv-pipeline/scripts/vcf_qc/collectQC.perSample_benchmarking.sh +++ b/src/sv-pipeline/scripts/vcf_qc/collectQC.perSample_benchmarking.sh @@ -155,7 +155,7 @@ while read ID; do if [ ! -z ${VIDlist} ] && [ -s ${VIDlist} ]; then echo $ID zcat ${VCFSTATS} | head -n1 > ${OVRTMP}/SET1_calls/${ID}.SET1.SV_calls.bed - zcat ${VIDlist} | cut -f1 | fgrep -wf - <( zcat ${VCFSTATS} ) >> ${OVRTMP}/SET1_calls/${ID}.SET1.SV_calls.bed + zcat ${VIDlist} | awk '$2 != "./." {print $1}' | fgrep -wf - <( zcat ${VCFSTATS} ) >> ${OVRTMP}/SET1_calls/${ID}.SET1.SV_calls.bed bgzip -f ${OVRTMP}/SET1_calls/${ID}.SET1.SV_calls.bed rm -f ${OVRTMP}/SET1_calls/${ID}.SET1.SV_calls.bed tabix -f ${OVRTMP}/SET1_calls/${ID}.SET1.SV_calls.bed.gz diff --git a/wdl/CollectQcPerSample.wdl b/wdl/CollectQcPerSample.wdl index 2ebd232e7..200bf67aa 100644 --- a/wdl/CollectQcPerSample.wdl +++ b/wdl/CollectQcPerSample.wdl @@ -102,14 +102,14 @@ task CollectVidsPerSample { | bcftools view -i 'SVTYPE=="CNV" || AC>0' \ | bcftools query -f '[%SAMPLE\t%ID\t%ALT\t%GT\t%GQ\t%CN\t%CNQ\n]' \ | awk '{OFS="\t"; gt = $4; gq = $5; if ($3 == "") { gq = $7; if ($6 == 2) { gt = "0/0" } else if ($6 == 1 || $6 == 3) { gt = "0/1" } else { gt = "1/1"} }; print $1, $2, gt, gq}' \ - | awk -v outprefix="~{outdirprefix}" '$3 != "0/0" && $3 != "./." {OFS="\t"; print $2, $3, $4 >> outprefix"/"$1".VIDs_genotypes.txt" }' + | awk -v outprefix="~{outdirprefix}" '$3 != "0/0" {OFS="\t"; print $2, $3, $4 >> outprefix"/"$1".VIDs_genotypes.txt" }' else bcftools view -S ~{samples_list} ~{vcf} \ | bcftools +fill-tags -- -t AC \ | bcftools view -i 'SVTYPE=="CNV" || AC>0' \ | bcftools query -f '[%SAMPLE\t%ID\t%ALT\t%GT\t%GQ\n]' \ | awk '{OFS="\t"; gt = $4; gq = $5; if ($3 ~ /CN0/) { if ($4 == "0/2") { gt = "0/0" } else if ($4 == "0/1" || $4 == "0/3") { gt = "0/1" } else { gt = "1/1"} }; print $1, $2, gt, gq}' \ - | awk -v outprefix="~{outdirprefix}" '$3 != "0/0" && $3 != "./." {OFS="\t"; print $2, $3, $4 >> outprefix"/"$1".VIDs_genotypes.txt" }' + | awk -v outprefix="~{outdirprefix}" '$3 != "0/0" {OFS="\t"; print $2, $3, $4 >> outprefix"/"$1".VIDs_genotypes.txt" }' fi # Gzip all output lists diff --git a/wdl/MainVcfQc.wdl b/wdl/MainVcfQc.wdl index bc662cfae..c5060f55f 100644 --- a/wdl/MainVcfQc.wdl +++ b/wdl/MainVcfQc.wdl @@ -512,12 +512,14 @@ task PlotQcPerSample { # Make per-sample directory mkdir ~{prefix}_perSample/ - # Untar per-sample VID lists + # Untar per-sample VID lists and exclude null genotypes mkdir tmp_untar/ tar -xvzf ~{per_sample_tarball} \ --directory tmp_untar/ find tmp_untar/ -name "*.VIDs_genotypes.txt.gz" | while read FILE; do - mv $FILE ~{prefix}_perSample/ + bname=$(basename $FILE) + zcat $FILE | awk '$2 != "./."' | gzip -f > ~{prefix}_perSample/$bname + rm $FILE done # Plot per-sample distributions