Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion tools/pairtools/.lint_skip

This file was deleted.

88 changes: 61 additions & 27 deletions tools/pairtools/dedup.xml
Original file line number Diff line number Diff line change
@@ -1,15 +1,27 @@
<tool id="pairtools_dedup" name="Pairtools dedup" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="23.2" license="MIT">
<tool id="pairtools_dedup" name="Pairtools dedup" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="@PROFILE_VERSION@" license="MIT">
<description>Find and remove PCR/optical duplicates</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements"/>
<command detect_errors="exit_code"><![CDATA[
## Choose the symlink names handed to pairtools. pairtools decides whether to
## (de)compress from the file extension, so gzipped inputs and their outputs
## must be linked under names ending in .gz; plain inputs use bare names.
#if $pairs_path.is_of_type('4dn_pairs.gz') or $pairs_path.is_of_type('4dn_pairsam.gz'):
#set $input_link = "input.gz"
#set $output_dedup_pairs_link = "output_dedup_pairs.gz"
#set $output_dups_pairs_link = "output_dups_pairs.gz"
#else
#set $input_link = "input"
#set $output_dedup_pairs_link = "output_dedup_pairs"
#set $output_dups_pairs_link = "output_dups_pairs"
#end if
ln -s '$pairs_path' '$input_link' &&
ln -s '$output_dedup_pairs' '$output_dedup_pairs_link' &&
ln -s '$output_dups_pairs' '$output_dups_pairs_link' &&
pairtools dedup
'$pairs_path'
-o '$output_dedup_pairs'
'$input_link'
-o '$output_dedup_pairs_link'
#if $output_dups:
--output-dups '$output_dups_pairs'
--output-dups '$output_dups_pairs_link'
#end if
$mark_dups
#if $output_stats:
Expand All @@ -24,16 +36,17 @@
--max-mismatch ${max_mismatch}
]]></command>
<inputs>
<param name="pairs_path" type="data" format="4dn_pairs,4dn_pairsam" label="Input pairs file" help="Input triu-flipped sorted .pairs or .pairsam file"/>
<param name="pairs_path" type="data" format="4dn_pairs,4dn_pairsam,4dn_pairs.gz,4dn_pairsam.gz" label="Input pairs file" help="Input triu-flipped sorted .pairs or .pairsam file"/>
<param argument="--mark-dups" type="boolean" truevalue="--mark-dups" falsevalue="" checked="True" label="Duplicate pairs are marked as DD in pair_type and as a duplicate in the SAM entries"/>
<param argument="--output-dups" type="boolean" truevalue="--output-dups" falsevalue="" checked="False" label="Output file for duplicate pairs"/>
<param argument="--output-stats" type="boolean" truevalue="--output-stats" falsevalue="" checked="False" label="Output file for duplicate statistics"/>
<param argument="--max-mismatch" type="integer" value="3" min="0" label="Maximum number of mismatches. Pairs with both sides mapped within this distance &quot;bp&quot; from each other are considered duplicates."/>
<param argument="--output-bytile-stats" type="boolean" truevalue="--output-bytile-stats" falsevalue="" checked="False" label="Output file for optical duplicate statistics for datasets with original Illumina-generated read IDs."/>
<param name="compress_output" type="boolean" truevalue=".gz" falsevalue="" checked="false" label="Compress output files" />
</inputs>
<outputs>
<data name="output_dedup_pairs" format_source="pairs_path" label="${tool.name} on ${on_string}: Deduplicated Pairs"/>
<data name="output_dups_pairs" format_source="pairs_path" label="${tool.name} on ${on_string}: Duplicate Pairs">
<data name="output_dedup_pairs" label="${tool.name} on ${on_string}: Deduplicated Pairs" format_source="pairs_path" />
<data name="output_dups_pairs" label="${tool.name} on ${on_string}: Duplicate Pairs" format_source="pairs_path" >
<filter>output_dups</filter>
</data>
<data name="dedup_pairs_stats" format="tabular" label="${tool.name} on ${on_string}: Deduplicated stats">
Expand All @@ -44,46 +57,67 @@
</data>
</outputs>
<tests>

<!--Test 01 with default parameters-->
<test expect_num_outputs="1">
<param name="pairs_path" value="output_sorted_pairs.pairsam"/>
<output name="output_dedup_pairs" file="output_dedup_pairs.pairsam" lines_diff="20"/>
<param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
<output name="output_dedup_pairs" file="output_dedup_pairs.pairsam" ftype="4dn_pairsam" lines_diff="20" />
</test>
<!--Test 02 mark_dups enabled and output_dups-->
<!--Test 02 with default parameters and compressed output-->
<test expect_num_outputs="1">
<param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
<param name="compress_output" value="true"/>
<output name="output_dedup_pairs" file="output_dedup_pairs.pairsam" ftype="4dn_pairsam" decompress="true" lines_diff="20"/>
</test>
<!--Test 03 with default parameters and compressed input-->
<test expect_num_outputs="1">
<!-- Declare the gzipped datatype so the dataset is treated as 4dn_pairsam.gz and the .gz branch of the command is exercised. -->
<param name="pairs_path" ftype="4dn_pairsam.gz" value="output_sorted_pairs.pairsam.gz"/>
<!-- The output takes its format from pairs_path (format_source), so it is gzipped too; decompress it before diffing against the plain-text expectation. -->
<output name="output_dedup_pairs" file="output_dedup_pairs.pairsam" ftype="4dn_pairsam.gz" decompress="true" lines_diff="20"/>
</test>
<!--Test 04 mark_dups enabled and output_dups-->
<test expect_num_outputs="2">
<param name="pairs_path" value="output_sorted_pairs.pairsam"/>
<param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
<param name="mark_dups" value="true"></param>
<param name="output_dups" value="true"></param>
<output name="output_dedup_pairs" file="output_dedup_pairs_markdups.pairsam" lines_diff="20"/>
<output name="output_dups_pairs" file="output_dups_pairs_markdups.pairsam" lines_diff="20"/>
<output name="output_dedup_pairs" file="output_dedup_pairs_markdups.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
<output name="output_dups_pairs" file="output_dups_pairs_markdups.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
</test>
<!--Test 03 mark_dups and output_stats enabled-->
<!--Test 05 mark_dups and output_stats enabled-->
<test expect_num_outputs="2">
<param name="pairs_path" value="output_sorted_pairs.pairsam"/>
<param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
<param name="mark_dups" value="true"></param>
<param name="output_stats" value="true"></param>
<output name="output_dedup_pairs" file="output_dedup_pairs_markdups.pairsam" lines_diff="20"/>
<output name="dedup_pairs_stats" file="output_dedup_pairs.stats" lines_diff="20"/>
<output name="output_dedup_pairs" file="output_dedup_pairs_markdups.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
<output name="dedup_pairs_stats" file="output_dedup_pairs.stats" ftype="tabular" lines_diff="20"/>
</test>
<!--Test 04 mark_dups and output_stats enabled, max_mismatch set to 0-->
<!--Test 06 mark_dups and output_stats enabled, max_mismatch set to 0-->
<test expect_num_outputs="2">
<param name="pairs_path" value="output_sorted_pairs.pairsam"/>
<param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
<param name="mark_dups" value="true"></param>
<param name="output_stats" value="true"></param>
<param name="max_mismatch" value="0"></param>
<output name="output_dedup_pairs" file="output_dedup_max_mismatch0_sorted.pairsam" lines_diff="20"/>
<output name="dedup_pairs_stats" file="output_dedup_max_mismatch0_sorted.stats" lines_diff="20"/>
<output name="output_dedup_pairs" file="output_dedup_max_mismatch0_sorted.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
<output name="dedup_pairs_stats" file="output_dedup_max_mismatch0_sorted.stats" ftype="tabular" lines_diff="20"/>
</test>
<!--Test 07 mark_dups and output_stats + bytile_stats enabled-->
<test expect_num_outputs="3">
<param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
<param name="mark_dups" value="true"></param>
<param name="output_stats" value="true"></param>
<param name="output_bytile_stats" value="true"></param>
<output name="output_dedup_pairs" file="output_dedup_max_parent_id_bytile_sorted.pairsam" ftype="4dn_pairsam" lines_diff="20"/>
<output name="dedup_pairs_stats" file="output_dedup_max_parent_id_bytile_sorted.stats" ftype="tabular" lines_diff="20"/>
<output name="dedup_bytile_stats" file="output_dedup_max_parent_id_bytile_sorted_tile_dups.stats" ftype="tabular" lines_diff="20"/>
</test>
<!--Test 05 mark_dups and output_stats + bytile_stats enabled-->
<!--Test 08 mark_dups and output_stats + bytile_stats enabled, compress output-->
<test expect_num_outputs="3">
<param name="pairs_path" value="output_sorted_pairs.pairsam"/>
<param name="pairs_path" ftype="4dn_pairsam" value="output_sorted_pairs.pairsam"/>
<param name="mark_dups" value="true"></param>
<param name="output_stats" value="true"></param>
<param name="compress_output" value="true"></param>
<param name="output_bytile_stats" value="true"></param>
<output name="output_dedup_pairs" file="output_dedup_max_parent_id_bytile_sorted.pairsam" lines_diff="20"/>
<output name="dedup_pairs_stats" file="output_dedup_max_parent_id_bytile_sorted.stats" lines_diff="20"/>
<output name="dedup_bytile_stats" file="output_dedup_max_parent_id_bytile_sorted_tile_dups.stats" lines_diff="20"/>
<output name="output_dedup_pairs" file="output_dedup_max_parent_id_bytile_sorted.pairsam" ftype="4dn_pairsam" decompress="true" lines_diff="20"/>
<output name="dedup_pairs_stats" file="output_dedup_max_parent_id_bytile_sorted.stats" ftype="tabular" decompress="true" lines_diff="20"/>
<output name="dedup_bytile_stats" file="output_dedup_max_parent_id_bytile_sorted_tile_dups.stats" ftype="tabular" decompress="true" lines_diff="20"/>
</test>
</tests>
<help><![CDATA[
Expand Down
5 changes: 3 additions & 2 deletions tools/pairtools/macros.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<macros>
<token name="@TOOL_VERSION@">1.1.3</token>
<token name="@SUFFIX_VERSION@">1</token>
<token name="@SUFFIX_VERSION@">3</token>
<token name="@PROFILE_VERSION@">25.0</token>
<xml name="edam_ontology">
<edam_datas>
<edam_data>topic_1381</edam_data>
Expand Down Expand Up @@ -29,4 +30,4 @@
<option value="3unique">3unique - Report the 3'-most unique alignment on each side, if present</option>
<option value="all">all - Report all available unique alignments on each side</option>
</xml>
</macros>
</macros>
45 changes: 31 additions & 14 deletions tools/pairtools/parse.xml
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
<tool id="pairtools_parse" name="Pairtools parse" version="@TOOL_VERSION@+galaxy2" profile="23.2" license="MIT">
<tool id="pairtools_parse" name="Pairtools parse" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="@PROFILE_VERSION@" license="MIT">
<description>Find ligation pairs in alignments and create pairs.</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements"/>
<command detect_errors="exit_code"><![CDATA[
#set $output = "output" + str($compress_output)
ln -s '$output_parsed_pairs' '$output' &&
pairtools parse
'$sam_path'
-c '$chroms_path'
#if str($assembly_name).strip():
--assembly '$assembly_name'
#end if
-o '$output_parsed_pairs'
-o '$output'
--min-mapq '$min_mapq'
--max-molecule-size '$max_molecule_size'
$drop_readid
Expand Down Expand Up @@ -66,11 +68,16 @@
</conditional>
<param argument="--walks-policy" type="select" label="Walks Policy" help="The policy for reporting unrescuable walks.">
<expand macro="walks_policy_options"/>
</param>
</param>
<param name="compress_output" type="boolean" truevalue=".gz" falsevalue="" checked="false" label="Compress output file" />
<param argument="max_inter_algn_gap" type="integer" min="0" value="30" label="Max alignment gap" help="read segments that are not covered by any alignment and longer than the specified value are treated as null alignments."/>
</inputs>
<outputs>
<data name="output_parsed_pairs" format="4dn_pairs" label="${tool.name} on ${on_string}: .pairs"/>
<data name="output_parsed_pairs" format="4dn_pairsam" label="${tool.name} on ${on_string}: .pairs">
<change_format>
<when input="compress_output" value=".gz" format="4dn_pairsam.gz"/>
</change_format>
</data>
<data name="parsed_pairs_stats" format="tabular" label="${tool.name} on ${on_string}: parsed.stats">
<filter>output_stats</filter>
</data>
Expand All @@ -83,7 +90,7 @@
<param name="min_mapq" value="1"/>
<param name="walks_policy" value="mask"/>
<param name="max_inter_algn_gap" value="20"/>
<output name="output_parsed_pairs" ftype="4dn_pairs" file="output_parsed_pairs_sam.pairs" lines_diff="10"/>
<output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_sam.pairs" lines_diff="10"/>
</test>
<!--Test 02 with BAM file as input and default parameters-->
<test expect_num_outputs="1">
Expand All @@ -92,7 +99,7 @@
<param name="min_mapq" value="1"/>
<param name="walks_policy" value="mask"/>
<param name="max_inter_algn_gap" value="20"/>
<output name="output_parsed_pairs" ftype="4dn_pairs" file="output_parsed_pairs_bam.pairs" lines_diff="10"/>
<output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_bam.pairs" lines_diff="10"/>
</test>
<!--Test 03 with BAM file as input and minimal mapq of 40-->
<test expect_num_outputs="1">
Expand All @@ -101,7 +108,7 @@
<param name="min_mapq" value="40"/>
<param name="walks_policy" value="mask"/>
<param name="max_inter_algn_gap" value="20"/>
<output name="output_parsed_pairs" ftype="4dn_pairs" file="output_parsed_pairs_bam_min_mapq_40.pairs" lines_diff="10"/>
<output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_bam_min_mapq_40.pairs" lines_diff="10"/>
</test>
<!--Test 04 with BAM file as input and walk policy of 5unique-->
<test expect_num_outputs="1">
Expand All @@ -110,7 +117,7 @@
<param name="min_mapq" value="40"/>
<param name="walks_policy" value="5unique"/>
<param name="max_inter_algn_gap" value="20"/>
<output name="output_parsed_pairs" ftype="4dn_pairs" file="output_parsed_pairs_bam_5unique.pairs" lines_diff="10"/>
<output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_bam_5unique.pairs" lines_diff="10"/>
</test>
<!--Test 05 with BAM file as input and read id dropped-->
<test expect_num_outputs="1">
Expand All @@ -120,7 +127,7 @@
<param name="walks_policy" value="5unique"/>
<param name="max_inter_algn_gap" value="20"/>
<param name="drop_readid" value="true"></param>
<output name="output_parsed_pairs" ftype="4dn_pairs" file="output_parsed_pairs_bam_readid_dropped.pairs" lines_diff="10"/>
<output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_bam_readid_dropped.pairs" lines_diff="10"/>
</test>
<!--Test 06 with SAM file as input and drop_seq enabled-->
<test expect_num_outputs="1">
Expand All @@ -130,7 +137,7 @@
<param name="walks_policy" value="5unique"/>
<param name="max_inter_algn_gap" value="20"/>
<param name="drop_seq" value="true"></param>
<output name="output_parsed_pairs" ftype="4dn_pairs" file="output_parsed_pairs_bam_readid_dropped_seq.pairs" lines_diff="10"/>
<output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_bam_readid_dropped_seq.pairs" lines_diff="10"/>
</test>
<!--Test 07 with SAM file as input and output_stats enabled-->
<test expect_num_outputs="2">
Expand All @@ -150,9 +157,20 @@
<param name="min_mapq" value="1"/>
<param name="walks_policy" value="mask"/>
<param name="max_inter_algn_gap" value="20"/>
<output name="output_parsed_pairs" ftype="4dn_pairs" file="output_parsed_pairs_sam_assemblyname.pairs" lines_diff="10"/>
<output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_sam_assemblyname.pairs" lines_diff="10"/>
</test>
<!--Test 09 with SAM file as input and default parameters and assembly name and compressed output-->
<test expect_num_outputs="1">
<param name="sam_path" value="test.sam"/>
<param name="chroms_path" value="test.genome"/>
<param name="assembly_name" value="test_assembly"/>
<param name="min_mapq" value="1"/>
<param name="walks_policy" value="mask"/>
<param name="max_inter_algn_gap" value="20"/>
<param name="compress_output" value="true"/>
<output name="output_parsed_pairs" ftype="4dn_pairsam.gz" file="output_parsed_pairs_sam_assemblyname.pairs.gz" decompress="true" lines_diff="10"/>
</test>
<!--Test 09 with SAM file as input and add columns-->
<!--Test 10 with SAM file as input and add columns-->
<test expect_num_outputs="1">
<param name="sam_path" value="test.sam"/>
<param name="chroms_path" value="test.genome"/>
Expand All @@ -163,9 +181,8 @@
<param name="add_columns_selection" value="yes"/>
<param name="add_columns" value="mapq,seq"/>
</conditional>
<output name="output_parsed_pairs" ftype="4dn_pairs" file="output_parsed_pairs_sam_mapq.pairs" lines_diff="10"/>
<output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_sam_mapq.pairs" lines_diff="10"/>
</test>

</tests>
<help><![CDATA[
**Pairtools parse**
Expand Down
Loading