|
33 | 33 | # perl LCA_table_to_kraken_output_format.pl |
34 | 34 | # [output of retrieve_top_blast_hits_LCA_for_each_sequence.pl for one blast search] |
35 | 35 | # [fasta file that was input to blast search (to retrieve sequence lengths and names of unclassified sequences)] |
36 | | -# [output kraken format table] |
| 36 | +# > [output kraken format table] |
37 | 37 |
|
38 | 38 |
|
39 | 39 | use strict; |
|
56 | 56 | my $LCA_taxon_species_column = 3; |
57 | 57 | my $LCA_taxon_genus_column = 4; |
58 | 58 | my $LCA_taxon_family_column = 5; |
59 | | -my $evalue_of_top_hits_column = 6; |
60 | | -my $lowest_pident_of_top_hits_column = 7; |
61 | | -my $mean_pident_of_top_hits_column = 8; |
62 | | -my $highest_pident_of_top_hits_column = 9; |
63 | | -my $lowest_qcovs_of_top_hits_column = 10; |
64 | | -my $mean_qcovs_of_top_hits_column = 11; |
65 | | -my $highest_qcovs_of_top_hits_column = 12; |
66 | | -my $number_top_hits_column = 13; |
| 59 | +my $LCA_taxon_superkingdom_column = 6; |
| 60 | +my $evalue_of_top_hits_column = 7; |
| 61 | +my $lowest_pident_of_top_hits_column = 8; |
| 62 | +my $mean_pident_of_top_hits_column = 9; |
| 63 | +my $highest_pident_of_top_hits_column = 10; |
| 64 | +my $lowest_qcovs_of_top_hits_column = 11; |
| 65 | +my $mean_qcovs_of_top_hits_column = 12; |
| 66 | +my $highest_qcovs_of_top_hits_column = 13; |
| 67 | +my $number_top_hits_column = 14; |
67 | 68 |
|
68 | 69 |
|
69 | 70 | # reads in sequence names and lengths from fasta file |
|
126 | 127 | # prints kraken format row for unclassified sequence |
127 | 128 | # "C"/"U": a one letter code indicating that the sequence was either classified |
128 | 129 | # or unclassified. |
129 | | - print "C".$DELIMITER; |
| 130 | + if($assigned_taxon_id == 0) |
| 131 | + { |
| 132 | + print "U".$DELIMITER; |
| 133 | + } |
| 134 | + else |
| 135 | + { |
| 136 | + print "C".$DELIMITER; |
| 137 | + } |
130 | 138 |
|
131 | 139 | # The sequence ID, obtained from the FASTA/FASTQ header. |
132 | 140 | print $sequence_name.$DELIMITER; |
|
0 commit comments