Skip to content

Commit 8d72975

Browse files
src/load_data_for_complexity.cpp: cleanup
1 parent 2289cfe commit 8d72975

File tree

1 file changed

+29
-36
lines changed

1 file changed

+29
-36
lines changed

src/load_data_for_complexity.cpp

Lines changed: 29 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
#include <GenomicRegion.hpp>
2323
#include <MappedRead.hpp>
24-
#include <smithlab_utils.hpp>
2524

2625
#include <algorithm>
2726
#include <cstddef>
@@ -42,7 +41,7 @@
4241
#include <htslib/sam.h>
4342
#endif
4443

45-
// NOLINTBEGIN(*-avoid-magic-numbers,*-narrowing-conversions)
44+
// NOLINTBEGIN(*-narrowing-conversions)
4645

4746
static bool
4847
update_pe_duplicate_counts_hist(const GenomicRegion &curr_gr,
@@ -263,7 +262,7 @@ load_counts(const std::string &input_file_name,
263262
}
264263
else if (val != 0)
265264
throw std::runtime_error("problem reading file at line " +
266-
toa(n_counts + 1));
265+
std::to_string(n_counts + 1));
267266
}
268267
in.peek();
269268
}
@@ -294,12 +293,12 @@ load_histogram(const std::string &filename, std::vector<double> &counts_hist) {
294293
// error reading input
295294
if (!(is >> read_count >> frequency))
296295
throw std::runtime_error("bad histogram line format:\n" + buffer + "\n" +
297-
"(line " + toa(line_count) + ")");
296+
"(line " + std::to_string(line_count) + ")");
298297

299298
// histogram is out of order?
300299
if (read_count < prev_read_count)
301300
throw std::runtime_error("bad line order in file " + filename + "\n" +
302-
"(line " + toa(line_count) + ")");
301+
"(line " + std::to_string(line_count) + ")");
303302
counts_hist.resize(read_count + 1, 0.0);
304303
counts_hist[read_count] = frequency;
305304
if (read_count == 0ul) {
@@ -313,17 +312,13 @@ load_histogram(const std::string &filename, std::vector<double> &counts_hist) {
313312
return n_reads;
314313
}
315314

316-
/////////////////////////////////////////////////////////
317315
// Loading coverage counts
318-
////////////////////////////////////////////////////////
319316

320-
// probabilistically split genomic regions into mutiple
321-
// genomic regions of width equal to bin_size
317+
// probabilistically split genomic regions into multiple genomic regions of
318+
// width equal to bin_size
322319
[[nodiscard]] static auto
323-
split_genomic_region(const GenomicRegion &inputGR, std::mt19937 &generator,
320+
split_genomic_region(GenomicRegion gr, std::mt19937 &generator,
324321
const std::size_t bin_size) -> std::vector<GenomicRegion> {
325-
GenomicRegion gr(inputGR);
326-
327322
const auto frac = static_cast<double>(gr.get_start() % bin_size) / bin_size;
328323
const auto width = gr.get_width();
329324

@@ -345,20 +340,18 @@ split_genomic_region(const GenomicRegion &inputGR, std::mt19937 &generator,
345340
return outputGRs;
346341
}
347342

348-
// split a mapped read into multiple genomic regions
349-
// based on the number of bases in each
343+
// split a mapped read into multiple genomic regions based on the number of
344+
// bases in each
350345
[[nodiscard]] static auto
351-
SplitMappedRead(const MappedRead &inputMR, std::mt19937 &generator,
346+
SplitMappedRead(const MappedRead &mr, std::mt19937 &generator,
352347
const std::size_t bin_size) -> std::vector<GenomicRegion> {
353-
outputGRs.clear();
354-
355348
std::size_t covered_bases{};
356-
std::size_t read_idx{inputMR.r.get_start()};
349+
std::size_t read_idx{mr.r.get_start()};
357350
std::size_t seq_idx{};
358351

359352
std::vector<GenomicRegion> outputGRs;
360-
while (seq_idx < std::size(inputMR.seq)) {
361-
if (inputMR.seq[seq_idx] != 'N')
353+
while (seq_idx < std::size(mr.seq)) {
354+
if (mr.seq[seq_idx] != 'N')
362355
++covered_bases;
363356

364357
// if we reach the end of a bin, probabilistically create a binned read
@@ -369,9 +362,9 @@ SplitMappedRead(const MappedRead &inputMR, std::mt19937 &generator,
369362
if (dist(generator) <= frac) {
370363
const std::size_t curr_start = read_idx - (read_idx % bin_size);
371364
const std::size_t curr_end = curr_start + bin_size;
372-
outputGRs.emplace_back(inputMR.r.get_chrom(), curr_start, curr_end,
373-
inputMR.r.get_name(), inputMR.r.get_score(),
374-
inputMR.r.get_strand());
365+
outputGRs.emplace_back(mr.r.get_chrom(), curr_start, curr_end,
366+
mr.r.get_name(), mr.r.get_score(),
367+
mr.r.get_strand());
375368
}
376369
covered_bases = 0;
377370
}
@@ -384,10 +377,11 @@ SplitMappedRead(const MappedRead &inputMR, std::mt19937 &generator,
384377
if (dist(generator) <= frac) {
385378
const std::size_t curr_start = read_idx - (read_idx % bin_size);
386379
const std::size_t curr_end = curr_start + bin_size;
387-
outputGRs.emplace_back(inputMR.r.get_chrom(), curr_start, curr_end,
388-
inputMR.r.get_name(), inputMR.r.get_score(),
389-
inputMR.r.get_strand());
380+
outputGRs.emplace_back(mr.r.get_chrom(), curr_start, curr_end,
381+
mr.r.get_name(), mr.r.get_score(),
382+
mr.r.get_strand());
390383
}
384+
return outputGRs;
391385
}
392386

393387
std::size_t
@@ -413,7 +407,7 @@ load_coverage_counts_MR(const std::string &input_file_name,
413407
const MappedRead mr(line);
414408
if (mr.r.get_width() > max_width)
415409
throw std::runtime_error("Encountered read of width " +
416-
toa(mr.r.get_width()) +
410+
std::to_string(mr.r.get_width()) +
417411
"max_width set too small");
418412

419413
const auto splitGRs = SplitMappedRead(mr, generator, bin_size);
@@ -479,18 +473,17 @@ load_coverage_counts_GR(const std::string &infile, const std::uint32_t seed,
479473
}
480474

481475
#ifdef HAVE_HTSLIB
482-
// Deal with SAM/BAM format only if we have htslib
483476

484477
static inline bool
485478
not_mapped(const bamxx::bam_rec &aln) {
486479
return get_tid(aln) == -1;
487480
}
488481

489482
struct aln_pos {
490-
int32_t tid{};
483+
std::int32_t tid{};
491484
hts_pos_t pos{};
492485
aln_pos() = default;
493-
aln_pos(const int32_t tid, const hts_pos_t pos) : tid{tid}, pos{pos} {}
486+
aln_pos(const std::int32_t tid, const hts_pos_t pos) : tid{tid}, pos{pos} {}
494487
explicit aln_pos(const bamxx::bam_rec &a) :
495488
tid{get_tid(a)}, pos{get_pos(a)} {}
496489
bool
@@ -509,9 +502,9 @@ struct aln_pos {
509502
};
510503

511504
struct aln_pos_pair {
512-
int32_t tid{};
505+
std::int32_t tid{};
513506
hts_pos_t pos{};
514-
int32_t mtid{};
507+
std::int32_t mtid{};
515508
hts_pos_t mpos{};
516509
explicit aln_pos_pair(const bamxx::bam_rec &a) :
517510
tid{get_tid(a)}, pos{get_pos(a)}, mtid{get_mtid(a)}, mpos{get_mpos(a)} {}
@@ -622,7 +615,7 @@ load_counts_BAM_pe(const std::uint32_t n_threads, const std::string &inputfile,
622615
}
623616

624617
struct genomic_interval {
625-
int32_t tid{}; // indicates uninitialized
618+
std::int32_t tid{}; // indicates uninitialized
626619
hts_pos_t start{};
627620
hts_pos_t stop{};
628621
bool
@@ -651,8 +644,8 @@ round_prob(const T x, const std::uint32_t bin_size, const double frac) {
651644
return frac < (x - lo) ? lo : hi;
652645
}
653646

654-
// split a mapped read into multiple genomic intervals based on the
655-
// number of base pairs in each
647+
// split a mapped read into multiple genomic intervals based on the number of
648+
// base pairs in each
656649
static void
657650
split_genomic_interval(const genomic_interval &gi, std::mt19937 &generator,
658651
const hts_pos_t bin_size, std::vector<aln_pos> &output) {
@@ -784,4 +777,4 @@ load_coverage_counts_BAM(const std::uint32_t n_threads,
784777

785778
#endif // HAVE_HTSLIB
786779

787-
// NOLINTEND(*-avoid-magic-numbers,*-narrowing-conversions)
780+
// NOLINTEND(*-narrowing-conversions)

0 commit comments

Comments
 (0)