Skip to content

Add bespoke classes for accessing SEQ and QUAL #1347

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion pysam/libcalignedsegment.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ from pysam.libcalignmentfile cimport AlignmentFile, AlignmentHeader
ctypedef AlignmentFile AlignmentFile_t


cdef class AlignedSegmentBases:
cdef bam1_t *_delegate


cdef class _AlignedSegment_Cache: # For internal use only
cdef clear_query_sequences(self)
cdef clear_query_qualities(self)
Expand All @@ -55,7 +59,7 @@ cdef class AlignedSegment:
# caching of array properties for quick access
cdef _AlignedSegment_Cache cache

cdef object unused1
cdef AlignedSegmentBases _query_bases
cdef object unused2
cdef object unused3

Expand Down
65 changes: 65 additions & 0 deletions pysam/libcalignedsegment.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ cdef AlignedSegment makeAlignedSegment(bam1_t *src,
dest._delegate = bam_dup1(src)
dest.header = header
dest.cache = _AlignedSegment_Cache()
dest._query_bases = AlignedSegmentBases(dest)
return dest


Expand Down Expand Up @@ -912,6 +913,64 @@ cdef class _AlignedSegment_Cache:
self.query_alignment_qualities_str = NotImplemented


cdef class AlignedSegmentBases:
def __cinit__(self, AlignedSegment aseg):
self._delegate = aseg._delegate

def __getitem__(self, key):
cdef bam1_t *src = self._delegate
cdef int length = src.core.l_qseq
cdef uint8_t *seq = pysam_bam_get_seq(src)

cdef int qpos, start, stop
cdef uint8_t twobases

if isinstance(key, int):
qpos = key
if qpos < 0: qpos += length
if qpos < 0 or qpos >= length:
raise IndexError("query_bases index out of range")

twobases = seq[qpos // 2]
return chr(seq_nt16_str[twobases >> 4 if qpos % 2 == 0 else twobases & 0x0f])

elif isinstance(key, slice):
if length == 0:
return ""

start = key.start if key.start is not None else 0
if start < 0: start += length
if start < 0: start = 0
if start > length: start = length

stop = key.stop if key.stop is not None else length
if stop < 0: stop += length
if stop < 0: stop = 0
if stop > length: stop = length

return getSequenceInRange(src, start, stop).decode('ascii')

else:
raise TypeError("query_bases indices must be integers or slices")

def __len__(self):
return self._delegate.core.l_qseq

def __repr__(self):
cdef bam1_t *src = self._delegate
cdef length = src.core.l_qseq

if src.core.l_qname == 0:
return f"<{type(self).__name__} {self[:48]}{'...' if length > 48 else ''}>"

cdef char *qname = pysam_bam_get_qname(src)
return f"<{type(self).__name__} {self[:48]}{'...' if length > 48 else ''} for {qname.decode('ascii')!r}>"

def __str__(self):
cdef bam1_t *src = self._delegate
return getSequenceInRange(src, 0, src.core.l_qseq).decode('ascii')


cdef class AlignedSegment:
'''Class representing an aligned segment.

Expand Down Expand Up @@ -966,6 +1025,7 @@ cdef class AlignedSegment:
self.cache = _AlignedSegment_Cache()

self.header = header
self._query_bases = AlignedSegmentBases(self)

def __dealloc__(self):
bam_destroy1(self._delegate)
Expand Down Expand Up @@ -1112,6 +1172,7 @@ cdef class AlignedSegment:
dest._delegate = <bam1_t*>calloc(1, sizeof(bam1_t))
dest.header = header
dest.cache = _AlignedSegment_Cache()
dest._query_bases = AlignedSegmentBases(dest)

cdef kstring_t line
line.l = line.m = len(sam)
Expand Down Expand Up @@ -1408,6 +1469,10 @@ cdef class AlignedSegment:
def __set__(self, isize):
self._delegate.core.isize = isize

property query_bases:
def __get__(self):
return self._query_bases

property query_sequence:
"""read sequence bases, including :term:`soft clipped` bases
(None if not present).
Expand Down
Loading