Skip to content

Commit 6c8cbad

Browse files
authored
feat!: Use longest compatible remaining transcript (#439)
closes #438
1 parent 49ad51b commit 6c8cbad

File tree

3 files changed: 26 additions and 5 deletions

src/cool_seq_tool/app.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,7 @@ def __init__(
107107
self.ex_g_coords_mapper = ExonGenomicCoordsMapper(
108108
self.seqrepo_access,
109109
self.uta_db,
110+
self.mane_transcript,
110111
self.mane_transcript_mappings,
111112
self.liftover,
112113
)

src/cool_seq_tool/mappers/exon_genomic_coords.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,9 @@
88

99
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
1010
from cool_seq_tool.mappers.liftover import LiftOver
11+
from cool_seq_tool.mappers.mane_transcript import ManeTranscript
1112
from cool_seq_tool.schemas import (
13+
AnnotationLayer,
1214
Assembly,
1315
BaseModelForbidExtra,
1416
CoordinateType,
@@ -273,6 +275,7 @@ def __init__(
273275
self,
274276
seqrepo_access: SeqRepoAccess,
275277
uta_db: UtaDatabase,
278+
mane_transcript: ManeTranscript,
276279
mane_transcript_mappings: ManeTranscriptMappings,
277280
liftover: LiftOver,
278281
) -> None:
@@ -297,11 +300,13 @@ def __init__(
297300
298301
:param seqrepo_access: SeqRepo instance to give access to query SeqRepo database
299302
:param uta_db: UtaDatabase instance to give access to query UTA database
303+
:param mane_transcript: ManeTranscript instance to give access to ManeTranscript class
300304
:param mane_transcript_mappings: Instance to provide access to ManeTranscriptMappings class
301305
:param liftover: Instance to provide mapping between human genome assemblies
302306
"""
303307
self.seqrepo_access = seqrepo_access
304308
self.uta_db = uta_db
309+
self.mane_transcript = mane_transcript
305310
self.mane_transcript_mappings = mane_transcript_mappings
306311
self.liftover = liftover
307312

@@ -870,14 +875,18 @@ async def _genomic_to_tx_segment(
870875
if mane_transcripts:
871876
transcript = mane_transcripts[0]["RefSeq_nuc"]
872877
else:
873-
# Attempt to find a coding transcript if a MANE transcript
878+
# Attempt to find longest compatible transcript if a MANE transcript
874879
# cannot be found
875-
results = await self.uta_db.get_transcripts(
876-
gene=gene, alt_ac=genomic_ac
880+
results = await self.mane_transcript.get_longest_compatible_transcript(
881+
start_pos=genomic_pos,
882+
end_pos=genomic_pos,
883+
gene=gene,
884+
alt_ac=genomic_ac,
885+
start_annotation_layer=AnnotationLayer.GENOMIC,
877886
)
878887

879-
if not results.is_empty():
880-
transcript = results[0]["tx_ac"][0]
888+
if results:
889+
transcript = results.refseq
881890
else:
882891
# Run if gene is for a noncoding transcript
883892
query = f"""

tests/mappers/test_exon_genomic_coords.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1071,6 +1071,17 @@ async def test_genomic_to_transcript_fusion_context(
10711071
resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
10721072
genomic_tx_seg_service_checks(resp, gusbp3_exon5_start)
10731073

1074+
# Test case where gene does not have a MANE transcript. We are looking
1075+
# to check that the same transcript accession is returned across runs
1076+
inputs = {
1077+
"genomic_ac": "NC_000001.11",
1078+
"seg_end_genomic": 156421555,
1079+
"gene": "MIR9-1HG",
1080+
}
1081+
resp = await test_egc_mapper.genomic_to_tx_segment(**inputs)
1082+
assert resp.tx_ac == "NM_001320454.2"
1083+
assert resp.tx_status == "longest_compatible_remaining"
1084+
10741085

10751086
@pytest.mark.asyncio
10761087
async def test_get_alt_ac_start_and_end(

0 commit comments

Comments
 (0)