Skip to content

Commit fb6789b

Browse files
authored
feat: Add tx_status field in TranscriptSegment output (#437)
closes #436
1 parent 31b379b commit fb6789b

File tree

4 files changed

+61
-2
lines changed

4 files changed

+61
-2
lines changed

src/cool_seq_tool/mappers/exon_genomic_coords.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
CoordinateType,
1515
ServiceMeta,
1616
Strand,
17+
TranscriptPriority,
1718
)
1819
from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
1920
from cool_seq_tool.sources.uta_database import GenomicAlnData, UtaDatabase
@@ -113,6 +114,9 @@ class GenomicTxSeg(BaseModelForbidExtra):
113114
)
114115
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
115116
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
117+
tx_status: TranscriptPriority | None = Field(
118+
None, description="Transcript priority for RefSeq transcript accession"
119+
)
116120
strand: Strand | None = Field(
117121
None, description="The strand that the transcript accession exists on."
118122
)
@@ -144,6 +148,7 @@ def check_errors(cls, values: dict) -> dict: # noqa: N805
144148
"gene": "TPM3",
145149
"genomic_ac": "NC_000001.11",
146150
"tx_ac": "NM_152263.3",
151+
"tx_status": "longest_compatible_remaining",
147152
"strand": -1,
148153
"seg": {
149154
"exon_ord": 0,
@@ -172,6 +177,9 @@ class GenomicTxSegService(BaseModelForbidExtra):
172177
)
173178
genomic_ac: StrictStr | None = Field(None, description="RefSeq genomic accession.")
174179
tx_ac: StrictStr | None = Field(None, description="RefSeq transcript accession.")
180+
tx_status: TranscriptPriority | None = Field(
181+
None, description="Transcript priority for RefSeq transcript accession"
182+
)
175183
strand: Strand | None = Field(
176184
None, description="The strand that the transcript exists on."
177185
)
@@ -211,6 +219,7 @@ def add_meta_check_errors(cls, values: dict) -> dict: # noqa: N805
211219
"gene": "TPM3",
212220
"genomic_ac": "NC_000001.11",
213221
"tx_ac": "NM_152263.3",
222+
"tx_status": "longest_compatible_remaining",
214223
"strand": -1,
215224
"seg_start": {
216225
"exon_ord": 0,
@@ -431,6 +440,7 @@ async def tx_segment_to_genomic(
431440
gene=gene,
432441
genomic_ac=genomic_ac,
433442
tx_ac=transcript,
443+
tx_status=self.mane_transcript_mappings.get_transcript_status(transcript),
434444
strand=strand,
435445
seg_start=seg_start,
436446
seg_end=seg_end,
@@ -522,6 +532,7 @@ async def genomic_to_tx_segment(
522532
params["gene"] = start_tx_seg_data.gene
523533
params["genomic_ac"] = start_tx_seg_data.genomic_ac
524534
params["tx_ac"] = start_tx_seg_data.tx_ac
535+
params["tx_status"] = start_tx_seg_data.tx_status
525536
params["strand"] = start_tx_seg_data.strand
526537
params["seg_start"] = start_tx_seg_data.seg
527538
else:
@@ -557,6 +568,7 @@ async def genomic_to_tx_segment(
557568
params["gene"] = end_tx_seg_data.gene
558569
params["genomic_ac"] = end_tx_seg_data.genomic_ac
559570
params["tx_ac"] = end_tx_seg_data.tx_ac
571+
params["tx_status"] = end_tx_seg_data.tx_status
560572
params["strand"] = end_tx_seg_data.strand
561573

562574
params["seg_end"] = end_tx_seg_data.seg
@@ -962,6 +974,7 @@ async def _genomic_to_tx_segment(
962974
gene=gene,
963975
genomic_ac=genomic_ac,
964976
tx_ac=transcript,
977+
tx_status=self.mane_transcript_mappings.get_transcript_status(transcript),
965978
strand=strand,
966979
seg=TxSegment(
967980
exon_ord=exon_num,

src/cool_seq_tool/sources/mane_transcript_mappings.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import polars as pl
99

1010
from cool_seq_tool.resources.data_files import DataFile, get_data_file
11-
from cool_seq_tool.schemas import ManeGeneData
11+
from cool_seq_tool.schemas import ManeGeneData, TranscriptPriority
1212

1313
_logger = logging.getLogger(__name__)
1414

@@ -85,6 +85,22 @@ def get_mane_from_transcripts(self, transcripts: list[str]) -> list[dict]:
8585
return []
8686
return mane_rows.to_dicts()
8787

88+
def get_transcript_status(self, tx_ac: str) -> TranscriptPriority:
    """Determine the transcript priority for a RefSeq transcript accession.

    The accession is looked up via ``get_mane_from_transcripts``; only the
    first returned row is consulted.

    :param tx_ac: A RefSeq transcript accession
    :return: ``TranscriptPriority.LONGEST_COMPATIBLE_REMAINING`` when the lookup
        returns no rows for ``tx_ac``, ``TranscriptPriority.MANE_SELECT`` when
        the first row's ``MANE_status`` is ``"MANE Select"``, and
        ``TranscriptPriority.MANE_PLUS_CLINICAL`` for any other status value
    """
    matching_rows = self.get_mane_from_transcripts([tx_ac])
    if not matching_rows:
        # Lookup returned nothing for this accession, so it gets the
        # fallback priority
        return TranscriptPriority.LONGEST_COMPATIBLE_REMAINING

    status = matching_rows[0]["MANE_status"]
    if status == "MANE Select":
        return TranscriptPriority.MANE_SELECT
    # Every status other than "MANE Select" is reported as MANE Plus Clinical
    return TranscriptPriority.MANE_PLUS_CLINICAL
103+
88104
def get_mane_data_from_chr_pos(
89105
self, alt_ac: str, start: int, end: int
90106
) -> list[dict]:

tests/mappers/test_exon_genomic_coords.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ def tpm3_exon1():
172172
"gene": "TPM3",
173173
"genomic_ac": "NC_000001.11",
174174
"tx_ac": "NM_152263.3",
175+
"tx_status": "longest_compatible_remaining",
175176
"strand": -1,
176177
"seg": {
177178
"exon_ord": 0,
@@ -197,6 +198,7 @@ def tpm3_exon8():
197198
"gene": "TPM3",
198199
"genomic_ac": "NC_000001.11",
199200
"tx_ac": "NM_152263.3",
201+
"tx_status": "longest_compatible_remaining",
200202
"strand": -1,
201203
"seg": {
202204
"exon_ord": 7,
@@ -222,6 +224,7 @@ def tpm3_exon1_g(tpm3_exon1):
222224
"gene": tpm3_exon1.gene,
223225
"genomic_ac": tpm3_exon1.genomic_ac,
224226
"tx_ac": tpm3_exon1.tx_ac,
227+
"tx_status": tpm3_exon1.tx_status,
225228
"strand": tpm3_exon1.strand,
226229
"seg_start": tpm3_exon1.seg,
227230
}
@@ -235,6 +238,7 @@ def tpm3_exon8_g(tpm3_exon8):
235238
"gene": tpm3_exon8.gene,
236239
"genomic_ac": tpm3_exon8.genomic_ac,
237240
"tx_ac": tpm3_exon8.tx_ac,
241+
"tx_status": tpm3_exon8.tx_status,
238242
"strand": tpm3_exon8.strand,
239243
"seg_end": tpm3_exon8.seg,
240244
}
@@ -248,6 +252,7 @@ def tpm3_exon1_exon8(tpm3_exon1, tpm3_exon8):
248252
"gene": tpm3_exon8.gene,
249253
"genomic_ac": tpm3_exon8.genomic_ac,
250254
"tx_ac": tpm3_exon8.tx_ac,
255+
"tx_status": tpm3_exon8.tx_status,
251256
"strand": tpm3_exon8.strand,
252257
"seg_start": tpm3_exon1.seg,
253258
"seg_end": tpm3_exon8.seg,
@@ -269,6 +274,7 @@ def tpm3_exon1_exon8_offset(tpm3_exon1, tpm3_exon8):
269274
"gene": "TPM3",
270275
"genomic_ac": "NC_000001.11",
271276
"tx_ac": "NM_152263.3",
277+
"tx_status": "longest_compatible_remaining",
272278
"strand": -1,
273279
"seg_start": tpm3_exon1_cpy.seg,
274280
"seg_end": tpm3_exon8_cpy.seg,
@@ -283,6 +289,7 @@ def mane_braf():
283289
"gene": "BRAF",
284290
"genomic_ac": "NC_000007.14",
285291
"tx_ac": "NM_004333.6",
292+
"tx_status": "mane_select",
286293
"strand": -1,
287294
"seg_start": {
288295
"exon_ord": 5,
@@ -321,6 +328,7 @@ def wee1_exon2_exon11():
321328
"gene": "WEE1",
322329
"genomic_ac": "NC_000011.10",
323330
"tx_ac": "NM_003390.3",
331+
"tx_status": "longest_compatible_remaining",
324332
"strand": 1,
325333
"seg_start": {
326334
"exon_ord": 1,
@@ -359,6 +367,7 @@ def mane_wee1_exon2_exon11():
359367
"gene": "WEE1",
360368
"genomic_ac": "NC_000011.10",
361369
"tx_ac": "NM_003390.4",
370+
"tx_status": "mane_select",
362371
"strand": 1,
363372
"seg_start": {
364373
"exon_ord": 1,
@@ -397,6 +406,7 @@ def ntrk1_exon10_exon17():
397406
"gene": "NTRK1",
398407
"genomic_ac": "NC_000001.11",
399408
"tx_ac": "NM_002529.3",
409+
"tx_status": "longest_compatible_remaining",
400410
"strand": 1,
401411
"seg_start": {
402412
"exon_ord": 9,
@@ -435,6 +445,7 @@ def zbtb10_exon3_end():
435445
"gene": "ZBTB10",
436446
"genomic_ac": "NC_000008.11",
437447
"tx_ac": "NM_001105539.3",
448+
"tx_status": "mane_select",
438449
"strand": 1,
439450
"seg_start": None,
440451
"seg_end": {
@@ -461,6 +472,7 @@ def zbtb10_exon5_start():
461472
"gene": "ZBTB10",
462473
"genomic_ac": "NC_000008.11",
463474
"tx_ac": "NM_001105539.3",
475+
"tx_status": "mane_select",
464476
"strand": 1,
465477
"seg_start": {
466478
"exon_ord": 4,
@@ -487,6 +499,7 @@ def tpm3_exon6_end():
487499
"gene": "TPM3",
488500
"genomic_ac": "NC_000001.11",
489501
"tx_ac": "NM_152263.4",
502+
"tx_status": "mane_select",
490503
"strand": -1,
491504
"seg_start": None,
492505
"seg_end": {
@@ -513,6 +526,7 @@ def tpm3_exon5_start():
513526
"gene": "TPM3",
514527
"genomic_ac": "NC_000001.11",
515528
"tx_ac": "NM_152263.4",
529+
"tx_status": "mane_select",
516530
"strand": -1,
517531
"seg_start": {
518532
"exon_ord": 4,
@@ -539,6 +553,7 @@ def gusbp3_exon2_end():
539553
"gene": "GUSBP3",
540554
"genomic_ac": "NC_000005.10",
541555
"tx_ac": "NR_027386.2",
556+
"tx_status": "longest_compatible_remaining",
542557
"strand": -1,
543558
"seg_start": None,
544559
"seg_end": {
@@ -565,6 +580,7 @@ def eln_grch38_intronic():
565580
"gene": "ELN",
566581
"genomic_ac": "NC_000007.14",
567582
"tx_ac": "NM_000501.4",
583+
"tx_status": "mane_select",
568584
"strand": 1,
569585
"seg_start": {
570586
"exon_ord": 0,
@@ -603,6 +619,7 @@ def gusbp3_exon5_start():
603619
"gene": "GUSBP3",
604620
"genomic_ac": "NC_000005.10",
605621
"tx_ac": "NR_027386.2",
622+
"tx_status": "longest_compatible_remaining",
606623
"strand": -1,
607624
"seg_start": {
608625
"exon_ord": 4,
@@ -645,6 +662,7 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True):
645662
assert actual.gene == expected.gene
646663
assert actual.genomic_ac == expected.genomic_ac
647664
assert actual.tx_ac == expected.tx_ac
665+
assert actual.tx_status == expected.tx_status
648666
assert actual.strand == expected.strand
649667

650668
for seg_attr in ["seg_start", "seg_end"]:
@@ -676,6 +694,7 @@ def genomic_tx_seg_service_checks(actual, expected=None, is_valid=True):
676694
assert actual.gene is None
677695
assert actual.genomic_ac is None
678696
assert actual.tx_ac is None
697+
assert actual.tx_status is None
679698
assert actual.strand is None
680699
assert actual.seg_start is None
681700
assert actual.seg_end is None
@@ -720,6 +739,7 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
720739
assert actual.gene == expected.gene
721740
assert actual.genomic_ac == expected.genomic_ac
722741
assert actual.tx_ac == expected.tx_ac
742+
assert actual.tx_status == expected.tx_status
723743
assert actual.strand == expected.strand
724744

725745
expected_seg = expected.seg
@@ -747,6 +767,7 @@ def genomic_tx_seg_checks(actual, expected=None, is_valid=True):
747767
assert actual.gene is None
748768
assert actual.genomic_ac is None
749769
assert actual.tx_ac is None
770+
assert actual.tx_status is None
750771
assert actual.strand is None
751772
assert actual.seg is None
752773
assert len(actual.errors) > 0

tests/sources/test_mane_transcript_mappings.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import polars as pl
66
import pytest
77

8-
from cool_seq_tool.schemas import ManeGeneData
8+
from cool_seq_tool.schemas import ManeGeneData, TranscriptPriority
99

1010

1111
@pytest.fixture(scope="module")
@@ -168,6 +168,15 @@ def test_get_mane_from_transcripts(
168168
assert resp == []
169169

170170

171+
def test_get_transcript_status(test_mane_transcript_mappings):
    """Test that get_transcript_status returns the expected priority"""
    # Checked in insertion order: the MANE Select accession first, then the
    # earlier version of the same accession, which is expected to fall back to
    # longest compatible remaining
    expected_by_accession = {
        "NM_152263.4": TranscriptPriority.MANE_SELECT,
        "NM_152263.3": TranscriptPriority.LONGEST_COMPATIBLE_REMAINING,
    }
    for tx_ac, expected in expected_by_accession.items():
        actual = test_mane_transcript_mappings.get_transcript_status(tx_ac)
        assert actual == expected
178+
179+
171180
def test_get_mane_data_from_chr_pos(
172181
test_mane_transcript_mappings, braf_select, braf_plus_clinical
173182
):

0 commit comments

Comments
 (0)