Skip to content

Commit 068337d

Browse files
authored
feat: expose cool-seq-tool feature overlap endpoint (#618)
Closes #521. Initial work was done in #523. An error handling overhaul will follow in #617.
1 parent 7f30a0c commit 068337d

File tree

4 files changed

+125
-4
lines changed

4 files changed

+125
-4
lines changed

Pipfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ ruff = "==0.5.0"
1919
fastapi = "*"
2020
uvicorn = "*"
2121
pydantic = "==2.*"
22-
"ga4gh.vrs" = {version = "==2.*", extras = ["extras"]}
22+
"ga4gh.vrs" = {version = ">=2.1.3,<3.0", extras = ["extras"]}
2323
gene-normalizer = ">=0.9.0"
2424
boto3 = "*"
25-
cool-seq-tool = "~=0.13.1"
25+
cool-seq-tool = "~=0.14.2"
2626
bioutils = "*"

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ dependencies = [
3131
"ga4gh.vrs[extras] >=2.1.3,<3.0",
3232
"gene-normalizer >=0.9.0",
3333
"boto3",
34-
"cool-seq-tool ~=0.14.1",
34+
"cool-seq-tool ~=0.14.2",
3535
"bioutils"
3636
]
3737
dynamic = ["version"]

src/variation/main.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import pkg_resources
1313
from bioutils.exceptions import BioutilsError
14+
from cool_seq_tool.mappers.feature_overlap import FeatureOverlap, FeatureOverlapError
1415
from cool_seq_tool.schemas import Assembly, CoordinateType
1516
from fastapi import FastAPI, Query
1617
from ga4gh.vrs import models
@@ -39,6 +40,7 @@
3940
TranslateIdentifierService,
4041
)
4142
from variation.schemas.service_schema import (
43+
FeatureOverlapService,
4244
ToCdnaService,
4345
ToGenomicService,
4446
)
@@ -64,9 +66,11 @@ class Tag(Enum):
6466
VRS_PYTHON = "VRS-Python"
6567
TO_COPY_NUMBER_VARIATION = "To Copy Number Variation"
6668
ALIGNMENT_MAPPER = "Alignment Mapper"
69+
FEATURE_OVERLAP = "Feature Overlap"
6770

6871

6972
query_handler = QueryHandler()
73+
feature_overlap = FeatureOverlap(query_handler.seqrepo_access)
7074

7175

7276
@asynccontextmanager
@@ -828,3 +832,72 @@ async def p_to_g(
828832
response_datetime=datetime.datetime.now(tz=datetime.UTC),
829833
),
830834
)
835+
836+
837+
@app.get(
838+
"/variation/feature_overlap",
839+
summary="Given GRCh38 genomic data, find the overlapping MANE features (gene and cds)",
840+
response_model_exclude_none=True,
841+
response_description="A response to a validly-formed query.",
842+
description="The genomic data is specified as a sequence location by `chromosome`, `start`, `end`. All CDS regions with which the input sequence location has nonzero base pair overlap will be returned.",
843+
tags=[Tag.FEATURE_OVERLAP],
844+
)
845+
def get_feature_overlap(
846+
start: Annotated[int, Query(description="GRCh38 start position")] = ...,
847+
end: Annotated[int, Query(description="GRCh38 end position")] = ...,
848+
chromosome: Annotated[
849+
str | None,
850+
Query(
851+
description="Chromosome. 1..22, X, or Y. If not provided, must provide `identifier`. If both `chromosome` and `identifier` are provided, `chromosome` will be used."
852+
),
853+
] = None,
854+
identifier: Annotated[
855+
str | None,
856+
Query(
857+
description="Genomic identifier on GRCh38 assembly. If not provided, must provide `chromosome`. If both `chromosome` and `identifier` are provided, `chromosome` will be used."
858+
),
859+
] = None,
860+
coordinate_type: Annotated[
861+
CoordinateType, Query(description="Coordinate type for `start` and `end`")
862+
] = CoordinateType.RESIDUE,
863+
) -> FeatureOverlapService:
864+
"""Given GRCh38 genomic data, find the overlapping MANE features (gene and cds)
865+
The genomic data is specified as a sequence location by `chromosome`, `start`,
866+
`end`. All CDS regions with which the input sequence location has nonzero base
867+
pair overlap will be returned.
868+
869+
:param start: GRCh38 start position
870+
:param end: GRCh38 end position
871+
:param chromosome: Chromosome. 1..22, X, or Y. If not provided, must provide
872+
`identifier`. If both `chromosome` and `identifier` are provided,
873+
`chromosome` will be used.
874+
:param identifier: Genomic identifier on GRCh38 assembly. If not provided, must
875+
provide `chromosome`. If both `chromosome` and `identifier` are provided,
876+
`chromosome` will be used.
877+
:param coordinate_type: Coordinate type for `start` and `end`
878+
:return: MANE feature (gene/cds) overlap data represented as a dict. The
879+
dictionary will be keyed by genes which overlap the input sequence location.
880+
Each gene contains a list of the overlapping CDS regions with the beginning
881+
and end of the input sequence location's overlap with each CDS region
882+
"""
883+
try:
884+
overlap_data = feature_overlap.get_grch38_mane_gene_cds_overlap(
885+
start=start,
886+
end=end,
887+
chromosome=chromosome,
888+
identifier=identifier,
889+
coordinate_type=coordinate_type,
890+
)
891+
errors = []
892+
except FeatureOverlapError as e:
893+
errors = [str(e)]
894+
overlap_data = None
895+
896+
return FeatureOverlapService(
897+
feature_overlap=overlap_data,
898+
warnings=errors,
899+
service_meta=ServiceMeta(
900+
version=__version__,
901+
response_datetime=datetime.datetime.now(tz=datetime.UTC),
902+
),
903+
)

src/variation/schemas/service_schema.py

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from enum import Enum
44
from typing import Literal
55

6-
from cool_seq_tool.schemas import CoordinateType
6+
from cool_seq_tool.schemas import CdsOverlap, CoordinateType
77
from pydantic import BaseModel, ConfigDict, StrictInt, StrictStr
88

99
from variation import __version__
@@ -118,3 +118,51 @@ class ToGenomicService(BaseModel, extra="forbid"):
118118
}
119119
}
120120
)
121+
122+
123+
class FeatureOverlapService(BaseModel, extra="forbid"):
124+
"""Service model response for feature overlap"""
125+
126+
feature_overlap: dict[str, list[CdsOverlap]] | None = None
127+
warnings: list[StrictStr] = []
128+
service_meta: ServiceMeta
129+
130+
model_config = ConfigDict(
131+
json_schema_extra={
132+
"example": {
133+
"feature_overlap": {
134+
"BRAF": [
135+
{
136+
"cds": {
137+
"id": "ga4gh:SL.fYRYzNIAoe6UQF9MT1XaYsFscoU68ZJv",
138+
"type": "SequenceLocation",
139+
"sequenceReference": {
140+
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
141+
"type": "SequenceReference",
142+
},
143+
"start": 140726493,
144+
"end": 140726516,
145+
},
146+
"overlap": {
147+
"id": "ga4gh:SL.fYRYzNIAoe6UQF9MT1XaYsFscoU68ZJv",
148+
"type": "SequenceLocation",
149+
"sequenceReference": {
150+
"refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
151+
"type": "SequenceReference",
152+
},
153+
"start": 140726493,
154+
"end": 140726516,
155+
},
156+
}
157+
],
158+
},
159+
"warnings": [],
160+
"service_meta": {
161+
"name": "variation-normalizer",
162+
"version": __version__,
163+
"response_datetime": "2021-04-05T16:44:15.367831",
164+
"url": "https://github.com/cancervariants/variation-normalization",
165+
},
166+
}
167+
}
168+
)

0 commit comments

Comments
 (0)