Skip to content

Commit 6268fa3

Browse files
authored
feat: add '<gene> mutation' catvars from MOA (#651)
* Identify MOA variants that can be represented with [FeatureContextConstraints](https://cat-vrs.ga4gh.org/en/latest/concepts/catvrs_model.html#featurecontextconstraint) and construct those constraints. I think this could account for up to 200 additional statements.
* Update the DB ingest method to block upload of unsupported catvars on the DB side. Once this PR is approved, a subsequent ticket could handle ingest and retrieval of them.

MOA examples: https://moalmanac.org/assertion/559, https://moalmanac.org/assertion/521

See here for an example of CDM output: https://gist.github.com/jsstevenson/5fc111fdff9e7a23efba18bf682341bc
1 parent e4facc6 commit 6268fa3

File tree

9 files changed

+1106
-374
lines changed

9 files changed

+1106
-374
lines changed

server/src/metakb/services/manage_data.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,24 @@ def is_loadable_statement(statement: Statement) -> bool:
9898
proposition.subjectVariant,
9999
)
100100
success = False
101+
else:
102+
if len(proposition.subjectVariant.constraints) != 1:
103+
_logger.info(
104+
"%s could not be loaded because it contains more than 1 constraint: %s",
105+
statement.id,
106+
proposition.subjectVariant.constraints,
107+
)
108+
success = False
109+
if (
110+
proposition.subjectVariant.constraints[0].root.type
111+
!= "DefiningAlleleConstraint"
112+
):
113+
_logger.info(
114+
"%s could not be loaded because it doesn't use a DefiningAlleleConstraint: %s",
115+
statement.id,
116+
proposition.subjectVariant.constraints,
117+
)
118+
success = False
101119
if isinstance(proposition, VariantTherapeuticResponseProposition):
102120
if not _is_loadable_condition(
103121
proposition.conditionQualifier.root, statement.id

server/src/metakb/transformers/moa.py

Lines changed: 58 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,11 @@
55
from pathlib import Path
66
from typing import ClassVar
77

8-
from ga4gh.cat_vrs.models import CategoricalVariant, DefiningAlleleConstraint
8+
from ga4gh.cat_vrs.models import (
9+
CategoricalVariant,
10+
DefiningAlleleConstraint,
11+
FeatureContextConstraint,
12+
)
913
from ga4gh.core import sha512t24u
1014
from ga4gh.core.models import (
1115
Coding,
@@ -39,7 +43,7 @@
3943
_TransformedRecordsCache,
4044
)
4145

42-
logger = logging.getLogger(__name__)
46+
_logger = logging.getLogger(__name__)
4347

4448

4549
class _MoaTransformedCache(_TransformedRecordsCache):
@@ -111,7 +115,7 @@ async def _add_variant_study_stmt(self, assertion: dict) -> None:
111115
# Check cache for variation record (which contains gene information)
112116
variation_gene_map = self._cache.variations.get(variant_id)
113117
if not variation_gene_map:
114-
logger.debug(
118+
_logger.debug(
115119
"%s has no variation for variant_id %s", assertion_id, variant_id
116120
)
117121
return
@@ -136,7 +140,7 @@ async def _add_variant_study_stmt(self, assertion: dict) -> None:
136140
# Add disease
137141
moa_disease = self._add_disease(assertion["disease"])
138142
if not moa_disease:
139-
logger.debug(
143+
_logger.debug(
140144
"%s has no disease for disease %s", assertion_id, assertion["disease"]
141145
)
142146
return
@@ -169,7 +173,7 @@ async def _add_variant_study_stmt(self, assertion: dict) -> None:
169173
prop_params["objectTherapeutic"] = self._get_therapy_or_group(assertion)
170174

171175
if not prop_params["objectTherapeutic"]:
172-
logger.debug(
176+
_logger.debug(
173177
"%s has no therapy for therapy_name %s",
174178
assertion_id,
175179
assertion["therapy"]["name"],
@@ -233,8 +237,48 @@ async def _add_categorical_variants(self, variants: list[dict]) -> None:
233237
constraints = None
234238
extensions = []
235239

236-
if "rearrangement_type" in variant or not protein_change or not gene:
237-
logger.debug(
240+
if (
241+
variant["feature_type"] == "somatic_variant"
242+
and variant["alternate_allele"] is None
243+
and feature == gene
244+
and protein_change is None
245+
# no slam-dunk catvar solution exists for defining specific exons as features --
246+
# see https://github.com/ga4gh/cat-vrs/discussions/161
247+
and variant["exon"] is None
248+
):
249+
gene_norm_resp, normalized_gene_id = (
250+
self.vicc_normalizers.normalize_gene(feature)
251+
)
252+
feature = f"{feature} Mutation"
253+
if not normalized_gene_id:
254+
_logger.debug("Unable to normalize feature term: %s", feature)
255+
extensions.append(self._get_vicc_normalizer_failure_ext())
256+
else:
257+
mappings = []
258+
extensions = []
259+
if normalized_gene_id:
260+
mappings.extend(
261+
self._get_vicc_normalizer_mappings(
262+
normalized_gene_id, gene_norm_resp
263+
)
264+
)
265+
id_ = f"moa.{gene_norm_resp.gene.id}"
266+
else:
267+
id_ = f"moa.gene:{_sanitize_name(feature)}"
268+
extensions.append(self._get_vicc_normalizer_failure_ext())
269+
270+
gene_concept = MappableConcept(
271+
id=id_,
272+
conceptType="Gene",
273+
name=gene,
274+
mappings=mappings or None,
275+
extensions=extensions or None,
276+
)
277+
constraints = [
278+
FeatureContextConstraint(featureContext=gene_concept)
279+
]
280+
elif "rearrangement_type" in variant or not protein_change or not gene:
281+
_logger.debug(
238282
"Variation Normalizer does not support %s: %s",
239283
moa_variant_id,
240284
feature,
@@ -249,7 +293,7 @@ async def _add_categorical_variants(self, variants: list[dict]) -> None:
249293
vrs_variation = await self.vicc_normalizers.normalize_variation(query)
250294

251295
if not vrs_variation:
252-
logger.debug(
296+
_logger.debug(
253297
"Variation Normalizer unable to normalize: moa.variant: %s using query: %s",
254298
variant_id,
255299
query,
@@ -355,12 +399,12 @@ async def _get_variation_members(
355399
genomic_params["name"] = gnomad_vcf
356400
members = [Variation(**genomic_params)]
357401
else:
358-
logger.debug(
402+
_logger.debug(
359403
"Variation Normalizer unable to normalize genomic representation: %s",
360404
gnomad_vcf,
361405
)
362406
else:
363-
logger.debug(
407+
_logger.debug(
364408
"Not enough enough information provided to create genomic representation: %s",
365409
moa_rep_coord,
366410
)
@@ -432,7 +476,7 @@ def _get_therapy_or_group(
432476
therapy = assertion["therapy"]
433477
therapy_name = therapy["name"]
434478
if not therapy_name:
435-
logger.debug("%s has no therapy_name", assertion["id"])
479+
_logger.debug("%s has no therapy_name", assertion["id"])
436480
return None
437481

438482
therapy_type = therapy["type"]
@@ -503,7 +547,7 @@ def _resolve_concept_discrepancy(
503547
:param is_disease: ``True`` if ``cached_obj`` is a disease. ``False`` if
504548
``cached_obj`` is a therapy
505549
"""
506-
logger.debug(
550+
_logger.debug(
507551
"MOA %s and %s resolve to same concept %s",
508552
moa_concept_label,
509553
cached_label,
@@ -585,7 +629,7 @@ def _resolve_therapy_discrepancy(
585629
) = self.vicc_normalizers.normalize_therapy(name)
586630

587631
if not normalized_therapeutic_id:
588-
logger.debug("Therapy Normalizer unable to normalize: %s", therapy)
632+
_logger.debug("Therapy Normalizer unable to normalize: %s", therapy)
589633
extensions.append(self._get_vicc_normalizer_failure_ext())
590634
id_ = therapy_id
591635
else:
@@ -709,7 +753,7 @@ def _get_disease(self, disease: dict) -> MappableConcept:
709753
break
710754

711755
if not normalized_disease_id:
712-
logger.debug("Disease Normalizer unable to normalize: %s", queries)
756+
_logger.debug("Disease Normalizer unable to normalize: %s", queries)
713757
extensions.append(self._get_vicc_normalizer_failure_ext())
714758
id_ = f"moa.disease:{_sanitize_name(disease_name)}"
715759
else:

0 commit comments

Comments
 (0)