55from pathlib import Path
66from typing import ClassVar
77
8- from ga4gh .cat_vrs .models import CategoricalVariant , DefiningAlleleConstraint
8+ from ga4gh .cat_vrs .models import (
9+ CategoricalVariant ,
10+ DefiningAlleleConstraint ,
11+ FeatureContextConstraint ,
12+ )
913from ga4gh .core import sha512t24u
1014from ga4gh .core .models import (
1115 Coding ,
3943 _TransformedRecordsCache ,
4044)
4145
42- logger = logging .getLogger (__name__ )
46+ _logger = logging .getLogger (__name__ )
4347
4448
4549class _MoaTransformedCache (_TransformedRecordsCache ):
@@ -111,7 +115,7 @@ async def _add_variant_study_stmt(self, assertion: dict) -> None:
111115 # Check cache for variation record (which contains gene information)
112116 variation_gene_map = self ._cache .variations .get (variant_id )
113117 if not variation_gene_map :
114- logger .debug (
118+ _logger .debug (
115119 "%s has no variation for variant_id %s" , assertion_id , variant_id
116120 )
117121 return
@@ -136,7 +140,7 @@ async def _add_variant_study_stmt(self, assertion: dict) -> None:
136140 # Add disease
137141 moa_disease = self ._add_disease (assertion ["disease" ])
138142 if not moa_disease :
139- logger .debug (
143+ _logger .debug (
140144 "%s has no disease for disease %s" , assertion_id , assertion ["disease" ]
141145 )
142146 return
@@ -169,7 +173,7 @@ async def _add_variant_study_stmt(self, assertion: dict) -> None:
169173 prop_params ["objectTherapeutic" ] = self ._get_therapy_or_group (assertion )
170174
171175 if not prop_params ["objectTherapeutic" ]:
172- logger .debug (
176+ _logger .debug (
173177 "%s has no therapy for therapy_name %s" ,
174178 assertion_id ,
175179 assertion ["therapy" ]["name" ],
@@ -233,8 +237,48 @@ async def _add_categorical_variants(self, variants: list[dict]) -> None:
233237 constraints = None
234238 extensions = []
235239
236- if "rearrangement_type" in variant or not protein_change or not gene :
237- logger .debug (
240+ if (
241+ variant ["feature_type" ] == "somatic_variant"
242+ and variant ["alternate_allele" ] is None
243+ and feature == gene
244+ and protein_change is None
245+ # Cat-VRS has no settled way to define a specific exon as a feature, so only
246+ # variants without an exon are handled here; see https://github.com/ga4gh/cat-vrs/discussions/161
247+ and variant ["exon" ] is None
248+ ):
249+ gene_norm_resp , normalized_gene_id = (
250+ self .vicc_normalizers .normalize_gene (feature )
251+ )
252+ feature = f"{ feature } Mutation"
253+ if not normalized_gene_id :
254+ _logger .debug ("Unable to normalize feature term: %s" , feature )
255+ extensions .append (self ._get_vicc_normalizer_failure_ext ())
256+ else :
257+ mappings = []
258+ extensions = []
259+ if normalized_gene_id :
260+ mappings .extend (
261+ self ._get_vicc_normalizer_mappings (
262+ normalized_gene_id , gene_norm_resp
263+ )
264+ )
265+ id_ = f"moa.{ gene_norm_resp .gene .id } "
266+ else :
267+ id_ = f"moa.gene:{ _sanitize_name (feature )} "
268+ extensions .append (self ._get_vicc_normalizer_failure_ext ())
269+
270+ gene_concept = MappableConcept (
271+ id = id_ ,
272+ conceptType = "Gene" ,
273+ name = gene ,
274+ mappings = mappings or None ,
275+ extensions = extensions or None ,
276+ )
277+ constraints = [
278+ FeatureContextConstraint (featureContext = gene_concept )
279+ ]
280+ elif "rearrangement_type" in variant or not protein_change or not gene :
281+ _logger .debug (
238282 "Variation Normalizer does not support %s: %s" ,
239283 moa_variant_id ,
240284 feature ,
@@ -249,7 +293,7 @@ async def _add_categorical_variants(self, variants: list[dict]) -> None:
249293 vrs_variation = await self .vicc_normalizers .normalize_variation (query )
250294
251295 if not vrs_variation :
252- logger .debug (
296+ _logger .debug (
253297 "Variation Normalizer unable to normalize: moa.variant: %s using query: %s" ,
254298 variant_id ,
255299 query ,
@@ -355,12 +399,12 @@ async def _get_variation_members(
355399 genomic_params ["name" ] = gnomad_vcf
356400 members = [Variation (** genomic_params )]
357401 else :
358- logger .debug (
402+ _logger .debug (
359403 "Variation Normalizer unable to normalize genomic representation: %s" ,
360404 gnomad_vcf ,
361405 )
362406 else :
363- logger .debug (
407+ _logger .debug (
364408 "Not enough enough information provided to create genomic representation: %s" ,
365409 moa_rep_coord ,
366410 )
@@ -432,7 +476,7 @@ def _get_therapy_or_group(
432476 therapy = assertion ["therapy" ]
433477 therapy_name = therapy ["name" ]
434478 if not therapy_name :
435- logger .debug ("%s has no therapy_name" , assertion ["id" ])
479+ _logger .debug ("%s has no therapy_name" , assertion ["id" ])
436480 return None
437481
438482 therapy_type = therapy ["type" ]
@@ -503,7 +547,7 @@ def _resolve_concept_discrepancy(
503547 :param is_disease: ``True`` if ``cached_obj`` is a disease. ``False`` if
504548 ``cached_obj`` is a therapy
505549 """
506- logger .debug (
550+ _logger .debug (
507551 "MOA %s and %s resolve to same concept %s" ,
508552 moa_concept_label ,
509553 cached_label ,
@@ -585,7 +629,7 @@ def _resolve_therapy_discrepancy(
585629 ) = self .vicc_normalizers .normalize_therapy (name )
586630
587631 if not normalized_therapeutic_id :
588- logger .debug ("Therapy Normalizer unable to normalize: %s" , therapy )
632+ _logger .debug ("Therapy Normalizer unable to normalize: %s" , therapy )
589633 extensions .append (self ._get_vicc_normalizer_failure_ext ())
590634 id_ = therapy_id
591635 else :
@@ -709,7 +753,7 @@ def _get_disease(self, disease: dict) -> MappableConcept:
709753 break
710754
711755 if not normalized_disease_id :
712- logger .debug ("Disease Normalizer unable to normalize: %s" , queries )
756+ _logger .debug ("Disease Normalizer unable to normalize: %s" , queries )
713757 extensions .append (self ._get_vicc_normalizer_failure_ext ())
714758 id_ = f"moa.disease:{ _sanitize_name (disease_name )} "
715759 else :
0 commit comments