36 changes: 35 additions & 1 deletion server/src/metakb/repository/neo4j_models.py
@@ -27,6 +27,7 @@
from ga4gh.cat_vrs.models import (
CategoricalVariant,
DefiningAlleleConstraint,
FeatureContextConstraint,
)
from ga4gh.core.models import Coding, ConceptMapping, Extension, MappableConcept
from ga4gh.va_spec.base import (
@@ -245,6 +246,34 @@ def to_gks(self) -> DefiningAlleleConstraint:
)


class FeatureContextConstraintNode(BaseNode):
"""Node model for Cat-VRS FeatureContextConstraint"""

id: str
has_feature_context: GeneNode

@classmethod
def from_gks(cls, constraint: FeatureContextConstraint, constraint_id: str) -> Self:
"""Create new node instance from a Cat-VRS FeatureContextConstraint

:param constraint: original constraint object
:param constraint_id: database identifier. Our working convention incorporates
the containing categorical variant's ID in this value, so it must be passed in
as a separate argument
:return: node instance
"""
return cls(
id=constraint_id,
has_feature_context=GeneNode.from_gks(constraint.featureContext),
)

def to_gks(self) -> FeatureContextConstraint:
"""Create cat-vrs-python feature context constraint class instance"""
return FeatureContextConstraint(
featureContext=self.has_feature_context.to_gks(),
)


class CategoricalVariantNode(BaseNode):
"""Node model for Categorical Variant."""

@@ -254,7 +283,7 @@ class CategoricalVariantNode(BaseNode):
aliases: list[str] = []
extensions: str
mappings: str
has_constraint: DefiningAlleleConstraintNode
has_constraint: DefiningAlleleConstraintNode | FeatureContextConstraintNode
has_members: list[AlleleNode]

@classmethod
@@ -271,6 +300,11 @@ def from_gks(cls, catvar: CategoricalVariant) -> Self:
constraint_node = DefiningAlleleConstraintNode.from_gks(
constraint.root, constraint_id
)
elif constraint.root.type == "FeatureContextConstraint":
constraint_id = f"{catvar.id}:{constraint.root.type}:{constraint.root.featureContext.id}"
constraint_node = FeatureContextConstraintNode.from_gks(
constraint.root, constraint_id
)
else:
msg = f"Unrecognized constraint type: {constraint}"
raise ValueError(msg)
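For context, here is a minimal sketch of how the new node model is meant to round-trip a constraint. The import path is inferred from this PR's file layout, and the round-trip helper itself is illustrative rather than part of the change:

```python
# Hedged sketch: round-trip a Cat-VRS FeatureContextConstraint through the new
# node model. The ID convention mirrors CategoricalVariantNode.from_gks above.
from ga4gh.cat_vrs.models import FeatureContextConstraint

from metakb.repository.neo4j_models import FeatureContextConstraintNode  # path assumed


def roundtrip(constraint: FeatureContextConstraint, catvar_id: str) -> FeatureContextConstraint:
    # Database ID = containing catvar ID + constraint type + feature context ID
    constraint_id = f"{catvar_id}:{constraint.type}:{constraint.featureContext.id}"
    node = FeatureContextConstraintNode.from_gks(constraint, constraint_id)
    return node.to_gks()  # expected to be equivalent to the original constraint
```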
42 changes: 26 additions & 16 deletions server/src/metakb/repository/neo4j_repository.py
@@ -47,6 +47,7 @@
DocumentNode,
DrugNode,
EvidenceLineNode,
FeatureContextConstraintNode,
GeneNode,
LiteralSequenceExpressionNode,
MethodNode,
@@ -203,14 +204,14 @@ def add_catvar(self, tx: Transaction, catvar: CategoricalVariant) -> None:
"""
if catvar.constraints and len(catvar.constraints) == 1:
constraint = catvar.constraints[0]

catvar_node = CategoricalVariantNode.from_gks(catvar)
if constraint.root.type == "DefiningAlleleConstraint":
catvar_node = CategoricalVariantNode.from_gks(catvar)
tx.run(
queries_catalog.load_dac_catvar(),
cv=catvar_node.model_dump(mode="json"),
)
# in the future, handle other kinds of catvars here
query = queries_catalog.load_dac_catvar()
elif constraint.root.type == "FeatureContextConstraint":
query = queries_catalog.load_fcc_catvar()
else:
raise TypeError
tx.run(query, cv=catvar_node.model_dump(mode="json"))
else:
msg = f"Valid CatVars should have a single constraint but `constraints` property for {catvar.id} is {catvar.constraints}"
raise ValueError(msg)
@@ -436,14 +437,22 @@ def _get_statement_node_from_result(
:param record: Neo4j result row
:return: A statement node with all entities/supporting data filled in
"""
defining_allele_node = self._make_allele_node(
record["defining_allele"],
record["defining_allele_sl"],
record["defining_allele_se"],
)
constraint_node = DefiningAlleleConstraintNode(
has_defining_allele=defining_allele_node, **record["constraint"]
)
if record.get("defining_allele"):
defining_allele_node = self._make_allele_node(
record["defining_allele"],
record["defining_allele_sl"],
record["defining_allele_se"],
)
constraint_node = DefiningAlleleConstraintNode(
has_defining_allele=defining_allele_node, **record["constraint"]
)
elif feature_context_vals := record.get("feature_context"):
feature_context_node = GeneNode(**feature_context_vals)
constraint_node = FeatureContextConstraintNode(
has_feature_context=feature_context_node, **record["constraint"]
)
else:
raise ValueError
member_nodes = [
self._make_allele_node(m["allele"], m["location"], m["state"])
for m in record["members"]
@@ -598,6 +607,7 @@ def search_statements(
* Combo-therapy specific search
* Specific logic for searching diseases/conditionsets
* Search on source values rather than normalized values
* Searching non-allele catvars (e.g. feature context catvars)

:param variation_ids: list of normalized variation IDs
:param gene_ids: list of normalized gene IDs
@@ -610,7 +620,7 @@
"""
if limit is None:
limit = CYPHER_PAGE_LIMIT
# IDs args MUST be lists -- can't be null
# IDs args MUST be lists -- can't be null or the Cypher query will error out
result = self.session.execute_read(
lambda tx, **kwargs: list(
tx.run(queries_catalog.search_statements(), **kwargs)
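One usage note worth making concrete for reviewers: as the comment above says, the ID arguments must always be lists, never None. A hedged sketch of a call follows; the keyword names mirror the Cypher parameters, and the actual Python signature may differ:

```python
# Hedged sketch: `repo` is an instantiated Neo4j repository. Pass [] (never
# None) for any unused filter, or the Cypher query will error out.
records = repo.search_statements(
    statement_ids=[],
    variation_ids=[],
    gene_ids=["hgnc:1097"],  # illustrative normalized gene ID (BRAF)
    condition_ids=[],
    therapy_ids=[],
)
```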
5 changes: 5 additions & 0 deletions server/src/metakb/repository/queries/catalog.py
@@ -55,6 +55,11 @@ def load_dac_catvar() -> str:
return _load("load_definingalleleconstraint_catvar.cypher")


@cache
def load_fcc_catvar() -> str:
return _load("load_featurecontextconstraint_catvar.cypher")


@cache
def load_document() -> str:
return _load("load_document.cypher")
24 changes: 24 additions & 0 deletions server/src/metakb/repository/queries/load_featurecontextconstraint_catvar.cypher
@@ -0,0 +1,24 @@
MERGE (cv:Variation:CategoricalVariant {id: $cv.id})
ON CREATE SET
cv +=
{
name: $cv.name,
description: $cv.description,
aliases: $cv.aliases,
extensions: $cv.extensions,
mappings: $cv.mappings
}
MERGE (constr:Constraint:FeatureContextConstraint {id: $cv.has_constraint.id})
MERGE (cv)-[:HAS_CONSTRAINT]->(constr)
MERGE (g:Gene {id: $cv.has_constraint.has_feature_context.id})
ON CREATE SET
g +=
{
normalized_id: $cv.has_constraint.has_feature_context.normalized_id,
description: $cv.has_constraint.has_feature_context.description,
name: $cv.has_constraint.has_feature_context.name,
aliases: $cv.has_constraint.has_feature_context.aliases,
mappings: $cv.has_constraint.has_feature_context.mappings,
extensions: $cv.has_constraint.has_feature_context.extensions
}
MERGE (constr)-[:HAS_FEATURE_CONTEXT]->(g)
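To make the query's input concrete, here is an illustrative shape of the $cv parameter it reads, derived from the property accesses above. Real payloads come from CategoricalVariantNode.model_dump(mode="json"), and every value below is hypothetical:

```python
# Illustrative $cv payload for this query; extensions/mappings are
# pre-serialized strings per the node models in this PR.
cv = {
    "id": "example.catvar:1",
    "name": "BRAF alteration",
    "description": None,
    "aliases": [],
    "extensions": "[]",
    "mappings": "[]",
    "has_constraint": {
        "id": "example.catvar:1:FeatureContextConstraint:civic.gid:5",
        "has_feature_context": {
            "id": "civic.gid:5",           # hypothetical source gene ID
            "normalized_id": "hgnc:1097",  # BRAF
            "name": "BRAF",
            "description": None,
            "aliases": [],
            "mappings": "[]",
            "extensions": "[]",
        },
    },
}
# e.g. tx.run(queries_catalog.load_fcc_catvar(), cv=cv)
```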
35 changes: 25 additions & 10 deletions server/src/metakb/repository/queries/search_statements.cypher
@@ -1,3 +1,4 @@
// ------ Process input args -----
// Expect all params to be lists (possibly empty), never null:
// $statement_ids, $variation_ids, $condition_ids, $gene_ids, $therapy_ids
MATCH (s:Statement)
@@ -34,13 +35,13 @@ WHERE
WHERE d.normalized_id IN $therapy_ids
})

// get basic statement info
// ----- get basic statement info -----
MATCH (s)-[:HAS_STRENGTH]->(str:Strength)
MATCH (s)-[:IS_SPECIFIED_BY]->(method:Method)
MATCH (method)-[:IS_REPORTED_IN]->(method_doc:Document)
OPTIONAL MATCH (s)-[:HAS_CLASSIFICATION]->(classification:Classification)

// Get therapeutic components
// ----- Get therapeutic components -----
OPTIONAL MATCH (s)-[:HAS_THERAPEUTIC]->(tg:TherapyGroup)
OPTIONAL MATCH (tg)-[:HAS_SUBSTITUTE|HAS_COMPONENT]->(tm:Drug)
WITH
@@ -72,16 +73,28 @@ WITH
WHEN tg IS NULL THEN td
END AS drug

// Get catvar components
MATCH
(cv)-[:HAS_CONSTRAINT]->
// ----- Get catvar components -----
MATCH (cv)-[:HAS_CONSTRAINT]->(constraint)
// Either get constraint for Feature Context...
OPTIONAL MATCH
(constraint:FeatureContextConstraint)-[:HAS_FEATURE_CONTEXT]->
(feature_context:Gene)
// ...or for Defining Allele
OPTIONAL MATCH
(constraint:DefiningAlleleConstraint)-[:HAS_DEFINING_ALLELE]->
(defining_allele:Allele)
MATCH (defining_allele)-[:HAS_LOCATION]->(defining_allele_sl:SequenceLocation)
MATCH
OPTIONAL MATCH
(defining_allele)-[:HAS_LOCATION]->(defining_allele_sl:SequenceLocation)
OPTIONAL MATCH
(defining_allele)-[:HAS_STATE]->(defining_allele_se:SequenceExpression)
OPTIONAL MATCH
(defining_allele)-[:HAS_LOCATION]->(defining_allele_sl:SequenceLocation)
OPTIONAL MATCH
(defining_allele_sl)-[:HAS_SEQUENCE_REFERENCE]->
(defining_allele_sr:SequenceReference)
MATCH (defining_allele)-[:HAS_STATE]->(defining_allele_se:SequenceExpression)
OPTIONAL MATCH
(defining_allele)-[:HAS_STATE]->(defining_allele_se:SequenceExpression)
// Then get members
CALL (cv) {
WITH cv
OPTIONAL MATCH (cv)-[:HAS_MEMBER]->(m:Allele)
@@ -97,13 +110,13 @@ CALL (cv) {
) AS members
}

// get documents
// ----- get documents -----
CALL (s) {
MATCH (s)-[:IS_REPORTED_IN]->(doc:Document)
RETURN collect(DISTINCT doc) AS documents
}

// get evidence line IDs
// ----- get evidence line IDs -----
CALL (s) {
WITH s
OPTIONAL MATCH (s)-[:HAS_EVIDENCE_LINE]->(line:EvidenceLine)
@@ -119,6 +132,7 @@ CALL (s) {
RETURN [x IN tmp WHERE x IS NOT NULL] AS evidence_lines
}

// ----- return everything -----
RETURN DISTINCT
s,
str,
@@ -130,6 +144,7 @@ RETURN DISTINCT
defining_allele,
defining_allele_sl {.*, has_sequence_reference: defining_allele_sr} AS defining_allele_sl,
defining_allele_se,
feature_context,
members,
c,
g,
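To clarify how the two constraint shapes surface in results, a hedged sketch of partial rows as the repository sees them; key names follow the RETURN clause above, values are illustrative, and unrelated columns are omitted:

```python
# For a FeatureContextConstraint catvar, the defining_allele columns come back
# null and feature_context is populated; for a DefiningAlleleConstraint catvar
# the reverse holds. _get_statement_node_from_result branches on exactly this.
fcc_row = {
    "constraint": {"id": "example.catvar:1:FeatureContextConstraint:civic.gid:5"},
    "feature_context": {"id": "civic.gid:5", "name": "BRAF"},
    "defining_allele": None,
    "defining_allele_sl": None,
    "defining_allele_se": None,
}
dac_row = {
    "constraint": {"id": "example.catvar:2:DefiningAlleleConstraint"},
    "feature_context": None,
    "defining_allele": {"id": "ga4gh:VA.example"},
    "defining_allele_sl": {"id": "ga4gh:SL.example"},
    "defining_allele_se": {"type": "LiteralSequenceExpression"},
}
```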
10 changes: 5 additions & 5 deletions server/src/metakb/services/manage_data.py
@@ -106,12 +106,12 @@ def is_loadable_statement(statement: Statement) -> bool:
proposition.subjectVariant.constraints,
)
success = False
if (
proposition.subjectVariant.constraints[0].root.type
!= "DefiningAlleleConstraint"
):
if proposition.subjectVariant.constraints[0].root.type not in {
"DefiningAlleleConstraint",
"FeatureContextConstraint",
}:
_logger.info(
"%s could not be loaded because it doesn't use a DefiningAlleleConstraint: %s",
"%s could not be loaded because it doesn't use a supported constraint type: %s",
statement.id,
proposition.subjectVariant.constraints,
)
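A minimal standalone sketch of the gate this change implements; the helper name and the constant are introduced here purely for illustration, and the statement-to-proposition attribute access is assumed from the surrounding code:

```python
# Hypothetical helper equivalent to the constraint-type check above.
SUPPORTED_CONSTRAINT_TYPES = {"DefiningAlleleConstraint", "FeatureContextConstraint"}


def uses_supported_constraint(statement) -> bool:
    """Return True if the statement's subject variant uses a loadable constraint type."""
    constraints = statement.proposition.subjectVariant.constraints
    return bool(constraints) and constraints[0].root.type in SUPPORTED_CONSTRAINT_TYPES
```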