diff --git a/src/retriever/data_tiers/tier_0/dgraph/transpiler.py b/src/retriever/data_tiers/tier_0/dgraph/transpiler.py index 1078e5fb..159780e1 100644 --- a/src/retriever/data_tiers/tier_0/dgraph/transpiler.py +++ b/src/retriever/data_tiers/tier_0/dgraph/transpiler.py @@ -86,7 +86,10 @@ class DgraphTranspiler(Tier0Transpiler): 25 # Default assumption for average edges per node ) PINNEDNESS_RECURSION_DEPTH: int = ( - 10 # Max recursion depth for pinnedness calculation + 2 # Max recursion depth for pinnedness calculation + ) + PINNEDNESS_ADJ_WEIGHT: float = ( + 0.1 # Dampen adjacency contribution relative to the base ID selectivity ) FilterScalar: TypeAlias = str | int | float | bool # noqa: UP040 @@ -292,40 +295,54 @@ def _compute_log_expected_n( last: str | None = None, level: int = 0, ) -> float: - """Compute the log of the expected number of unique knodes bound to the specified qnode.""" + """Compute the log of the expected number of unique knodes bound to the specified qnode. + + The base term is log(num_ids[qnode_id]). Neighbor contributions are heavily + dampened by PINNEDNESS_ADJ_WEIGHT and limited by PINNEDNESS_RECURSION_DEPTH so + that the 'fewer IDs are always preferred' rule dominates adjacency effects. 
+ """ log_expected_n = math.log(num_ids[qnode_id]) + if level < self.PINNEDNESS_RECURSION_DEPTH: for neighbor, num_edges in adjacency_mat[qnode_id].items(): if neighbor == last: continue - log_expected_n += num_edges * min( - max( - self._compute_log_expected_n( - adjacency_mat, - num_ids, - neighbor, - qnode_id, - level + 1, - ), - 0, - ) - + math.log( - self.PINNEDNESS_DEFAULT_EDGES_PER_NODE - / self.PINNEDNESS_DEFAULT_TOTAL_NODES + + # Neighbor expectation (non-negative) + neighbor_log = max( + self._compute_log_expected_n( + adjacency_mat, num_ids, neighbor, qnode_id, level + 1 ), 0, ) + + # Baseline per-edge connectivity factor + baseline = math.log( + self.PINNEDNESS_DEFAULT_EDGES_PER_NODE + / self.PINNEDNESS_DEFAULT_TOTAL_NODES + ) + + # Dampen adjacency term so ID selectivity dominates + contribution = ( + self.PINNEDNESS_ADJ_WEIGHT + * num_edges + * max(neighbor_log + baseline, 0) + ) + + log_expected_n += contribution + return log_expected_n def _get_pinnedness(self, qgraph: QueryGraphDict, qnode_id: str) -> float: - """Get pinnedness of a single node.""" + """Get pinnedness of a single node. + + Higher pinnedness is better. With dampened adjacency, fewer IDs (more selective) + produce higher pinnedness, since -log(num_ids) is closer to 0 for fewer IDs. 
+ """ adjacency_mat = self._get_adjacency_matrix(qgraph) num_ids = self._get_num_ids(qgraph) - return -self._compute_log_expected_n( - adjacency_mat, - num_ids, - qnode_id, - ) + # Pinnedness is negative expected log-N so smaller expected set -> larger pinnedness + return -self._compute_log_expected_n(adjacency_mat, num_ids, qnode_id) # --- Nodes and Edges Methods --- diff --git a/tests/data_tiers/tier_0/dgraph/test_transpiler.py b/tests/data_tiers/tier_0/dgraph/test_transpiler.py index 8976d891..3561aef7 100644 --- a/tests/data_tiers/tier_0/dgraph/test_transpiler.py +++ b/tests/data_tiers/tier_0/dgraph/test_transpiler.py @@ -748,18 +748,18 @@ def qe(d: dict[str, Any]) -> QEdgeDict: EXP_TWO_HOP = dedent(""" { - q0_node_n1(func: eq(id, "UMLS:C0282090")) @cascade(id, ~subject, ~object) { + q0_node_n2(func: eq(id, "UMLS:C0496995")) @cascade(id, ~subject) { expand(Node) - out_edges_e0: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { - expand(Edge) { sources expand(Source) } - node_n0: object @filter(eq(id, "CHEBI:3125")) @cascade(id) { - expand(Node) - } - } - in_edges_e1: ~object @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, subject) { + out_edges_e1: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { expand(Edge) { sources expand(Source) } - node_n2: subject @filter(eq(id, "UMLS:C0496995")) @cascade(id) { + node_n1: object @filter(eq(id, "UMLS:C0282090")) @cascade(id, ~subject) { expand(Node) + out_edges_e0: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { + expand(Edge) { sources expand(Source) } + node_n0: object @filter(eq(id, "CHEBI:3125")) @cascade(id) { + expand(Node) + } + } } } } @@ -768,53 +768,27 @@ def qe(d: dict[str, Any]) -> QEdgeDict: EXP_TWO_HOP_WITH_VERSION = dedent(""" { - q0_node_n1(func: eq(v1_id, "UMLS:C0282090")) @cascade(v1_id, ~v1_subject, ~v1_object) { + q0_node_n2(func: eq(v1_id, "UMLS:C0496995")) 
@cascade(v1_id, ~v1_subject) { expand(v1_Node) - out_edges_e0: ~v1_subject @filter(eq(v1_predicate_ancestors, "has_phenotype")) @cascade(v1_predicate, v1_object) { - expand(v1_Edge) { v1_sources expand(v1_Source) } - node_n0: v1_object @filter(eq(v1_id, "CHEBI:3125")) @cascade(v1_id) { - expand(v1_Node) - } - } - in_edges_e1: ~v1_object @filter(eq(v1_predicate_ancestors, "has_phenotype")) @cascade(v1_predicate, v1_subject) { + out_edges_e1: ~v1_subject @filter(eq(v1_predicate_ancestors, "has_phenotype")) @cascade(v1_predicate, v1_object) { expand(v1_Edge) { v1_sources expand(v1_Source) } - node_n2: v1_subject @filter(eq(v1_id, "UMLS:C0496995")) @cascade(v1_id) { + node_n1: v1_object @filter(eq(v1_id, "UMLS:C0282090")) @cascade(v1_id, ~v1_subject) { expand(v1_Node) - } - } - } -} -""").strip() - -EXP_THREE_HOP = dedent(""" -{ - q0_node_n2(func: eq(id, "UMLS:C0496995")) @cascade(id, ~subject, ~object) { - expand(Node) - out_edges_e1: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { - expand(Edge) { sources expand(Source) } - node_n1: object @filter(eq(id, "UMLS:C0282090")) @cascade(id, ~subject) { - expand(Node) - out_edges_e0: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { - expand(Edge) { sources expand(Source) } - node_n0: object @filter(eq(id, "CHEBI:3125")) @cascade(id) { - expand(Node) + out_edges_e0: ~v1_subject @filter(eq(v1_predicate_ancestors, "has_phenotype")) @cascade(v1_predicate, v1_object) { + expand(v1_Edge) { v1_sources expand(v1_Source) } + node_n0: v1_object @filter(eq(v1_id, "CHEBI:3125")) @cascade(v1_id) { + expand(v1_Node) } } } } - in_edges_e2: ~object @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, subject) { - expand(Edge) { sources expand(Source) } - node_n3: subject @filter(eq(id, "UMLS:C0149720")) @cascade(id) { - expand(Node) - } - } } } -""").strip().replace("knowledge level", "knowledge_level") +""").strip() -EXP_FOUR_HOP = dedent(""" 
+EXP_THREE_HOP = dedent(""" { - q0_node_n3(func: eq(id, "UMLS:C0149720")) @cascade(id, ~subject, ~object) { + q0_node_n3(func: eq(id, "UMLS:C0149720")) @cascade(id, ~subject) { expand(Node) out_edges_e2: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { expand(Edge) { sources expand(Source) } @@ -834,19 +808,13 @@ def qe(d: dict[str, Any]) -> QEdgeDict: } } } - in_edges_e3: ~object @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, subject) { - expand(Edge) { sources expand(Source) } - node_n4: subject @filter(eq(id, "UMLS:C0496994")) @cascade(id) { - expand(Node) - } - } } } -""").strip() +""").strip().replace("knowledge level", "knowledge_level") -EXP_FIVE_HOP = dedent(""" +EXP_FOUR_HOP = dedent(""" { - q0_node_n4(func: eq(id, "UMLS:C0496994")) @cascade(id, ~subject, ~object) { + q0_node_n4(func: eq(id, "UMLS:C0496994")) @cascade(id, ~subject) { expand(Node) out_edges_e3: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { expand(Edge) { sources expand(Source) } @@ -872,46 +840,78 @@ def qe(d: dict[str, Any]) -> QEdgeDict: } } } - in_edges_e4: ~object @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, subject) { - expand(Edge) { sources expand(Source) } - node_n5: subject @filter(eq(id, "UMLS:C2879715")) @cascade(id) { - expand(Node) - } - } } } """).strip() -EXP_FIVE_HOP_MULTIPLE_IDS = dedent(""" +EXP_FIVE_HOP = dedent(""" { - q0_node_n3(func: eq(id, ["Q6", "Q7"])) @cascade(id, ~subject, ~object) { + q0_node_n5(func: eq(id, "UMLS:C2879715")) @cascade(id, ~subject) { expand(Node) - out_edges_e2: ~subject @filter(eq(predicate_ancestors, "P2")) @cascade(predicate, object) { + out_edges_e4: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { expand(Edge) { sources expand(Source) } - node_n2: object @filter(eq(id, ["Q4", "Q5"])) @cascade(id, ~subject) { + node_n4: object @filter(eq(id, "UMLS:C0496994")) @cascade(id, 
~subject) { expand(Node) - out_edges_e1: ~subject @filter(eq(predicate_ancestors, "P1")) @cascade(predicate, object) { + out_edges_e3: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { expand(Edge) { sources expand(Source) } - node_n1: object @filter(eq(id, ["Q2", "Q3"])) @cascade(id, ~subject) { + node_n3: object @filter(eq(id, "UMLS:C0149720")) @cascade(id, ~subject) { expand(Node) - out_edges_e0: ~subject @filter(eq(predicate_ancestors, "P0")) @cascade(predicate, object) { + out_edges_e2: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { expand(Edge) { sources expand(Source) } - node_n0: object @filter(eq(id, ["Q0", "Q1"])) @cascade(id) { + node_n2: object @filter(eq(id, "UMLS:C0496995")) @cascade(id, ~subject) { expand(Node) + out_edges_e1: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { + expand(Edge) { sources expand(Source) } + node_n1: object @filter(eq(id, "UMLS:C0282090")) @cascade(id, ~subject) { + expand(Node) + out_edges_e0: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { + expand(Edge) { sources expand(Source) } + node_n0: object @filter(eq(id, "CHEBI:3125")) @cascade(id) { + expand(Node) + } + } + } + } } } } } } } - in_edges_e3: ~object @filter(eq(predicate_ancestors, "P3")) @cascade(predicate, subject) { + } +} +""").strip() + +EXP_FIVE_HOP_MULTIPLE_IDS = dedent(""" +{ + q0_node_n5(func: eq(id, ["Q10", "Q11"])) @cascade(id, ~subject) { + expand(Node) + out_edges_e4: ~subject @filter(eq(predicate_ancestors, "P4")) @cascade(predicate, object) { expand(Edge) { sources expand(Source) } - node_n4: subject @filter(eq(id, ["Q8", "Q9"])) @cascade(id, ~object) { + node_n4: object @filter(eq(id, ["Q8", "Q9"])) @cascade(id, ~subject) { expand(Node) - in_edges_e4: ~object @filter(eq(predicate_ancestors, "P4")) @cascade(predicate, subject) { + out_edges_e3: ~subject @filter(eq(predicate_ancestors, "P3")) 
@cascade(predicate, object) { expand(Edge) { sources expand(Source) } - node_n5: subject @filter(eq(id, ["Q10", "Q11"])) @cascade(id) { + node_n3: object @filter(eq(id, ["Q6", "Q7"])) @cascade(id, ~subject) { expand(Node) + out_edges_e2: ~subject @filter(eq(predicate_ancestors, "P2")) @cascade(predicate, object) { + expand(Edge) { sources expand(Source) } + node_n2: object @filter(eq(id, ["Q4", "Q5"])) @cascade(id, ~subject) { + expand(Node) + out_edges_e1: ~subject @filter(eq(predicate_ancestors, "P1")) @cascade(predicate, object) { + expand(Edge) { sources expand(Source) } + node_n1: object @filter(eq(id, ["Q2", "Q3"])) @cascade(id, ~subject) { + expand(Node) + out_edges_e0: ~subject @filter(eq(predicate_ancestors, "P0")) @cascade(predicate, object) { + expand(Edge) { sources expand(Source) } + node_n0: object @filter(eq(id, ["Q0", "Q1"])) @cascade(id) { + expand(Node) + } + } + } + } + } + } } } } @@ -1092,54 +1092,54 @@ def qe(d: dict[str, Any]) -> QEdgeDict: } } - q2_node_n1(func: eq(id, "UMLS:C0282090")) @cascade(id, ~subject, ~object) { + q2_node_n2(func: eq(id, "UMLS:C0496995")) @cascade(id, ~subject) { expand(Node) - out_edges_e0: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { - expand(Edge) { sources expand(Source) } - node_n0: object @filter(eq(id, "CHEBI:3125")) @cascade(id) { - expand(Node) - } - } - in_edges_e1: ~object @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, subject) { + out_edges_e1: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { expand(Edge) { sources expand(Source) } - node_n2: subject @filter(eq(id, "UMLS:C0496995")) @cascade(id) { + node_n1: object @filter(eq(id, "UMLS:C0282090")) @cascade(id, ~subject) { expand(Node) + out_edges_e0: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { + expand(Edge) { sources expand(Source) } + node_n0: object @filter(eq(id, "CHEBI:3125")) @cascade(id) { + 
expand(Node) + } + } } } } - q3_node_n3(func: eq(id, ["Q6", "Q7"])) @cascade(id, ~subject, ~object) { + q3_node_n5(func: eq(id, ["Q10", "Q11"])) @cascade(id, ~subject) { expand(Node) - out_edges_e2: ~subject @filter(eq(predicate_ancestors, "P2")) @cascade(predicate, object) { + out_edges_e4: ~subject @filter(eq(predicate_ancestors, "P4")) @cascade(predicate, object) { expand(Edge) { sources expand(Source) } - node_n2: object @filter(eq(id, ["Q4", "Q5"])) @cascade(id, ~subject) { + node_n4: object @filter(eq(id, ["Q8", "Q9"])) @cascade(id, ~subject) { expand(Node) - out_edges_e1: ~subject @filter(eq(predicate_ancestors, "P1")) @cascade(predicate, object) { + out_edges_e3: ~subject @filter(eq(predicate_ancestors, "P3")) @cascade(predicate, object) { expand(Edge) { sources expand(Source) } - node_n1: object @filter(eq(id, ["Q2", "Q3"])) @cascade(id, ~subject) { + node_n3: object @filter(eq(id, ["Q6", "Q7"])) @cascade(id, ~subject) { expand(Node) - out_edges_e0: ~subject @filter(eq(predicate_ancestors, "P0")) @cascade(predicate, object) { + out_edges_e2: ~subject @filter(eq(predicate_ancestors, "P2")) @cascade(predicate, object) { expand(Edge) { sources expand(Source) } - node_n0: object @filter(eq(id, ["Q0", "Q1"])) @cascade(id) { + node_n2: object @filter(eq(id, ["Q4", "Q5"])) @cascade(id, ~subject) { expand(Node) + out_edges_e1: ~subject @filter(eq(predicate_ancestors, "P1")) @cascade(predicate, object) { + expand(Edge) { sources expand(Source) } + node_n1: object @filter(eq(id, ["Q2", "Q3"])) @cascade(id, ~subject) { + expand(Node) + out_edges_e0: ~subject @filter(eq(predicate_ancestors, "P0")) @cascade(predicate, object) { + expand(Edge) { sources expand(Source) } + node_n0: object @filter(eq(id, ["Q0", "Q1"])) @cascade(id) { + expand(Node) + } + } + } + } } } } } } } - in_edges_e3: ~object @filter(eq(predicate_ancestors, "P3")) @cascade(predicate, subject) { - expand(Edge) { sources expand(Source) } - node_n4: subject @filter(eq(id, ["Q8", "Q9"])) @cascade(id, 
~object) { - expand(Node) - in_edges_e4: ~object @filter(eq(predicate_ancestors, "P4")) @cascade(predicate, subject) { - expand(Edge) { sources expand(Source) } - node_n5: subject @filter(eq(id, ["Q10", "Q11"])) @cascade(id) { - expand(Node) - } - } - } - } } } """).strip() @@ -1940,3 +1940,176 @@ def test_normalization_multihop_query(transpiler: _TestDgraphTranspiler) -> None assert "end_node" not in actual assert "first_edge" not in actual assert "second_edge" not in actual + + +# ----------------------- +# Pinnedness Algorithm Tests +# ----------------------- + +def test_pinnedness_empty_graph_raises(transpiler: _TestDgraphTranspiler) -> None: + qgraph = qg({"nodes": {}, "edges": {}}) + with pytest.raises(ValueError): + transpiler.convert_multihop_public(qgraph) + + +def test_pinnedness_single_node_no_edges(transpiler: _TestDgraphTranspiler) -> None: + qgraph = qg({"nodes": {"n0": {"ids": ["X"]}}, "edges": {}}) + # convert_multihop still works and selects n0 + actual = transpiler.convert_multihop_public(qgraph) + assert "q0_node_n0" in actual + + +def test_pinnedness_two_nodes_one_edge_prefers_node_with_ids(transpiler: _TestDgraphTranspiler) -> None: + qgraph = qg({ + "nodes": { + "n0": {"ids": ["A"]}, # constrained + "n1": {"categories": ["biolink:Gene"]}, # less constrained than IDs + }, + "edges": {"e0": {"subject": "n0", "object": "n1", "predicates": ["biolink:related_to"]}}, + }) + actual = transpiler.convert_multihop_public(qgraph) + # Should start at the most constrained node, n0 + assert "q0_node_n0" in actual + assert "out_edges_e0:" in actual + + # And should not use category at root + assert "func: eq(category" not in actual + + +def test_pinnedness_multiple_parallel_edges_increase_weight(transpiler: _TestDgraphTranspiler) -> None: + # Same two nodes but with two parallel edges; pinnedness should still pick the ID node now + qgraph = qg({ + "nodes": {"n0": {"ids": ["A"]}, "n1": {"categories": ["biolink:Gene"]}}, + "edges": { + "e0": {"subject": "n0", 
"object": "n1", "predicates": ["biolink:related_to"]}, + "e1": {"subject": "n0", "object": "n1", "predicates": ["biolink:related_to"]}, + }, + }) + actual = transpiler.convert_multihop_public(qgraph) + assert "q0_node_n0" in actual + assert "out_edges_e0:" in actual and "out_edges_e1:" in actual + # Root filter should be ID-based + assert 'q0_node_n0(func: eq(id, "A"))' in actual or 'q0_node_n0(func: eq(id, ["A"]))' in actual + assert "func: eq(category" not in actual + + +def test_pinnedness_prefers_more_ids_over_fewer(transpiler: _TestDgraphTranspiler) -> None: + # Fewer IDs (n0 has 1) should be preferred over more IDs (n1 has 3) + qgraph = qg({ + "nodes": { + "n0": {"ids": ["A"]}, + "n1": {"ids": ["B1", "B2", "B3"]}, + }, + "edges": {"e0": {"subject": "n1", "object": "n0", "predicates": ["biolink:related_to"]}}, + }) + actual = transpiler.convert_multihop_public(qgraph) + assert "q0_node_n0" in actual + assert 'q0_node_n0(func: eq(id, "A"))' in actual or 'q0_node_n0(func: eq(id, ["A"]))' in actual + + +def test_pinnedness_tie_breaker_uses_node_id(transpiler: _TestDgraphTranspiler) -> None: + # Both nodes unconstrained (no IDs); tie-breaker should pick max by (score, node_id) + # With alphabetical normalization, node IDs sorted: a_first -> n0, z_last -> n1 + qgraph = qg({ + "nodes": { + "a_first": {"categories": ["biolink:Gene"]}, + "z_last": {"categories": ["biolink:Gene"]}, + }, + "edges": { + "e0": {"subject": "a_first", "object": "z_last", "predicates": ["biolink:related_to"]}, + }, + }) + actual = transpiler.convert_multihop_public(qgraph) + + # With equal category-only nodes, tie-breaker picks lexicographically larger id ("z_last"), normalized to n1. 
+ assert "q0_node_n1" in actual + # Root should NOT use ID filter since neither node has IDs; it should use category + assert "func: eq(id," not in actual + assert 'func: eq(category, "Gene")' in actual + + +def test_pinnedness_issue(transpiler: _TestDgraphTranspiler) -> None: + """Test Pinnedness algorithm issue.""" + # 1. Arrange + qgraph = qg({ + "nodes": { + "SN": { + "categories": ["biolink:ChemicalEntity"], + "set_interpretation": "BATCH", + "constraints": [], + "member_ids": [] + }, + "ON": { + "ids": ["MONDO:0011705"], + "categories": ["biolink:DiseaseOrPhenotypicFeature"], + "set_interpretation": "BATCH", + "constraints": [], + "member_ids": [] + }, + "f": { + "categories": ["biolink:Disease"], + "set_interpretation": "BATCH", + "constraints": [], + "member_ids": [] + } + }, + "edges": { + "edge_0": { + "subject": "SN", + "object": "f", + "predicates": ["biolink:treats_or_applied_or_studied_to_treat"], + "attribute_constraints": [], + "qualifier_constraints": [] + }, + "edge_1": { + "subject": "f", + "object": "ON", + "predicates": ["biolink:has_phenotype"], + "attribute_constraints": [], + "qualifier_constraints": [] + }, + "edge_2": { + "subject": "ON", + "object": "f", + "predicates": ["biolink:has_phenotype"], + "attribute_constraints": [], + "qualifier_constraints": [] + } + } + }) + + # 2. 
Act + actual = transpiler.convert_multihop_public(qgraph) + expected = dedent(""" + { + q0_node_n0(func: eq(id, "MONDO:0011705")) @cascade(id, ~subject, ~object) { + expand(Node) + out_edges_e2: ~subject @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, object) { + expand(Edge) { sources expand(Source) } + node_n2: object @filter(eq(category, "Disease")) @cascade(id, ~object) { + expand(Node) + in_edges_e0: ~object @filter(eq(predicate_ancestors, "treats_or_applied_or_studied_to_treat")) @cascade(predicate, subject) { + expand(Edge) { sources expand(Source) } + node_n1: subject @filter(eq(category, "ChemicalEntity")) @cascade(id) { + expand(Node) + } + } + } + } + in_edges_e1: ~object @filter(eq(predicate_ancestors, "has_phenotype")) @cascade(predicate, subject) { + expand(Edge) { sources expand(Source) } + node_n2: subject @filter(eq(category, "Disease")) @cascade(id, ~object) { + expand(Node) + in_edges_e0: ~object @filter(eq(predicate_ancestors, "treats_or_applied_or_studied_to_treat")) @cascade(predicate, subject) { + expand(Edge) { sources expand(Source) } + node_n1: subject @filter(eq(category, "ChemicalEntity")) @cascade(id) { + expand(Node) + } + } + } + } + } + }""").strip() + + # 3. Assert + assert normalize(actual) == normalize(expected)