diff --git a/src/retriever/data_tiers/tier_1/elasticsearch/aggregating_querier.py b/src/retriever/data_tiers/tier_1/elasticsearch/aggregating_querier.py index bf3a7470..07f364f3 100644 --- a/src/retriever/data_tiers/tier_1/elasticsearch/aggregating_querier.py +++ b/src/retriever/data_tiers/tier_1/elasticsearch/aggregating_querier.py @@ -5,7 +5,7 @@ from retriever.data_tiers.tier_1.elasticsearch.types import ( ESDocument, - ESHit, + ESEdge, ESPayload, ESResponse, ) @@ -26,7 +26,7 @@ class QueryBody(QueryInfo): async def parse_response( response: ObjectApiResponse[ESResponse], page_size: int -) -> tuple[list[ESHit], list[Any] | None]: +) -> tuple[list[ESEdge], list[Any] | None]: """Parse an ES response and for 0) list of hits, and 1) search_after i.e. the pagination anchor for next query.""" if "hits" not in response: raise RuntimeError(f"Invalid ES response: no hits in response body: {response}") @@ -39,12 +39,7 @@ async def parse_response( if len(fetched_documents) == page_size: search_after = fetched_documents[-1]["sort"] - hits: list[ESHit] = [ - hit["_source"] - if "_index" not in hit - else {**hit["_source"], "_index": hit["_index"]} - for hit in fetched_documents - ] + hits = [ESEdge.from_dict(hit) for hit in fetched_documents] return hits, search_after @@ -71,13 +66,13 @@ async def run_single_query( index_name: str, query: ESPayload, page_size: int = 1000, -) -> list[ESHit]: +) -> list[ESEdge]: """Adapter for running single query through _search and aggregating all hits.""" query_info: QueryInfo = { "query": query["query"], } - results: list[ESHit] = [] + results = list[ESEdge]() while True: query_body = generate_query_body(query_info, page_size) @@ -99,7 +94,7 @@ async def run_batch_query( index_name: str, queries: list[ESPayload], page_size: int = 1000, -) -> list[list[ESHit]]: +) -> list[list[ESEdge]]: """Adapter for running batch queries through _msearch and aggregating all hits.""" query_collection: list[QueryInfo] = [ { @@ -108,7 +103,7 @@ async def run_batch_query( for query in queries ] - results: list[list[ESHit]] = [[] for _ in query_collection] + results: list[list[ESEdge]] = [[] for _ in query_collection] current_query_indices = range(0, len(query_collection)) diff --git a/src/retriever/data_tiers/tier_1/elasticsearch/driver.py b/src/retriever/data_tiers/tier_1/elasticsearch/driver.py index 3e025dd2..628d3def 100644 --- a/src/retriever/data_tiers/tier_1/elasticsearch/driver.py +++ b/src/retriever/data_tiers/tier_1/elasticsearch/driver.py @@ -14,7 +14,7 @@ run_batch_query, run_single_query, ) -from retriever.data_tiers.tier_1.elasticsearch.types import ESHit, ESPayload +from retriever.data_tiers.tier_1.elasticsearch.types import ESEdge, ESPayload from retriever.data_tiers.utils import parse_dingo_metadata from retriever.types.dingo import DINGO_ADAPTER, DINGOMetadata from retriever.types.metakg import Operation, OperationNode @@ -112,7 +112,7 @@ async def close(self) -> None: async def run( self, query: ESPayload | list[ESPayload] - ) -> list[ESHit] | list[list[ESHit]] | None: + ) -> list[ESEdge] | list[list[ESEdge]] | None: """Execute query logic.""" # Check ES connection instance if self.es_connection is None: @@ -162,7 +162,7 @@ async def run( @tracer.start_as_current_span("elasticsearch_query") async def run_query( self, query: ESPayload | list[ESPayload], *args: Any, **kwargs: Any - ) -> list[ESHit] | list[list[ESHit]] | None: + ) -> list[ESEdge] | list[list[ESEdge]] | None: """Use ES async client to execute query via the `_search/_msearch` endpoints.""" otel_span = trace.get_current_span() if not otel_span or not otel_span.is_recording(): diff --git a/src/retriever/data_tiers/tier_1/elasticsearch/transpiler.py b/src/retriever/data_tiers/tier_1/elasticsearch/transpiler.py index 41c98a8b..cefb0ed3 100644 --- a/src/retriever/data_tiers/tier_1/elasticsearch/transpiler.py +++ b/src/retriever/data_tiers/tier_1/elasticsearch/transpiler.py @@ -10,15 +10,12 @@ ) from retriever.data_tiers.tier_1.elasticsearch.types import ( ESBooleanQuery, + ESEdge, ESFilterClause, - ESHit, + ESNode, ESPayload, ESQueryContext, ) -from retriever.data_tiers.utils import ( - DINGO_KG_EDGE_TOPLEVEL_VALUES, - DINGO_KG_NODE_TOPLEVEL_VALUES, -) from retriever.types.general import BackendResult from retriever.types.trapi import ( CURIE, @@ -181,15 +178,15 @@ def convert_triple(self, qgraph: QueryGraphDict) -> ESPayload: def convert_batch_triple(self, qgraphs: list[QueryGraphDict]) -> list[ESPayload]: return [self.convert_triple(qgraph) for qgraph in qgraphs] - def build_nodes(self, hits: list[ESHit]) -> dict[CURIE, NodeDict]: + def build_nodes(self, edges: list[ESEdge]) -> dict[CURIE, NodeDict]: """Build TRAPI nodes from backend representation.""" nodes = dict[CURIE, NodeDict]() - for hit in hits: + for edge in edges: node_ids = dict[str, CURIE]() - for argument in ("subject", "object"): - node = hit[argument] - node_id = node["id"] - node_ids[argument] = node_id + for node_pos in ("subject", "object"): + node: ESNode = getattr(edge, node_pos) + node_id = CURIE(node.id) + node_ids[node_pos] = node_id if node_id in nodes: continue attributes: list[AttributeDict] = [] @@ -198,13 +195,14 @@ def build_nodes(self, hits: list[ESHit]) -> dict[CURIE, NodeDict]: special_cases: dict[str, tuple[str, Any]] = { "equivalent_identifiers": ( "biolink:xref", - [CURIE(i) for i in node["equivalent_identifiers"]], + [ + CURIE(i) + for i in node.attributes.get("equivalent_identifiers", []) + ], ) } - for field, value in node.items(): - if field in DINGO_KG_NODE_TOPLEVEL_VALUES: - continue + for field, value in node.attributes.items(): if field in special_cases: continue if value is not None and value not in ([], ""): @@ -222,10 +220,10 @@ def build_nodes(self, hits: list[ESHit]) -> dict[CURIE, NodeDict]: ) trapi_node = NodeDict( - name=node["name"], + name=node.name, categories=[ BiolinkEntity(biolink.ensure_prefix(cat)) - for cat in node["category"] + for cat in node.category ], attributes=attributes, ) @@ -233,10 +231,10 @@ def build_nodes(self, hits: list[ESHit]) -> dict[CURIE, NodeDict]: nodes[node_id] = trapi_node return nodes - def build_edges(self, hits: list[ESHit]) -> dict[EdgeIdentifier, EdgeDict]: + def build_edges(self, edges: list[ESEdge]) -> dict[EdgeIdentifier, EdgeDict]: """Build TRAPI edges from backend representation.""" - edges = dict[EdgeIdentifier, EdgeDict]() - for hit in hits: + trapi_edges = dict[EdgeIdentifier, EdgeDict]() + for edge in edges: attributes: list[AttributeDict] = [] qualifiers: list[QualifierDict] = [] sources: list[RetrievalSourceDict] = [] @@ -247,25 +245,15 @@ def build_edges(self, hits: list[ESHit]) -> dict[EdgeIdentifier, EdgeDict]: "biolink:category", [ BiolinkEntity(biolink.ensure_prefix(cat)) - for cat in hit.get("category", []) + for cat in edge.attributes.get("category", []) ], ), } # Build Attributes and Qualifiers - for field, value in hit.items(): - if field in DINGO_KG_EDGE_TOPLEVEL_VALUES or field in special_cases: + for field, value in edge.attributes.items(): + if field in special_cases: continue - if biolink.is_qualifier(field): - qualifiers.append( - QualifierDict( - qualifier_type_id=QualifierTypeID( - biolink.ensure_prefix(field) - ), - qualifier_value=str(value), - ) - ) - pass elif value is not None and value not in ([], ""): attributes.append( AttributeDict( @@ -274,6 +262,15 @@ def build_edges(self, hits: list[ESHit]) -> dict[EdgeIdentifier, EdgeDict]: ) ) + # Build Qualifiers + for qtype, qval in edge.qualifiers.items(): + qualifiers.append( + QualifierDict( + qualifier_type_id=QualifierTypeID(biolink.ensure_prefix(qtype)), + qualifier_value=qval, + ) + ) + # Special case attributes for name, value in special_cases.values(): if value is not None and value not in ([], ""): @@ -282,7 +279,7 @@ def build_edges(self, hits: list[ESHit]) -> dict[EdgeIdentifier, EdgeDict]: ) # Build Sources - for source in hit["sources"]: + for source in edge.sources: retrieval_source = RetrievalSourceDict( resource_id=Infores(source["resource_id"]), resource_role=source["resource_role"], @@ -297,9 +294,9 @@ def build_edges(self, hits: list[ESHit]) -> dict[EdgeIdentifier, EdgeDict]: # Build Edge trapi_edge = EdgeDict( - predicate=BiolinkPredicate(biolink.ensure_prefix(hit["predicate"])), - subject=CURIE(hit["subject"]["id"]), - object=CURIE(hit["object"]["id"]), + predicate=BiolinkPredicate(biolink.ensure_prefix(edge.predicate)), + subject=CURIE(edge.subject.id), + object=CURIE(edge.object.id), sources=sources, ) if len(attributes) > 0: @@ -310,12 +307,13 @@ def build_edges(self, hits: list[ESHit]) -> dict[EdgeIdentifier, EdgeDict]: append_aggregator_source(trapi_edge, Infores(CONFIG.tier1.backend_infores)) edge_hash = hash_hex(hash_edge(trapi_edge)) - edges[edge_hash] = trapi_edge - return edges + trapi_edges[edge_hash] = trapi_edge + + return trapi_edges @override def convert_results( - self, qgraph: QueryGraphDict, results: list[ESHit] | None + self, qgraph: QueryGraphDict, results: list[ESEdge] | None ) -> BackendResult: nodes = self.build_nodes(results) if results is not None else {} edges = self.build_edges(results) if results is not None else {} @@ -327,7 +325,7 @@ def convert_results( ) def convert_batch_results( - self, qgraph_list: list[QueryGraphDict], results: list[list[ESHit]] + self, qgraph_list: list[QueryGraphDict], results: list[list[ESEdge]] ) -> list[BackendResult]: """Wrapper for converting results for a batch query.""" return [ diff --git a/src/retriever/data_tiers/tier_1/elasticsearch/types.py b/src/retriever/data_tiers/tier_1/elasticsearch/types.py index a9bfa177..2b285c0b 100644 --- a/src/retriever/data_tiers/tier_1/elasticsearch/types.py +++ b/src/retriever/data_tiers/tier_1/elasticsearch/types.py @@ -1,4 +1,7 @@ -from typing import Any, NotRequired, TypedDict +from dataclasses import dataclass +from typing import Any, NotRequired, Self, TypedDict + +import orjson from retriever.data_tiers.tier_1.elasticsearch.attribute_types import ( AttributeFilterQuery, @@ -7,7 +10,14 @@ ESQueryForSingleQualifierConstraint, ESTermClause, ) -from retriever.types.trapi import CURIE +from retriever.data_tiers.utils import ( + DINGO_KG_EDGE_TOPLEVEL_VALUES, + DINGO_KG_NODE_TOPLEVEL_VALUES, +) +from retriever.types.trapi import ( + RetrievalSourceDict, +) +from retriever.utils import biolink class ESFilterClause(TypedDict): @@ -38,80 +48,10 @@ class ESPayload(TypedDict): query: ESQueryContext -class ESPublicationsInfo(TypedDict): - """Information regarding publications.""" - - pmid: str - publication_date: NotRequired[str] - sentence: NotRequired[str] - subject_score: NotRequired[str] - object_score: NotRequired[str] - - -class ESSourceInfo(TypedDict): - """Information regarding sources.""" - - resource_id: str - resource_role: str - upstream_resource_ids: NotRequired[list[str]] - source_record_urls: NotRequired[list[str]] - - -class ESNode(TypedDict): - """A knowledge node as represented in Elasticsearch.""" - - id: CURIE - name: str - category: str - description: str - equivalent_identifiers: list[str] - in_taxon: NotRequired[list[str]] - information_content: NotRequired[float] - inheritance: NotRequired[str] - provided_by: NotRequired[list[str]] - - -class ESHit(TypedDict): - """The main data of an Elasticsearch hit.""" - - _index: NotRequired[str] - subject: ESNode - object: ESNode - predicate: str - sources: list[ESSourceInfo] - id: NotRequired[str] - agent_type: NotRequired[str] - knowledge_level: NotRequired[str] - publications: list[str] - qualified_predicate: NotRequired[str] - predicate_ancestors: list[str] - source_inforeses: list[str] - subject_form_or_variant_qualifier: NotRequired[str] - disease_context_qualifier: NotRequired[str] - frequency_qualifier: NotRequired[str] - onset_qualifier: NotRequired[str] - sex_qualifier: NotRequired[str] - original_subject: NotRequired[str] - original_predicate: NotRequired[str] - original_object: NotRequired[str] - allelic_requirement: NotRequired[str] - update_date: NotRequired[str] - z_score: NotRequired[float] - has_evidence: list[str] - has_confidence_score: NotRequired[float] - has_count: NotRequired[float] - has_total: NotRequired[float] - has_percentage: NotRequired[float] - has_quotient: NotRequired[float] - category: list[str] - seq_: NotRequired[int] - negated: NotRequired[bool] - - class ESDocument(TypedDict): """A source document returned from Elasticsearch.""" - _source: ESHit + _source: dict[str, Any] _index: NotRequired[str] sort: list[Any] @@ -126,3 +66,80 @@ class ESResponse(TypedDict): """An Elasticsearch query response.""" hits: ESHits + + +@dataclass(frozen=True, kw_only=True, slots=True) +class ESNode: + """A knowledge node as represented by ES, with some convenience features.""" + + id: str + name: str + category: list[str] + attributes: dict[str, Any] + + @classmethod + def from_dict(cls, doc: dict[str, Any]) -> Self: + """Parse part of an ES document as an Edge.""" + attributes = dict[str, Any]() + for key, value in doc.items(): + if key in DINGO_KG_NODE_TOPLEVEL_VALUES: + continue + else: + attributes[key] = value + + return cls( + id=doc.get("id", "NOT_PROVIDED"), + name=doc.get("name", "NOT_PROVIDED"), + category=doc.get("category", "NOT_PROVIDED"), + attributes=attributes, + ) + + +@dataclass(frozen=True, kw_only=True, slots=True) +class ESEdge: + """Edge information as represented by ES, with some convenience features.""" + + _index: str | None + id: str + subject: ESNode + object: ESNode + predicate: str + predicate_ancestors: list[str] + sources: list[RetrievalSourceDict] + source_inforeses: list[str] + qualifiers: dict[str, str] + attributes: dict[str, Any] + + @classmethod + def from_dict(cls, doc: ESDocument) -> Self: + """Parse an ES document as an Edge.""" + qualifiers = dict[str, str]() + attributes = dict[str, Any]() + for key, value in doc["_source"].items(): + if key in DINGO_KG_EDGE_TOPLEVEL_VALUES: + continue + if biolink.is_qualifier(key): + if not isinstance(value, str): + qualifiers[key] = orjson.dumps(value).decode() + else: + qualifiers[key] = str(value) + else: + attributes[key] = value + + sbj_node = ESNode.from_dict(doc["_source"]["subject"]) + obj_node = ESNode.from_dict(doc["_source"]["object"]) + + return cls( + _index=doc.get("_index"), + id=doc["_source"].get("id", "NOT_PROVIDED"), + subject=sbj_node, + object=obj_node, + predicate=doc["_source"].get("predicate", "related_to"), + predicate_ancestors=doc["_source"].get( + "predicate_ancestors", ["related_to"] + ), + sources=doc["_source"].get("sources", []), + source_inforeses=doc["_source"].get("source_inforeses", []), + qualifiers=qualifiers, + attributes=attributes, + ) diff --git a/tests/data_tiers/tier_1/elasticsearch_tests/payload/es_hits.py b/tests/data_tiers/tier_1/elasticsearch_tests/payload/es_hits.py index d73a3b96..67a187d2 100644 --- a/tests/data_tiers/tier_1/elasticsearch_tests/payload/es_hits.py +++ b/tests/data_tiers/tier_1/elasticsearch_tests/payload/es_hits.py @@ -1,268 +1,230 @@ from typing import Any, cast -from retriever.data_tiers.tier_1.elasticsearch.types import ESHit, ESPayload +from retriever.data_tiers.tier_1.elasticsearch.types import ( + ESEdge, + ESPayload, + ESDocument, +) -def esh(d: dict[str, Any]) -> ESHit: - return cast(ESHit, cast(ESPayload, cast(dict, d))) +def esh(d: ESDocument) -> ESEdge: + return ESEdge.from_dict(d) -SIMPLE_ES_HITS: list[ESHit] = [esh( -{ - "subject": { - "id": "UMLS:C1564592", - "name": "Diclomin", - "all_names": [ - "Diclomin" - ], - "category": [ - "Entity", - "ChemicalOrDrugOrTreatment", - "ChemicalEntity", - "PhysicalEssence", - "NamedThing", - "ChemicalEntityOrGeneOrGeneProduct", - "ChemicalEntityOrProteinOrPolypeptide", - "PhysicalEssenceOrOccurrent" - ], - "iri": "https://identifiers.org/umls:C1564592", - "description": "UMLS Semantic Type: STY:T109; UMLS Semantic Type: STY:T121", - "equivalent_identifiers": [ - "UMLS:C1564592" - ] - }, - "object": { - "id": "CHEBI:4514", - "name": "dicyclomine", - "all_names": [ - "dicyclomine hydrochloride 2 MG/ML Oral Solution", - "dicyclomine Oral Tablet", - "dicyclomine hydrochloride 20 MG [Bentyl]", - "Bentyl Oral Product", - "dicyclomine Injectable Product", - "Dicyclomine", - "dicyclomine Oral Solution", - "dicycloverin", - "dicyclomine Oral Capsule [Bentyl]", - "dicyclomine Injection [Bentyl]", - "dicyclomine hydrochloride 10 MG/ML [Bentyl]", - "dicyclomine Oral Capsule", - "dicyclomine hydrochloride 2 MG/ML", - "dicyclomine hydrochloride 10 MG/ML", - "dicyclomine hydrochloride 20 MG Oral Capsule", - "2 ML dicyclomine hydrochloride 10 MG/ML Injection", - "dicycloverine", - "Bentyl", - "bentyl", - "DICYCLOMINE", - "dicyclomine Oral Product", - "Bentyl Pill", - "dicyclomine", - "dicyclomine hydrochloride 10 MG Oral Capsule", - "2 ML dicyclomine hydrochloride 10 MG/ML Injection [Bentyl]", - "dicyclomine hydrochloride 10 MG [Bentyl]", - "dicyclomine Pill", - "dicyclomine hydrochloride 10 MG Oral Tablet", - "dicyclomine Oral Liquid Product", - "dicyclomine Oral Tablet [Bentyl]", - "dicyclomine hydrochloride 10 MG", - "Bentyl Injectable Product", - "dicyclomine hydrochloride 20 MG", - "dicyclomine hydrochloride", - "DICYCLOMINE HYDROCHLORIDE", - "dicyclomine hydrochloride 10 MG Oral Capsule [Bentyl]", - "dicyclomine single use injection", - "dicyclomine hydrochloride 20 MG Oral Tablet" - ], - "category": [ - "Entity", - "ChemicalEntity", - "ChemicalOrDrugOrTreatment", - "ChemicalMixture", - "PhysicalEssence", - "MolecularEntity", - "NamedThing", - "MolecularMixture", - "OntologyClass", - "Drug", - "ChemicalEntityOrGeneOrGeneProduct", - "ChemicalEntityOrProteinOrPolypeptide", - "PhysicalEssenceOrOccurrent", - "SmallMolecule" - ], - "iri": "http://purl.obolibrary.org/obo/CHEBI_4514", - "description": "Dicyclomine is only found in individuals that have used or taken this drug. It is a muscarinic antagonist used as an antispasmodic and in urinary incontinence. It has little effect on glandular secretion or the cardiovascular system. It does have some local anesthetic properties and is used in gastrointestinal, biliary, and urinary tract spasms. [PubChem]Action is achieved via a dual mechanism: (1) a specific anticholinergic effect (antimuscarinic) at the acetylcholine-receptor sites and (2) a direct effect upon smooth muscle (musculotropic).", - "equivalent_identifiers": [ - "RXNORM:991063", - "RXNORM:1722904", - "RXNORM:991151", - "RXCUI:2657977", - "CAS:104959-55-9", - "UMLS:C3205694", - "CHV:0000044640", - "RXNORM:1151168", - "RXNORM:1171479", - "RXNORM:991069", - "UMLS:C0591771", - "UMLS:C0976352", - "RXCUI:366711", - "HMDB:HMDB0014942", - "UMLS:C4719851", - "UMLS:C3225558", - "UMLS:C1245855", - "UMLS:C0353803", - "UMLS:C4730928", - "UMLS:C1245859", - "RXNORM:991086", - "RXCUI:991081", - "RXCUI:991063", - "RXNORM:991061", - "RXCUI:1151169", - "UMLS:C2916892", - "UMLS:C0688579", - "RXCUI:2649127", - "UMLS:C5841834", - "NCIT:C61720", - "RXNORM:991082", - "RXNORM:991064", - "KEGG.DRUG:D00717", - "UMLS:C5838728", - "UMLS:C0709217", - "RXCUI:2662657", - "RXCUI:2646528", - "RXNORM:366711", - "RXNORM:991060", - "RXCUI:2654863", - "UMLS:C2916899", - "UMLS:C5838319", - "RXCUI:1171479", - "RXCUI:1722902", - "RXCUI:991087", - "UMLS:C2916891", - "RXCUI:203018", - "UMLS:C0012125", - "UMLS:C4060245", - "RXCUI:2647822", - "MESH:D004025", - "UMLS:C3225559", - "CAS:77-19-0", - "UMLS:C1245857", - "RXCUI:991082", - "RXCUI:2661688", - "UMLS:C5845493", - "CHV:0000003899", - "RXNORM:203018", - "CHEBI:4514", - "UMLS:C1270926", - "UMLS:C5846109", - "RXCUI:2645632", - "UMLS:C3205696", - "RXCUI:2659854", - "RXNORM:991065", - "UMLS:C2916902", - "NDDF:004711", - "UMLS:C5846461", - "DRUGBANK:DB00804", - "UMLS:C2916901", - "UMLS:C0688582", - "KEGG.COMPOUND:C06951", - "INCHIKEY:CURUTKGFNZGFSE-UHFFFAOYSA-N", - "RXNORM:371817", - "RXNORM:991616", - "RXNORM:1171480", - "RXCUI:2654453", - "CAS:67-92-5", - "UNII:4KV4X8IF6V", - "UMLS:C0709215", - "RXCUI:991061", - "RXNORM:1151166", - "RXNORM:371813", - "UMLS:C5782099", - "RXCUI:1151167", - "RXCUI:991086", - "RXCUI:371813", - "RXNORM:991085", - "UMLS:C4050109", - "RXCUI:1722904", - "GTOPDB:355", - "VANDF:4017870", - "RXCUI:1171480", - "RXCUI:152021", - "UMLS:C0305432", - "DrugCentral:868", - "VANDF:4019716", - "RXCUI:991060", - "PUBCHEM.COMPOUND:3042", - "RXCUI:991062", - "RXCUI:3361", - "RXCUI:2662305", - "RXCUI:991064", - "RXCUI:1171477", - "RXNORM:371815", - "UMLS:C3225556", - "UMLS:C1240680", - "PUBCHEM.COMPOUND:441344", - "RXCUI:991065", - "RXNORM:1171477", - "RXNORM:991087", - "UMLS:C3205695", - "UMLS:C0692732", - "RXNORM:368081", - "UMLS:C0700023", - "RXCUI:1151166", - "RXNORM:1151169", - "RXNORM:991068", - "UMLS:C4719825", - "UMLS:C2916890", - "RXCUI:371817", - "RXNORM:991081", - "RXCUI:368081", - "ATC:A03AA07", - "RXNORM:1151167", - "RXCUI:991151", - "RXNORM:3361", - "RXNORM:991062", - "RXCUI:991068", - "CHEMBL.COMPOUND:CHEMBL1123", - "RXCUI:991616", - "UMLS:C1242057", - "UMLS:C2916894", - "RXCUI:991085", - "UMLS:C5843663", - "RXCUI:371815", - "RXCUI:991069", - "RXCUI:1151168" - ], - "publications": [ - "PMID:22194678", - "PMID:23523385", - "PMID:3612532", - "PMID:3597632", - "PMID:2579237", - "PMID:18834112", - "PMID:24332655", - "PMID:1920350", - "PMID:14254329", - "PMID:22961681" - ] - }, - "predicate": "subclass_of", - "sources": [{ +SIMPLE_ES_HITS: list[ESEdge] = [ + esh( + ESDocument( + sort=["NOT_REQUIRED_FOR_TEST"], + _source={ + "subject": { + "id": "UMLS:C1564592", + "name": "Diclomin", + "category": [ + "Entity", + "ChemicalOrDrugOrTreatment", + "ChemicalEntity", + "PhysicalEssence", + "NamedThing", + "ChemicalEntityOrGeneOrGeneProduct", + "ChemicalEntityOrProteinOrPolypeptide", + "PhysicalEssenceOrOccurrent", + ], + "iri": "https://identifiers.org/umls:C1564592", + "description": "UMLS Semantic Type: STY:T109; UMLS Semantic Type: STY:T121", + "equivalent_identifiers": ["UMLS:C1564592"], + }, + "object": { + "id": "CHEBI:4514", + "name": "dicyclomine", + "category": [ + "Entity", + "ChemicalEntity", + "ChemicalOrDrugOrTreatment", + "ChemicalMixture", + "PhysicalEssence", + "MolecularEntity", + "NamedThing", + "MolecularMixture", + "OntologyClass", + "Drug", + "ChemicalEntityOrGeneOrGeneProduct", + "ChemicalEntityOrProteinOrPolypeptide", + "PhysicalEssenceOrOccurrent", + "SmallMolecule", + ], + "iri": "http://purl.obolibrary.org/obo/CHEBI_4514", + "description": "Dicyclomine is only found in individuals that have used or taken this drug. It is a muscarinic antagonist used as an antispasmodic and in urinary incontinence. It has little effect on glandular secretion or the cardiovascular system. It does have some local anesthetic properties and is used in gastrointestinal, biliary, and urinary tract spasms. [PubChem]Action is achieved via a dual mechanism: (1) a specific anticholinergic effect (antimuscarinic) at the acetylcholine-receptor sites and (2) a direct effect upon smooth muscle (musculotropic).", + "equivalent_identifiers": [ + "RXNORM:991063", + "RXNORM:1722904", + "RXNORM:991151", + "RXCUI:2657977", + "CAS:104959-55-9", + "UMLS:C3205694", + "CHV:0000044640", + "RXNORM:1151168", + "RXNORM:1171479", + "RXNORM:991069", + "UMLS:C0591771", + "UMLS:C0976352", + "RXCUI:366711", + "HMDB:HMDB0014942", + "UMLS:C4719851", + "UMLS:C3225558", + "UMLS:C1245855", + "UMLS:C0353803", + "UMLS:C4730928", + "UMLS:C1245859", + "RXNORM:991086", + "RXCUI:991081", + "RXCUI:991063", + "RXNORM:991061", + "RXCUI:1151169", + "UMLS:C2916892", + "UMLS:C0688579", + "RXCUI:2649127", + "UMLS:C5841834", + "NCIT:C61720", + "RXNORM:991082", + "RXNORM:991064", + "KEGG.DRUG:D00717", + "UMLS:C5838728", + "UMLS:C0709217", + "RXCUI:2662657", + "RXCUI:2646528", + "RXNORM:366711", + "RXNORM:991060", + "RXCUI:2654863", + "UMLS:C2916899", + "UMLS:C5838319", + "RXCUI:1171479", + "RXCUI:1722902", + "RXCUI:991087", + "UMLS:C2916891", + "RXCUI:203018", + "UMLS:C0012125", + "UMLS:C4060245", + "RXCUI:2647822", + "MESH:D004025", + "UMLS:C3225559", + "CAS:77-19-0", + "UMLS:C1245857", + "RXCUI:991082", + "RXCUI:2661688", + "UMLS:C5845493", + "CHV:0000003899", + "RXNORM:203018", + "CHEBI:4514", + "UMLS:C1270926", + "UMLS:C5846109", + "RXCUI:2645632", + "UMLS:C3205696", + "RXCUI:2659854", + "RXNORM:991065", + "UMLS:C2916902", + "NDDF:004711", + "UMLS:C5846461", + "DRUGBANK:DB00804", + "UMLS:C2916901", + "UMLS:C0688582", + "KEGG.COMPOUND:C06951", + "INCHIKEY:CURUTKGFNZGFSE-UHFFFAOYSA-N", + "RXNORM:371817", + "RXNORM:991616", + "RXNORM:1171480", + "RXCUI:2654453", + "CAS:67-92-5", + "UNII:4KV4X8IF6V", + "UMLS:C0709215", + "RXCUI:991061", + "RXNORM:1151166", + "RXNORM:371813", + "UMLS:C5782099", + "RXCUI:1151167", + "RXCUI:991086", + "RXCUI:371813", + "RXNORM:991085", + "UMLS:C4050109", + "RXCUI:1722904", + "GTOPDB:355", + "VANDF:4017870", + "RXCUI:1171480", + "RXCUI:152021", + "UMLS:C0305432", + "DrugCentral:868", + "VANDF:4019716", + "RXCUI:991060", + "PUBCHEM.COMPOUND:3042", + "RXCUI:991062", + "RXCUI:3361", + "RXCUI:2662305", + "RXCUI:991064", + "RXCUI:1171477", + "RXNORM:371815", + "UMLS:C3225556", + "UMLS:C1240680", + "PUBCHEM.COMPOUND:441344", + "RXCUI:991065", + "RXNORM:1171477", + "RXNORM:991087", + "UMLS:C3205695", + "UMLS:C0692732", + "RXNORM:368081", + "UMLS:C0700023", + "RXCUI:1151166", + "RXNORM:1151169", + "RXNORM:991068", + "UMLS:C4719825", + "UMLS:C2916890", + "RXCUI:371817", + "RXNORM:991081", + "RXCUI:368081", + "ATC:A03AA07", + "RXNORM:1151167", + "RXCUI:991151", + "RXNORM:3361", + "RXNORM:991062", + "RXCUI:991068", + "CHEMBL.COMPOUND:CHEMBL1123", + "RXCUI:991616", + "UMLS:C1242057", + "UMLS:C2916894", + "RXCUI:991085", + "UMLS:C5843663", + "RXCUI:371815", + "RXCUI:991069", + "RXCUI:1151168", + ], + "publications": [ + "PMID:22194678", + "PMID:23523385", + "PMID:3612532", + "PMID:3597632", + "PMID:2579237", + "PMID:18834112", + "PMID:24332655", + "PMID:1920350", + "PMID:14254329", + "PMID:22961681", + ], + }, + "predicate": "subclass_of", + "sources": [ + { "resource_id": "infores:mesh", "resource_role": "primary_knowledge_source", - }], - "kg2_ids": [ - "UMLS:C1564592---MESH:RN---None---None---None---UMLS:C0012125---umls_source:MSH", - "UMLS:C0012125---MESH:RB---None---None---None---UMLS:C1564592---umls_source:MSH" - ], - "domain_range_exclusion": False, - "knowledge_level": "knowledge_assertion", - "agent_type": "manual_agent", - "id": 1375555, - "predicate_ancestors": [ - "subclass_of", - "related_to_at_concept_level", - "related_to" - ] - } -)] + } + ], + "domain_range_exclusion": False, + "knowledge_level": "knowledge_assertion", + "agent_type": "manual_agent", + "id": 1375555, + "predicate_ancestors": [ + "subclass_of", + "related_to_at_concept_level", + "related_to", + ], + }, + ) + ) +] diff --git a/tests/data_tiers/tier_1/elasticsearch_tests/test_tier1_driver.py b/tests/data_tiers/tier_1/elasticsearch_tests/test_tier1_driver.py index bb37e8e4..ffa6684a 100644 --- a/tests/data_tiers/tier_1/elasticsearch_tests/test_tier1_driver.py +++ b/tests/data_tiers/tier_1/elasticsearch_tests/test_tier1_driver.py @@ -5,7 +5,7 @@ import retriever.config.general as general_mod import retriever.data_tiers.tier_1.elasticsearch.driver as driver_mod from retriever.data_tiers.tier_1.elasticsearch.transpiler import ElasticsearchTranspiler -from retriever.data_tiers.tier_1.elasticsearch.types import ESPayload, ESHit +from retriever.data_tiers.tier_1.elasticsearch.types import ESPayload, ESEdge from payload.trapi_qgraphs import DINGO_QGRAPH, VALID_REGEX_QGRAPHS, INVALID_REGEX_QGRAPHS @@ -112,7 +112,7 @@ async def test_elasticsearch_driver(payload: ESPayload | list[ESPayload], expect except Exception: pytest.skip("skipping es driver connection test: cannot connect") - hits: list[ESHit] | list[ESHit] = await driver.run_query(payload) + hits: list[ESEdge] | list[ESEdge] = await driver.run_query(payload) def assert_single_result(res, expected_result_num: int): if not isinstance(res, list): @@ -125,7 +125,7 @@ def assert_single_result(res, expected_result_num: int): assert len(hits) == len(payload) assert isinstance(hits[0], list) - for index, result in enumerate(cast(list[list[ESHit]], hits)): + for index, result in enumerate(cast(list[list[ESEdge]], hits)): assert_single_result(result, expected[index]) else: assert_single_result(hits, expected) @@ -161,7 +161,7 @@ async def test_valid_regex_query(): pytest.skip("skipping es driver connection test: cannot connect") for payload in qgraphs_with_valid_regex: - hits: list[ESHit] = await driver.run_query(payload) + hits: list[ESEdge] = await driver.run_query(payload) if hits is not None: print(len(hits)) @@ -187,6 +187,6 @@ async def test_end_to_end(): except Exception: pytest.skip("skipping es driver connection test: cannot connect") - hits: list[ESHit] = await driver.run_query(payload) + hits: list[ESEdge] = await driver.run_query(payload) assert len(hits) == 8