1818import itertools
1919import math
2020import pandas as pd
21+ import gzip
22+ import glob
2123
2224import colorama
2325from colorama import Fore
@@ -64,6 +66,52 @@ def backoff_handler(details):
6466 "{kwargs}" .format (** details )
6567 )
6668
# handles reading minified ontologies and performing term/synonym lookups
class MinifiedOntologyReader:
    """Load minified ontology files from disk and serve ID lookups from memory.

    Each ``<name>.min.tsv.gz`` file found in the ``ontologies`` directory next
    to this module is parsed into a dictionary keyed by ontology ID, where
    every value has the form ``{"label": str, "synonyms": [str, ...]}``.
    """

    # Class-level store: every instance reads and writes the same dictionary,
    # keyed by ontology name.
    parsed_ontologies = {}

    # file name suffix that marks a minified ontology
    ONTOLOGY_SUFFIX = ".min.tsv.gz"

    def __init__(self):
        """Parse every minified ontology file in the ``ontologies`` directory."""
        ontology_dir = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "ontologies"
        )
        pattern = os.path.join(ontology_dir, f"*{self.ONTOLOGY_SUFFIX}")
        for ontology_file in glob.glob(pattern):
            # basename handles any path separator; the glob pattern guarantees
            # the suffix is present, so it can be sliced off directly
            file_name = os.path.basename(ontology_file)
            ontology_name = file_name[: -len(self.ONTOLOGY_SUFFIX)]
            self.populate_ontology(ontology_name, ontology_file)

    def ontology_names(self):
        """Return the names of all ontologies parsed so far
        :return: list of ontology names
        """
        return list(self.parsed_ontologies)

    def populate_ontology(self, ontology_name, ontology_file):
        """Parses ontology file by name and populates entries into parsed_ontologies for lookup
        :param ontology_name: name of ontology
        :param ontology_file: path to gzipped, tab-delimited ontology file with
            three columns per line: ID, label, "||"-delimited synonyms
        :return: parsed ontology dictionary
        """
        dev_logger.debug(f"populating minified ontology {ontology_name} from {ontology_file}")
        ontology = {}
        # explicit encoding so non-ASCII labels do not depend on the host locale
        with gzip.open(ontology_file, "rt", encoding="utf-8") as file_gz:
            # iterate lazily rather than loading every line with readlines()
            for raw_line in file_gz:
                line = raw_line.rstrip("\r\n")
                if not line:
                    # blank lines carry no entry and are not worth an error log
                    continue
                try:
                    ontology_id, label, raw_syn = line.split("\t")
                except ValueError as e:
                    # wrong number of columns: report it and keep parsing
                    dev_logger.error(f"could not process {line} from {ontology_name}: {e}")
                    continue
                ontology[ontology_id] = {"label": label, "synonyms": raw_syn.split("||")}
        self.parsed_ontologies[ontology_name] = ontology
        return ontology

    def find_ontology_entry(self, ontology_name, identifier, property_name):
        """Find an entry in a parsed ontology by identifier
        :param ontology_name: name of ontology
        :param identifier: ontology ID, e.g. MONDO_0005887
        :param property_name: name of metadata property, e.g. species
        :return: dict with "label" and "synonyms" keys
        :raises ValueError: if the ontology or the identifier is not found
        """
        entry = self.parsed_ontologies.get(ontology_name, {}).get(identifier)
        if entry:
            return entry
        msg = f"{property_name}: No match found in EBI OLS for provided ontology ID: {identifier}"
        raise ValueError(msg)
114+
67115
68116# contains methods for looking up terms in various ontologies,
69117# as well as caching results of previous queries to speed up performance
@@ -113,6 +161,10 @@ def retrieve_ontology_term_label_remote(
113161 if property_name == "organ_region" :
114162 return self .retrieve_mouse_brain_term (term , property_name )
115163 else :
164+ # leave debug statement for QA purposes later
165+ dev_logger .debug (
166+ f"Using fallback EBI OLS call with { ontology_urls } , { term } , { property_name } "
167+ )
116168 return self .retrieve_ols_term (
117169 ontology_urls , term , property_name , attribute_type
118170 )
@@ -328,7 +380,7 @@ def get_ontology_file_location(ontology):
328380
329381# create an OntologyRetriever instance to handle fetching and caching ontology terms
330382retriever = OntologyRetriever ()
331-
383+ minified_reader = MinifiedOntologyReader ()
332384
333385def validate_schema (json , metadata ):
334386 """Check validity of metadata convention as JSON schema.
@@ -416,6 +468,22 @@ def validate_cells_unique(metadata):
416468 )
417469 return valid
418470
def retrieve_label_and_synonyms(
    ontology_id, property_name, convention, property_type
):
    """Look up the label and synonyms for an ontology ID, locally when possible

    The ontology name is the lowercased text before the first "_" or ":" in
    the ID. If ontology_is_local() reports that ontology as available, the
    entry comes from minified_reader; otherwise the lookup is delegated to
    retriever.
    :param ontology_id: ontology ID, e.g. MONDO_0005887
    :param property_name: name of metadata property, e.g. species
    :param convention: metadata convention being checked against
    :param property_type: attribute type for term (string, array, boolean)
    :return: whatever the selected lookup returns; callers in this file read
        it with .get('label')
    """
    id_prefix = re.split("[_:]", ontology_id)[0]
    ontology_name = id_prefix.lower()
    if not ontology_is_local(ontology_name):
        # no local copy of this ontology, so use the remote retriever
        return retriever.retrieve_ontology_term_label_and_synonyms(
            ontology_id, property_name, convention, property_type
        )
    return minified_reader.find_ontology_entry(
        ontology_name, ontology_id, property_name
    )
419487
420488def insert_array_ontology_label_row_data (
421489 property_name , row , metadata , required , convention , ontology_label
@@ -437,11 +505,8 @@ def insert_array_ontology_label_row_data(
437505 for id in row [property_name ]:
438506 label_lookup = ""
439507 try :
440- label_and_synonyms = (
441- retriever .retrieve_ontology_term_label_and_synonyms (
442- id , property_name , convention , "array"
443- )
444- )
508+
509+ label_and_synonyms = retrieve_label_and_synonyms (id , property_name , convention , "array" )
445510 label_lookup = label_and_synonyms .get ('label' )
446511 reference_ontology = (
447512 "EBI OLS lookup"
@@ -494,9 +559,7 @@ def insert_ontology_label_row_data(
494559 # for optional columns, try to fill it in
495560 property_type = convention ["properties" ][property_name ]["type" ]
496561 try :
497- label_and_synonyms = retriever .retrieve_ontology_term_label_and_synonyms (
498- id , property_name , convention , property_type
499- )
562+ label_and_synonyms = retrieve_label_and_synonyms (id , property_name , convention , property_type )
500563 label = label_and_synonyms .get ('label' )
501564 row [ontology_label ] = label
502565 reference_ontology = (
@@ -1056,6 +1119,12 @@ def is_label_or_synonym(labels, provided_label):
10561119 else :
10571120 return False
10581121
def ontology_is_local(ontology_name):
    """Report whether an ontology can be validated locally instead of via OLS
    :param ontology_name: name of ontology
    :return: Boolean, True only when the name is among the ontologies parsed
        by minified_reader
    """
    if ontology_name is None:
        return False
    return ontology_name in minified_reader.ontology_names()
10591128
10601129def validate_collected_ontology_data (metadata , convention ):
10611130 """Evaluate collected ontology_id, ontology_label info in
@@ -1080,15 +1149,10 @@ def validate_collected_ontology_data(metadata, convention):
10801149
10811150 for ontology_info in metadata .ontology [property_name ].keys ():
10821151 ontology_id , ontology_label = ontology_info
1083-
10841152 try :
10851153 attribute_type = convention ["properties" ][property_name ]["type" ]
10861154 # get actual label along with synonyms for more robust matching
1087- label_and_synonyms = (
1088- retriever .retrieve_ontology_term_label_and_synonyms (
1089- ontology_id , property_name , convention , attribute_type
1090- )
1091- )
1155+ label_and_synonyms = retrieve_label_and_synonyms (ontology_id , property_name , convention , attribute_type )
10921156
10931157 if not is_label_or_synonym (label_and_synonyms , ontology_label ):
10941158 matched_label_for_id = label_and_synonyms .get ("label" )
0 commit comments