diff --git a/Pipfile b/Pipfile index 24db7aa6..c14369cc 100644 --- a/Pipfile +++ b/Pipfile @@ -11,7 +11,7 @@ jupyter-cache = ">=0.2,<1.0" matplotlib = ">=3.8" docutils = ">=0.17.0,<0.20" openbabel-wheel = ">=3.1" -rdkit-pypi = ">=2022.9" +rdkit = ">=2024.9.3" numpy = ">=1.26" jcamp = ">=1.2" sphinx = ">=6.2.0" @@ -20,7 +20,7 @@ nbmake = ">=1.4" nbclient = ">=0.6.8,<0.7" pyparsing = ">=3.0,<3.1" mdit-py-plugins = ">0.3,<0.4" -Jinja2 = ">=3.1.3" +Jinja2 = ">=3.1.6" pandas = "*" seaborn = "*" JupyterLab = ">=4.0.11" @@ -30,4 +30,4 @@ IPython = "*" [dev-packages] [requires] -python_version = "3.11" \ No newline at end of file +python_version = "3.12" \ No newline at end of file diff --git a/book/contributions/jh_ps_nfdi4chem_ts/images/nfdi4chem_ts_fig1.png b/book/contributions/jh_ps_nfdi4chem_ts/images/nfdi4chem_ts_fig1.png new file mode 100644 index 00000000..e03e3d58 Binary files /dev/null and b/book/contributions/jh_ps_nfdi4chem_ts/images/nfdi4chem_ts_fig1.png differ diff --git a/book/contributions/jh_ps_nfdi4chem_ts/nfdi4chem_ts.md b/book/contributions/jh_ps_nfdi4chem_ts/nfdi4chem_ts.md new file mode 100644 index 00000000..2d3746fa --- /dev/null +++ b/book/contributions/jh_ps_nfdi4chem_ts/nfdi4chem_ts.md @@ -0,0 +1,139 @@ +# The NFDI4Chem Terminology Service + +## About NFDI4Chem + +[NFDI4Chem](https://www.nfdi4chem.de/) is the Chemistry Consortium in the National Research Data Infrastructure for Germany [NFDI](https://www.nfdi.de/?lang=en) +(German: **N**ationale **F**orschungs**D**aten**I**nfrastruktur), a project launched in 2018 and funded by the German +government to build a national research data infrastructure in Germany for a wide range of scientific disciplines. The +vision of NFDI4Chem is to digitize all key steps in chemical research to support scientists in their efforts to collect, +store, process, analyze, publish, and reuse research data. 
Actions to promote open science and research data management +(RDM) in line with the [FAIR data principles](https://www.go-fair.org/fair-principles/) are fundamental objectives of NFDI4Chem to provide the chemistry +community with a holistic approach to research data access. To this end, the overall goal is to develop and maintain +innovative and user-friendly services and novel scientific approaches based on the reuse of research data. NFDI4Chem +aims to represent all disciplines of chemistry in academia and therefore works closely with thematically related +NFDI consortia. + +## NFDI4Chem Terminology Service + +Research data is not just a collection of numbers or images in a scientific journal article, experimental section, or +supplementary information. To fully comprehend the deduction of results and enable the exploration of new data-driven, +interdisciplinary research questions, access to the raw data and knowledge of how it was generated, processed, and +analyzed is essential. Research data needs to be FAIR (**F**indable, **A**ccessible, **I**nteroperable, **R**eusable) +for both humans and machines. Achieving machine-usable FAIR research data requires extensive metadata annotation, which +describes the data and its context. To semantically describe research data, ontologies, taxonomies, terminologies, or +vocabularies can be used, playing an important role in creating semantically rich, discipline-specific metadata. In +addition, consensual definitions of entities are formed, ensuring conceptual alignment across domains, even if the +nomenclature of the individual domains is different. Please visit the +[NFDI4Chem Knowledge Base article about Ontologies](https://knowledgebase.nfdi4chem.de/knowledge_base/docs/ontology/?_highlight=terminol#sources-and-further-information) to learn more about this topic. + +Using terminologies for data annotation presents a significant challenge as it requires selecting the appropriate terms +from the available terminologies. 
To make an informed decision, one must either possess prior knowledge of the suitable +terminologies and terms for a specific scientific context and use case or acquire this knowledge by browsing the +available terminologies. A terminology service is a tool for browsing terminologies that aims to make them +understandable to humans. Therefore, it should provide detailed information about the terminologies and the terms +they contain. This is particularly important when comparing multiple terminologies that cover the same or overlapping +areas of knowledge. Terminology users must be able to recognize the semantic differences and similarities, as well +as the interdependencies between terminologies, to make informed decisions. Although there are many sophisticated +terminology services available, such as the [Basic Register of Thesauri, Ontologies & Classifications (BARTOC)](https://bartoc.org/) +or the [Ontology Lookup Service (OLS)](https://www.ebi.ac.uk/ols4), they contain a large selection of different terminologies and ontologies +that can overwhelm domain experts. + +The [NFDI4Chem Terminology Service (TS)](https://terminology.nfdi4chem.de/ts/) only indexes terminologies that are most relevant to chemistry using +various parameters such as subject area relevance and license to facilitate domain experts' selection. Additionally, +the NFDI4Chem TS provides functions that go beyond simple search and provision of ontologies, enabling collaborative +curation and management. To minimize the risk of losing track, users can list or write GitHub issues directly from +the TS. If an ontology is not maintained on GitHub or if additional contextual information is desired, users can +write notes at the ontology or term level to make open issues, recommendations, or insights more visible to +themselves and others. 
+ +Using the example of "[mass concentration](https://terminology.nfdi4chem.de/ts/search?and=false&sorting=title&page=1&size=10&q=mass+concentration)", +a search in the NFDI4Chem TS yields almost 6,000 results. However, selecting the "Exact Match" option reduces the +number of results to six (see [Figure 1](images/nfdi4chem_ts_fig1.png)). + +![Figure 1: Screenshot of the result list obtained by searching for the term "mass concentration" in the NFDI4Chem TS with the "Exact Match" option.](images/nfdi4chem_ts_fig1.png) + +Of the six results, the option "Unit of Mass Concentration" is unsuitable as it only describes the unit. Therefore, +only five options remain, all of which are in principle suitable for semantically describing the term +"mass concentration". However, the term ["mass concentration" defined in the Chemical Methods Ontology (CHMO)](https://terminology.nfdi4chem.de/ts/ontologies/chmo/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FCHMO_0002821&obsoletes=false) +appears to be the most appropriate, as it is already used by another ontology, as indicated by the "Also in" field. +This example briefly illustrates how the NFDI4Chem TS assists chemists in identifying and selecting the most +appropriate terminology for their specific use case. + +In addition to the previously described use case of using the NFDI4Chem TS through the Graphical User Interface (GUI), +the TS can also be utilized by other services, such as [Electronic Lab Notebooks (ELNs)](https://knowledgebase.nfdi4chem.de/knowledge_base/docs/eln/), through the provided +[Application Programming Interface (API)](https://terminology.nfdi4chem.de/ts/api). In addition to the search functionality, the Terminology Service API +provides other functions for interacting with indexed ontologies. For example, the autocomplete or direct select +function can be used to populate forms or dropdown menus. 
class ChemSearch:
    """Search the NFDI4Chem Terminology Service through its search API.

    Create an instance with the search parameters, call ``run()`` and, when
    it returned ``True``, print the outcome with the ``get_*`` methods.
    """

    def __init__(self, query, exactMatch, page, size):
        """Store the search parameters; no request is sent yet.

        Args:
            query: Search text. It is URL-encoded here, so ``self.query``
                holds the encoded form (spaces become ``+``).
            exactMatch: Sent as the ``exact`` query parameter.
            page: Sent as the ``start`` query parameter.
            size: Sent as the ``rows`` query parameter.
        """
        self.query = urllib.parse.quote_plus(query)
        self.exactMatch = exactMatch
        self.rows = size
        self.start = page
        self.collection = "NFDI4CHEM"
        # The literal is split with implicit string concatenation so that it
        # stays one valid string without any whitespace inside the URL.
        self.base_api_url = (
            "https://service.tib.eu/ts4tib/api/search"
            "?groupField=iri&schema=collection"
        )
        # Empty dicts (not lists): run() stores dicts here and every getter
        # uses dict access, so the getters also work before a successful run.
        self.search_result = {}
        self.search_facet = {}

    def _search_url(self):
        """Return the complete request URL for the stored parameters."""
        params = "&q={}&exact={}&start={}&rows={}&classification={}".format(
            self.query,
            self.exactMatch,
            self.start,
            self.rows,
            self.collection)
        return self.base_api_url + params

    def run(self):
        """Send the search request and keep the parsed answer.

        Returns:
            True when the server answered with status 200; the ``response``
            part of the JSON body is then stored in ``search_result`` and the
            ``facet_counts.facet_fields`` part in ``search_facet``.
            False for any other status code (the stored data is untouched).
        """
        response = requests.get(self._search_url())
        if response.status_code != 200:
            return False
        payload = response.json()
        self.search_result = payload['response']
        self.search_facet = payload['facet_counts']['facet_fields']
        return True

    def get_number_of_found_result(self):
        """Print the ``numFound`` value of the last search (0 if none)."""
        print(self.search_result.get('numFound', 0))
        return True

    def get_results(self):
        """Print IRI, short form, ontology name and type of every hit."""
        for res in self.search_result.get('docs', []):
            print("Iri: " + res['iri'])
            print("short_form: " + res['short_form'])
            print("ontology_name: " + res['ontology_name'])
            print("Type: " + res['type'])
        return True

    def get_facet(self):
        """Print the facet fields of the last search.

        Each facet field maps to a flat list that is read pairwise: entries
        at even positions are printed as the facet item, entries at odd
        positions as the count belonging to the item before them.
        """
        for facet_name, facet_values in self.search_facet.items():
            print("Facet Field Name: " + facet_name)
            for position, value in enumerate(facet_values):
                if position % 2 == 0:
                    print("facet item: " + str(value))
                else:
                    print("count: " + str(value))
        return True
class ChemSearch:
    """Search class for the NFDI4Chem Terminology Service, based on its API.

    Create an instance with the search parameters, call ``run()`` and, when
    it returned ``True``, print the outcome with the ``get_*`` methods.
    """

    def __init__(self, query, exactMatch, page, size):
        """Initialize (configure) the parameters for the API search.

        Args:
            query: Search text. It is URL-encoded here, so ``self.query``
                holds the encoded form (spaces become ``+``).
            exactMatch: Sent as the ``exact`` query parameter.
            page: Sent as the ``start`` query parameter.
            size: Sent as the ``rows`` query parameter.
        """
        self.query = urllib.parse.quote_plus(query)
        self.exactMatch = exactMatch
        self.rows = size
        self.start = page
        self.collection = "NFDI4CHEM"
        # No whitespace is allowed inside the query string: spaces around
        # "=" and "&" would become part of the parameter names and values.
        self.base_api_url = (
            "https://service.tib.eu/ts4tib/api/search"
            "?groupField=iri&schema=collection"
        )
        # Empty dicts (not lists): run() stores dicts here and every getter
        # uses dict access, so the getters also work before a successful run.
        self.search_result = {}
        self.search_facet = {}

    def _search_url(self):
        """Return the complete request URL for the stored parameters."""
        params = "&q={}&exact={}&start={}&rows={}&classification={}".format(
            self.query,
            self.exactMatch,
            self.start,
            self.rows,
            self.collection)
        return self.base_api_url + params

    def run(self):
        """Run the search on the terminology server and store the answer.

        Returns:
            True when the server answered with status 200; the ``response``
            part of the JSON body is then stored in ``search_result`` and the
            ``facet_counts.facet_fields`` part in ``search_facet``.
            False for any other status code (the stored data is untouched).
        """
        response = requests.get(self._search_url())
        if response.status_code != 200:
            return False
        payload = response.json()
        self.search_result = payload['response']
        self.search_facet = payload['facet_counts']['facet_fields']
        return True

    def get_number_of_found_result(self):
        """Display the number of results found (0 if nothing was loaded)."""
        print(self.search_result.get('numFound', 0))
        return True

    def get_results(self):
        """Display IRI, short form, ontology name and type of every hit."""
        for res in self.search_result.get('docs', []):
            print("Iri: " + res['iri'])
            print("short_form: " + res['short_form'])
            print("ontology_name: " + res['ontology_name'])
            print("Type: " + res['type'])
        return True

    def get_facet(self):
        """Display the facet fields returned with the last search.

        Each facet field maps to a flat list that is read pairwise: entries
        at even positions are printed as the facet item, entries at odd
        positions as the count belonging to the item before them.
        """
        for facet_name, facet_values in self.search_facet.items():
            print("Facet Field Name: " + facet_name)
            for position, value in enumerate(facet_values):
                if position % 2 == 0:
                    print("facet item: " + str(value))
                else:
                    print("count: " + str(value))
        return True
-nbdime==3.1.1 nbformat==5.9.2 nbmake==1.4.6 nest-asyncio==1.5.8 @@ -97,7 +84,6 @@ pandas==2.1.4 pandocfilters==1.5.0 parso==0.8.3 pexpect==4.9.0 -pickleshare==0.7.5 Pillow==10.2.0 platformdirs==4.1.0 pluggy==1.3.0 @@ -112,7 +98,6 @@ pycparser==2.21 pydata-sphinx-theme==0.13.3 Pygments==2.17.2 pyparsing==3.0.9 -pyrsistent==0.19.2 pytest==7.4.3 python-dateutil==2.8.2 python-json-logger==2.0.7 @@ -121,7 +106,6 @@ PyYAML==6.0.1 pyzmq==25.1.2 qtconsole==5.5.1 QtPy==2.4.1 -rdkit-pypi==2022.9.5 referencing==0.32.0 requests==2.31.0 rfc3339-validator==0.1.4 @@ -130,7 +114,6 @@ rpds-py==0.15.2 seaborn==0.13.0 Send2Trash==1.8.2 six==1.16.0 -smmap==5.0.0 sniffio==1.3.0 snowballstemmer==2.2.0 soupsieve==2.5 @@ -156,7 +139,6 @@ stack-data==0.6.3 tabulate==0.9.0 terminado==0.18.0 tinycss2==1.2.1 -tomli==2.0.1 tornado==6.4 traitlets==5.14.0 types-python-dateutil==2.8.19.14