diff --git a/Snakefile b/Snakefile index 21527417..af6a18d4 100644 --- a/Snakefile +++ b/Snakefile @@ -1,5 +1,6 @@ configfile: "config.yaml" + include: "src/snakefiles/datacollect.snakefile" include: "src/snakefiles/anatomy.snakefile" include: "src/snakefiles/cell_line.snakefile" @@ -19,6 +20,7 @@ include: "src/snakefiles/duckdb.snakefile" include: "src/snakefiles/reports.snakefile" include: "src/snakefiles/exports.snakefile" + # Some general imports. import shutil from src.snakefiles.util import write_done @@ -28,6 +30,7 @@ import os os.environ["TMPDIR"] = config["tmp_directory"] + # Top-level rules. rule all: input: @@ -43,7 +46,7 @@ rule all: config["output_directory"] + "/kgx/done", config["output_directory"] + "/sapbert-training-data/done", # Store the config.yaml file used to produce the output. - config_file = "config.yaml", + config_file="config.yaml", output: x=config["output_directory"] + "/reports/all_done", output_config_file=config["output_directory"] + "/config.yaml", diff --git a/input_data/parse_bad_mappings.py b/input_data/parse_bad_mappings.py index 8705f924..0677b5ba 100644 --- a/input_data/parse_bad_mappings.py +++ b/input_data/parse_bad_mappings.py @@ -1,10 +1,10 @@ -from collections import defaultdict from ast import literal_eval +from collections import defaultdict def read_bad_hp_mappings(fn): drops = defaultdict(set) - with open(fn, "r") as infile: + with open(fn) as infile: for line in infile: if line.startswith("-"): continue @@ -12,7 +12,7 @@ def read_bad_hp_mappings(fn): hps = x[0] commaindex = hps.index(",") curie = hps[1:commaindex] - name = hps[commaindex + 1 : -1] + # name = hps[commaindex + 1 : -1] badset = literal_eval(x[1]) drops[curie].update(badset) return drops diff --git a/pyproject.toml b/pyproject.toml index e5daef09..b8dfcd32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,13 +48,31 @@ apybiomart = { git = "https://github.com/gaurav/apybiomart.git", rev = "change-c [dependency-groups] dev = [ + "ruff>=0.14.9", "snakefmt>=0.11.2", ] # Linting/formatting configuration [tool.ruff] -line-length = 160 +line-length = 120 [tool.snakefmt] -line_length = 160 +line_length = 120 include = '\.snakefile$|^Snakefile' + +[tool.ruff.lint] +# Enable all rules that ruff format would normally apply +select = [ + "E", # pycodestyle errors + "F", # pyflakes + "I", # isort (import sorting) + "UP", # pyupgrade +] + +# Optional but common +ignore = [ + "E501", # let Ruff handle wrapping consistently +] + +fixable = ["ALL"] +unfixable = [] diff --git a/releases/summaries/compare.py b/releases/summaries/compare.py index 6223a668..59e7cf6f 100644 --- a/releases/summaries/compare.py +++ b/releases/summaries/compare.py @@ -5,10 +5,10 @@ file1 = "2024mar24.json" file2 = "2024jul13.json" -with open(file1, "r") as f: +with open(file1) as f: summary1 = json.load(f) -with open(file2, "r") as f: +with open(file2) as f: summary2 = json.load(f) diff --git a/src/assess_compendia.py b/src/assess_compendia.py index b15452a9..90ae4df4 100644 --- a/src/assess_compendia.py +++ b/src/assess_compendia.py @@ -1,7 +1,9 @@ import os +from collections import defaultdict from os import path + import jsonlines -from collections import defaultdict + from src.util import Text @@ -11,7 +13,7 @@ def assess_completeness(input_dir, compendia, reportfile): id_files = os.listdir(input_dir) all_identifiers = set() for idf in id_files: - with open(path.join(input_dir, idf), "r") as inf: + with open(path.join(input_dir, idf)) as inf: for line in inf: x = line.strip().split("\t")[0] 
all_identifiers.add(x) @@ -23,11 +25,11 @@ def assess_completeness(input_dir, compendia, reportfile): for identifier in ids: all_identifiers.discard(identifier) with open(reportfile, "w") as outf: - l = list(all_identifiers) - l.sort() - print(f"Missing identifiers: {len(l)}\n") - outf.write(f"Missing identifiers: {len(l)}\n") - for missing_id in l: + list_all_identifiers = list(all_identifiers) + list_all_identifiers.sort() + print(f"Missing identifiers: {len(list_all_identifiers)}\n") + outf.write(f"Missing identifiers: {len(list_all_identifiers)}\n") + for missing_id in list_all_identifiers: outf.write(f"{missing_id}\n") diff --git a/src/babel_utils.py b/src/babel_utils.py index ccd1420b..19981327 100644 --- a/src/babel_utils.py +++ b/src/babel_utils.py @@ -1,27 +1,26 @@ +import gzip +import os +import sqlite3 import subprocess +import time import traceback +import urllib +from collections import defaultdict +from datetime import datetime, timedelta from enum import Enum from ftplib import FTP from io import BytesIO -import gzip -from datetime import timedelta -import time from pathlib import Path -import requests -import os -import urllib import jsonlines +import requests from humanfriendly import format_timespan -from src.metadata.provenance import write_combined_metadata -from src.node import NodeFactory, SynonymFactory, DescriptionFactory, InformationContentFactory, TaxonFactory -from src.properties import PropertyList, HAS_ALTERNATIVE_ID -from src.util import Text, get_config, get_memory_usage_summary, get_logger from src.LabeledID import LabeledID -from collections import defaultdict -import sqlite3 -from typing import List, Tuple +from src.metadata.provenance import write_combined_metadata +from src.node import DescriptionFactory, InformationContentFactory, NodeFactory, SynonymFactory, TaxonFactory +from src.properties import HAS_ALTERNATIVE_ID, PropertyList +from src.util import Text, get_config, get_logger, get_memory_usage_summary # Configuration items WRITE_COMPENDIUM_LOG_EVERY_X_CLIQUES = 1_000_000 @@ -144,7 +143,7 @@ def __init__(self, delta_ms): self.delta = timedelta(milliseconds=delta_ms) def get(self, url): - now = dt.now() + now = datetime.now() throttled = False if self.last_time is not None: cdelta = now - self.last_time @@ -152,7 +151,7 @@ def get(self, url): waittime = self.delta - cdelta time.sleep(waittime.microseconds / 1e6) throttled = True - self.last_time = dt.now() + self.last_time = datetime.now() response = requests.get(url) return response, throttled @@ -194,7 +193,6 @@ def pull_via_urllib(url: str, in_file_name: str, decompress=True, subpath=None, """ # Everything goes in downloads download_dir = get_config()["download_directory"] - working_dir = download_dir # get the (local) download file name, derived from the input file name if subpath is None: @@ -589,11 +587,11 @@ def write_compendium(metadata_yamls, synonym_list, ofname, node_type, labels=Non possible_labels = map(lambda identifier: identifier.get("label", ""), node["identifiers"]) # Step 2. Filter out any suspicious labels. - filtered_possible_labels = [l for l in possible_labels if l] # Ignore blank or empty names. + filtered_possible_labels = [label for label in possible_labels if label] # Ignore blank or empty names. # Step 3. Filter out labels longer than config['demote_labels_longer_than'], but only if there is at # least one label shorter than this limit. 
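Most of the mechanical churn in this patch follows from the [tool.ruff.lint] selection added to pyproject.toml above: E (pycodestyle), F (pyflakes), I (import sorting, which accounts for the large import-reordering hunks), and UP (pyupgrade). These are lint rules applied by `ruff check --fix`, separate from the `ruff format`/`snakefmt` line-length change to 120. As a rough illustration of what the non-import rules rewrite, here is a hypothetical before/after sketch; the function names and the concord-style TSV layout are invented for the example, and the rule codes in the comments are the standard ruff codes for these fixes, not anything taken from this repository.

    # Before: the kind of code this patch mechanically cleans up.
    from typing import List, Tuple  # UP035: deprecated typing imports


    def read_pairs_old(path: str) -> List[Tuple[str, str]]:  # UP006: use list/tuple
        l = []  # E741: ambiguous variable name
        with open(path, "r") as inf:  # UP015: "r" is already the default mode
            for line in inf:
                x = line.strip().split("\t")
                l.append((x[0], x[2]))
        return l


    # After: roughly what `ruff check --fix` followed by `ruff format` leaves behind.
    def read_pairs(path: str) -> list[tuple[str, str]]:
        pairs = []
        with open(path) as inf:
            for line in inf:
                x = line.strip().split("\t")
                pairs.append((x[0], x[2]))
        return pairs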
- labels_shorter_than_limit = [l for l in filtered_possible_labels if l and len(l) <= config["demote_labels_longer_than"]] + labels_shorter_than_limit = [label for label in filtered_possible_labels if label and len(label) <= config["demote_labels_longer_than"]] if labels_shorter_than_limit: filtered_possible_labels = labels_shorter_than_limit @@ -782,7 +780,7 @@ def glom(conc_set, newgroups, unique_prefixes=["INCHIKEY"], pref="HP", close={}) shit_prefixes = set(["KEGG", "PUBCHEM"]) test_id = "xUBERON:0002262" debugit = False - excised = set() + # excised = set() for xgroup in newgroups: if isinstance(xgroup, frozenset): group = set(xgroup) @@ -802,7 +800,7 @@ def glom(conc_set, newgroups, unique_prefixes=["INCHIKEY"], pref="HP", close={}) existing_sets_w_x = [(conc_set[x], x) for x in group if x in conc_set] # All of these sets are now going to be combined through the equivalence of our new set. existing_sets = [es[0] for es in existing_sets_w_x] - x = [es[1] for es in existing_sets_w_x] + # x = [es[1] for es in existing_sets_w_x] newset = set().union(*existing_sets) if debugit: print("merges:", existing_sets) @@ -830,7 +828,7 @@ def glom(conc_set, newgroups, unique_prefixes=["INCHIKEY"], pref="HP", close={}) for up in unique_prefixes: if test_id in group: print("up?", up) - idents = [e if type(e) == str else e.identifier for e in newset] + idents = [e if isinstance(e, str) else e.identifier for e in newset] if len(set([e for e in idents if (e.split(":")[0] == up)])) > 1: bad += 1 setok = False @@ -840,18 +838,15 @@ def glom(conc_set, newgroups, unique_prefixes=["INCHIKEY"], pref="HP", close={}) wrote.add(fs) for gel in group: if Text.get_prefix_or_none(gel) == pref: - killer = gel + # killer = gel + pass # for preset in wrote: # print(f'{killer}\t{set(group).intersection(preset)}\t{preset}\n') # print('------------') NPC = sum(1 for s in newset if s.startswith("PUBCHEM.COMPOUND:")) if ("PUBCHEM.COMPOUND:3100" in newset) and (NPC > 3): if debugit: - l = sorted(list(newset)) - print("bad") - for li in l: - print(li) - exit() + raise ValueError(f"Debugging information: {sorted(list(newset))}") if not setok: # Our new group created a new set that merged stuff we didn't want to merge. # Previously we did a lot of fooling around at this point. But now we're just going to say, I have a @@ -894,7 +889,7 @@ def glom(conc_set, newgroups, unique_prefixes=["INCHIKEY"], pref="HP", close={}) # Now check the 'close' dictionary to see if we've accidentally gotten to a close match becoming an exact match setok = True for cpref, closedict in close.items(): - idents = set([e if type(e) == str else e.identifier for e in newset]) + idents = set([e if isinstance(e, str) else e.identifier for e in newset]) prefidents = [e for e in idents if e.startswith(cpref)] for pident in prefidents: for cd in closedict[pident]: @@ -978,7 +973,7 @@ def read_identifier_file(infile): a hint to the normalizer about the proper biolink type for this entity.""" types = {} identifiers = list() - with open(infile, "r") as inf: + with open(infile) as inf: for line in inf: x = line.strip().split("\t") identifiers.append((x[0],)) @@ -987,7 +982,7 @@ def read_identifier_file(infile): return identifiers, types -def remove_overused_xrefs(pairlist: List[Tuple], bothways: bool = False): +def remove_overused_xrefs(pairlist: list[tuple], bothways: bool = False): """Given a list of tuples (id1, id2) meaning id1-[xref]->id2, remove any id2 that are associated with more than one id1. The idea is that if e.g. 
id1 is made up of UBERONS and 2 of those have an xref to say a UMLS then it doesn't mean that all of those should be identified. We don't really know what it means, so remove it.""" diff --git a/src/createcompendia/anatomy.py b/src/createcompendia/anatomy.py index e8dcf7a0..0c043020 100644 --- a/src/createcompendia/anatomy.py +++ b/src/createcompendia/anatomy.py @@ -1,16 +1,16 @@ from collections import defaultdict + import requests +import src.datahandlers.mesh as mesh import src.datahandlers.obo as obo +import src.datahandlers.umls as umls +from src.babel_utils import get_prefixes, glom, read_identifier_file, remove_overused_xrefs, write_compendium +from src.categories import ANATOMICAL_ENTITY, CELL, CELLULAR_COMPONENT, GROSS_ANATOMICAL_STRUCTURE from src.metadata.provenance import write_concord_metadata -from src.util import Text - -from src.prefixes import MESH, NCIT, CL, GO, UBERON, SNOMEDCT, WIKIDATA, UMLS, FMA -from src.categories import ANATOMICAL_ENTITY, GROSS_ANATOMICAL_STRUCTURE, CELL, CELLULAR_COMPONENT +from src.prefixes import CL, FMA, GO, MESH, NCIT, SNOMEDCT, UBERON, UMLS, WIKIDATA from src.ubergraph import build_sets -from src.babel_utils import write_compendium, glom, get_prefixes, read_identifier_file, remove_overused_xrefs -import src.datahandlers.umls as umls -import src.datahandlers.mesh as mesh +from src.util import Text def remove_overused_xrefs_dict(kv): @@ -190,7 +190,7 @@ def build_compendia(concordances, metadata_yamls, identifiers, icrdf_filename): # them added. So we want to limit concordances to terms that are already in the dicts. But that's ONLY for the # UMLS concord. We trust the others to retrieve decent identifiers. bs = frozenset([UMLS, GO]) - with open(infile, "r") as inf: + with open(infile) as inf: for line in inf: x = line.strip().split("\t") prefixes = frozenset([xi.split(":")[0] for xi in x[0:3:2]]) # leave out the predicate @@ -202,7 +202,7 @@ def build_compendia(concordances, metadata_yamls, identifiers, icrdf_filename): use = False if not use: continue - pairs.append(([x[0], x[2]])) + pairs.append([x[0], x[2]]) newpairs = remove_overused_xrefs(pairs) setpairs = [set(x) for x in newpairs] glom(dicts, setpairs, unique_prefixes=[UBERON, GO]) diff --git a/src/createcompendia/cell_line.py b/src/createcompendia/cell_line.py index e4e55a5a..67782c32 100644 --- a/src/createcompendia/cell_line.py +++ b/src/createcompendia/cell_line.py @@ -1,7 +1,6 @@ +from src.babel_utils import glom, read_identifier_file, write_compendium from src.categories import CELL_LINE -from src.babel_utils import read_identifier_file, glom, write_compendium - def build_compendia(ifile, metadata_yamls, icrdf_filename): """:identifiers: a list of files from which to read identifiers and optional categories""" diff --git a/src/createcompendia/chemicals.py b/src/createcompendia/chemicals.py index f55ead18..628ec0d9 100644 --- a/src/createcompendia/chemicals.py +++ b/src/createcompendia/chemicals.py @@ -1,3 +1,5 @@ +import ast +import gzip import logging import os from collections import defaultdict @@ -5,22 +7,39 @@ import jsonlines import requests -import ast -import gzip - -from src.properties import Property, HAS_ALTERNATIVE_ID -from src.metadata.provenance import write_concord_metadata, write_combined_metadata -from src.ubergraph import UberGraph -from src.prefixes import MESH, CHEBI, UNII, DRUGBANK, INCHIKEY, PUBCHEMCOMPOUND, GTOPDB, KEGGCOMPOUND, DRUGCENTRAL, CHEMBLCOMPOUND, UMLS, RXCUI -from src.categories import MOLECULAR_MIXTURE, SMALL_MOLECULE, CHEMICAL_ENTITY, 
POLYPEPTIDE, COMPLEX_MOLECULAR_MIXTURE, CHEMICAL_MIXTURE, DRUG -from src.sdfreader import read_sdf - -from src.datahandlers.unichem import data_sources as unichem_data_sources -from src.babel_utils import write_compendium, glom, get_prefixes, read_identifier_file, remove_overused_xrefs import src.datahandlers.mesh as mesh import src.datahandlers.umls as umls -from src.util import get_memory_usage_summary, Text, get_logger +from src.babel_utils import get_prefixes, glom, read_identifier_file, remove_overused_xrefs, write_compendium +from src.categories import ( + CHEMICAL_ENTITY, + CHEMICAL_MIXTURE, + COMPLEX_MOLECULAR_MIXTURE, + DRUG, + MOLECULAR_MIXTURE, + POLYPEPTIDE, + SMALL_MOLECULE, +) +from src.datahandlers.unichem import data_sources as unichem_data_sources +from src.metadata.provenance import write_combined_metadata, write_concord_metadata +from src.prefixes import ( + CHEBI, + CHEMBLCOMPOUND, + DRUGBANK, + DRUGCENTRAL, + GTOPDB, + INCHIKEY, + KEGGCOMPOUND, + MESH, + PUBCHEMCOMPOUND, + RXCUI, + UMLS, + UNII, +) +from src.properties import HAS_ALTERNATIVE_ID, Property +from src.sdfreader import read_sdf +from src.ubergraph import UberGraph +from src.util import Text, get_logger, get_memory_usage_summary logger = get_logger(__name__) @@ -96,7 +115,7 @@ def build_chemical_rxnorm_relationships(conso, idfile, outfile, metadata_yaml): def write_pubchem_ids(labelfile, smilesfile, outfile): # Trying to be memory efficient here. We could just ingest the whole smilesfile which would make this code easier # but since they're already sorted, let's give it a shot - with open(labelfile, "r") as inlabels, gzip.open(smilesfile, "rt", encoding="utf-8") as insmiles, open(outfile, "w") as outf: + with open(labelfile) as inlabels, gzip.open(smilesfile, "rt", encoding="utf-8") as insmiles, open(outfile, "w") as outf: sn = -1 flag_file_ended = False for labelline in inlabels: @@ -196,7 +215,7 @@ def write_chebi_ids(outfile): def write_unii_ids(infile, outfile): """UNII contains a bunch of junk like leaves. We are going to try to clean it a bit to get things that are actually chemicals. 
In biolink 2.0 we cn revisit exactly what happens here.""" - with open(infile, "r", encoding="windows-1252") as inf, open(outfile, "w") as outf: + with open(infile, encoding="windows-1252") as inf, open(outfile, "w") as outf: h = inf.readline().strip().split("\t") bad_cols = ["NCBI", "PLANTS", "GRIN", "MPNS"] bad_colnos = [h.index(bc) for bc in bad_cols] @@ -220,7 +239,7 @@ def write_drugbank_ids(infile, outfile): drugbank_id = "2" assert unichem_data_sources[drugbank_id] == DRUGBANK written = set() - with open(infile, "r") as inf, open(outfile, "w") as outf: + with open(infile) as inf, open(outfile, "w") as outf: header_line = inf.readline() assert header_line == "UCI\tSRC_ID\tSRC_COMPOUND_ID\tASSIGNMENT\n", f"Incorrect header line in {infile}: {header_line}" for line in inf: @@ -235,11 +254,11 @@ def write_drugbank_ids(infile, outfile): def write_chemical_ids_from_labels_and_smiles(labelfile, smifile, outfile): smiles = {} - with open(smifile, "r") as inf: + with open(smifile) as inf: for line in inf: x = line.strip().split("\t") smiles[x[0]] = x[1] - with open(labelfile, "r") as inf, open(outfile, "w") as outf: + with open(labelfile) as inf, open(outfile, "w") as outf: for line in inf: hmdbid = line.split("\t")[0] if hmdbid in smiles: @@ -252,7 +271,7 @@ def write_chemical_ids_from_labels_and_smiles(labelfile, smifile, outfile): def parse_smifile(infile, outfile, smicol, idcol, pref, stripquotes=False): idcol_index = None smicol_index = None - with open(infile, "r") as inf, open(outfile, "w") as outf: + with open(infile) as inf, open(outfile, "w") as outf: for line in inf: if line.startswith('"# GtoPdb Version'): # Version line! Skip. @@ -318,7 +337,7 @@ def parse_smifile(infile, outfile, smicol, idcol, pref, stripquotes=False): def write_drugcentral_ids(infile, outfile): smicol = 1 idcol = 0 - with open(infile, "r") as inf, open(outfile, "w") as outf: + with open(infile) as inf, open(outfile, "w") as outf: for line in inf: x = line.strip().split("\t") if x[smicol] == "None": @@ -338,7 +357,7 @@ def write_unichem_concords(structfile, reffile, outdir): concname = f"{outdir}/UNICHEM_{name}" print(concname) concfiles[num] = open(concname, "w") - with open(reffile, "rt") as inf: + with open(reffile) as inf: header_line = inf.readline() assert header_line == "UCI\tSRC_ID\tSRC_COMPOUND_ID\tASSIGNMENT\n", f"Incorrect header line in {reffile}: {header_line}" for line in inf: @@ -380,10 +399,10 @@ def combine_unichem(concordances, output): # but out of paranoia we'll double-check that. prefixes_in_file = set() - with open(infile, "r") as inf: + with open(infile) as inf: for line in inf: x = line.strip().split("\t") - pairs.append(([x[0], x[2]])) + pairs.append([x[0], x[2]]) # Get the prefix from the first row to determine if we need to remove overused xrefs prefixes_in_file.add(Text.get_prefix(x[0])) @@ -428,7 +447,7 @@ def is_cas(thing): def make_pubchem_cas_concord(pubchemsynonyms, outfile, metadata_yaml): - with open(pubchemsynonyms, "r") as inf, open(outfile, "w") as outf: + with open(pubchemsynonyms) as inf, open(outfile, "w") as outf: for line in inf: x = line.strip().split("\t") if is_cas(x[1]): @@ -449,7 +468,7 @@ def make_pubchem_mesh_concord(pubcheminput, meshlabels, outfile, metadata_yaml): # MESH:D014867 Water # MESH:M0022883 Water # but we only want the ones that are MESH:D... or MESH:C.... 
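The comment above is the crux of the PubChem-to-MeSH concord: the MeSH label file mixes descriptor (D...) and supplementary-record (C...) identifiers with concept-level M... identifiers that carry the same label, and only the first two kinds are wanted. A minimal standalone sketch of that filter, using the two CURIEs quoted in the comment; the function name is made up for the example.

    def is_descriptor_or_supplementary(mesh_curie: str) -> bool:
        # Keep MESH:D... and MESH:C..., drop concept-level MESH:M... identifiers.
        return mesh_curie.split(":")[-1][0] in ("C", "D")


    examples = ["MESH:D014867", "MESH:M0022883"]
    print([curie for curie in examples if is_descriptor_or_supplementary(curie)])
    # prints ['MESH:D014867']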
- with open(meshlabels, "r") as inf: + with open(meshlabels) as inf: for line in inf: x = line.strip().split("\t") if x[0].split(":")[-1][0] in ["C", "D"]: @@ -458,7 +477,7 @@ def make_pubchem_mesh_concord(pubcheminput, meshlabels, outfile, metadata_yaml): # first mapping is the 'best' i.e. the one most frequently reported. # We will only use the first one used_pubchem = set() - with open(pubcheminput, "r") as inf, open(outfile, "w") as outf: + with open(pubcheminput) as inf, open(outfile, "w") as outf: for line in inf: x = line.strip().split("\t") # x[0] = puchemid (no prefix), x[1] = mesh label if x[0] in used_pubchem: @@ -494,7 +513,7 @@ def build_drugcentral_relations(infile, outfile, metadata_yaml): external_id_col = 1 external_ns_col = 2 drugcentral_id_col = 3 - with open(infile, "r") as inf, open(outfile, "w") as outf: + with open(infile) as inf, open(outfile, "w") as outf: for line in inf: parts = line.strip().split("\t") # print(parts) @@ -515,7 +534,7 @@ def build_drugcentral_relations(infile, outfile, metadata_yaml): def make_gtopdb_relations(infile, outfile, metadata_yaml): - with open(infile, "r") as inf, open(outfile, "w") as outf: + with open(infile) as inf, open(outfile, "w") as outf: h = inf.readline() # We might have a header/version line. If so, skip to the next line. if h.startswith('"# GtoPdb Version'): @@ -551,7 +570,7 @@ def make_chebi_relations(sdf, dbx, outfile, propfile_gz, metadata_yaml): # CHEBIs in the sdf by definition have structure (the sdf is a structure file) structured_chebi = set(chebi_sdf_dat.keys()) # READ xrefs - with open(dbx, "r") as inf: + with open(dbx) as inf: dbxdata = inf.read() kk = "keggcompounddatabaselinks" pk = "pubchemdatabaselinks" @@ -613,7 +632,7 @@ def make_chebi_relations(sdf, dbx, outfile, propfile_gz, metadata_yaml): def get_mesh_relationships(mesh_id_file, cas_out, unii_out, cas_metadata, unii_metadata): meshes = set() - with open(mesh_id_file, "r") as inf: + with open(mesh_id_file) as inf: for line in inf: x = line.split("\t") meshes.add(x[0]) @@ -707,10 +726,10 @@ def build_untyped_compendia(concordances, identifiers, unichem_partial, untyped_ print(infile) print("loading", infile) pairs = [] - with open(infile, "r") as inf: + with open(infile) as inf: for line in inf: x = line.strip().split("\t") - pairs.append(([x[0], x[2]])) + pairs.append([x[0], x[2]]) p = False if DRUGCENTRAL in [n.split(":")[0] for n in pairs[0]]: p = True @@ -753,14 +772,14 @@ def build_untyped_compendia(concordances, identifiers, unichem_partial, untyped_ def build_compendia(type_file, untyped_compendia_file, properties_jsonl_gz_files, metadata_yamls, icrdf_filename): types = {} - with open(type_file, "r") as inf: + with open(type_file) as inf: for line in inf: x = line.strip().split("\t") types[x[0]] = x[1] logger.info(f"Loaded {len(types)} types from {type_file}: {get_memory_usage_summary()}") untyped_sets = set() - with open(untyped_compendia_file, "r") as inf: + with open(untyped_compendia_file) as inf: for line in inf: s = ast.literal_eval(line.strip()) untyped_sets.add(frozenset(s)) diff --git a/src/createcompendia/diseasephenotype.py b/src/createcompendia/diseasephenotype.py index e5aaa661..a2680041 100644 --- a/src/createcompendia/diseasephenotype.py +++ b/src/createcompendia/diseasephenotype.py @@ -1,18 +1,16 @@ -from os import path from collections import defaultdict +from os import path +import src.datahandlers.doid as doid +import src.datahandlers.efo as efo +import src.datahandlers.mesh as mesh import src.datahandlers.obo as obo -from 
src.metadata.provenance import write_concord_metadata - -from src.prefixes import MESH, NCIT, MONDO, OMIM, HP, SNOMEDCT, MEDDRA, ORPHANET, ICD0, ICD9, ICD10, UMLS, KEGGDISEASE +import src.datahandlers.umls as umls +from src.babel_utils import get_prefixes, glom, read_identifier_file, remove_overused_xrefs, write_compendium from src.categories import DISEASE, PHENOTYPIC_FEATURE +from src.metadata.provenance import write_concord_metadata +from src.prefixes import HP, ICD0, ICD9, ICD10, KEGGDISEASE, MEDDRA, MESH, MONDO, NCIT, OMIM, ORPHANET, SNOMEDCT, UMLS from src.ubergraph import build_sets -import src.datahandlers.umls as umls -import src.datahandlers.doid as doid -import src.datahandlers.mesh as mesh -import src.datahandlers.efo as efo - -from src.babel_utils import read_identifier_file, glom, remove_overused_xrefs, get_prefixes, write_compendium def write_obo_ids(irisandtypes, outfile, exclude=[]): @@ -47,7 +45,7 @@ def write_hp_ids(outfile): def write_omim_ids(infile, outfile): - with open(infile, "r") as inf, open(outfile, "w") as outf: + with open(infile) as inf, open(outfile, "w") as outf: for line in inf: if line.startswith("#"): continue @@ -89,7 +87,7 @@ def write_mesh_ids(outfile): def write_umls_ids(mrsty, outfile, badumlsfile): badumls = set() - with open(badumlsfile, "r") as inf: + with open(badumlsfile) as inf: for line in inf: if line.startswith("#"): continue @@ -181,7 +179,7 @@ def build_disease_umls_relationships(mrconso, idfile, outfile, omimfile, ncitfil good_ids = {} for prefix, prefixidfile in [(OMIM, omimfile), (NCIT, ncitfile)]: good_ids[prefix] = set() - with open(prefixidfile, "r") as inf: + with open(prefixidfile) as inf: for line in inf: x = line.split()[0] good_ids[prefix].add(x) @@ -229,7 +227,7 @@ def build_compendium(concordances, metadata_yamls, identifiers, mondoclose, badx glom(dicts, new_identifiers, unique_prefixes=[MONDO, HP]) types.update(new_types) # Load close Mondos - with open(mondoclose, "r") as inf: + with open(mondoclose) as inf: close_mondos = defaultdict(set) for line in inf: x = tuple(line.strip().split("\t")) @@ -245,7 +243,7 @@ def build_compendium(concordances, metadata_yamls, identifiers, mondoclose, badx else: print("no bad pairs", pref) bad_pairs = set() - with open(infile, "r") as inf: + with open(infile) as inf: for line in inf: stuff = line.strip().split("\t") if len(stuff) != 3: @@ -316,7 +314,7 @@ def create_typed_sets(eqsets, types): def read_badxrefs(fn): morebad = set() - with open(fn, "r") as inf: + with open(fn) as inf: for line in inf: if line.startswith("#"): continue diff --git a/src/createcompendia/drugchemical.py b/src/createcompendia/drugchemical.py index daa39f2c..dabf65ed 100644 --- a/src/createcompendia/drugchemical.py +++ b/src/createcompendia/drugchemical.py @@ -1,10 +1,15 @@ import csv +import json +import logging import sys import time +from collections import defaultdict import jsonlines from humanfriendly import format_timespan +from src.babel_utils import get_numerical_curie_suffix, glom + # from src.categories import ( # SMALL_MOLECULE, # POLYPEPTIDE, @@ -23,13 +28,8 @@ from src.categories import CHEMICAL_ENTITY from src.metadata.provenance import write_combined_metadata, write_concord_metadata from src.node import InformationContentFactory -from src.prefixes import RXCUI, PUBCHEMCOMPOUND, UMLS -from src.babel_utils import glom, get_numerical_curie_suffix -from collections import defaultdict -import json - -import logging -from src.util import LoggingUtil, get_config, get_memory_usage_summary, 
get_biolink_model_toolkit, Text +from src.prefixes import PUBCHEMCOMPOUND, RXCUI, UMLS +from src.util import LoggingUtil, Text, get_biolink_model_toolkit, get_config, get_memory_usage_summary logger = LoggingUtil.init_logging(__name__, level=logging.INFO) @@ -138,7 +138,7 @@ def get_aui_to_cui(consofile): aui_to_cui = {} sdui_to_cui = defaultdict(set) # consofile = os.path.join('input_data', 'private', "RXNCONSO.RRF") - with open(consofile, "r") as inf: + with open(consofile) as inf: for line in inf: x = line.strip().split("|") aui = x[7] @@ -229,7 +229,7 @@ def build_rxnorm_relationships(conso, relfile, outfile, metadata_yaml): one_to_one_relations = {} # one_to_one_relations = {"has_tradename": {"subject": defaultdict(set), # "object": defaultdict(set)}} - with open(relfile, "r") as inf, open(outfile, "w") as outf: + with open(relfile) as inf, open(outfile, "w") as outf: for line in inf: x = line.strip().split("|") # UMLS always has the CUI in it, while RXNORM does not. @@ -274,7 +274,7 @@ def build_rxnorm_relationships(conso, relfile, outfile, metadata_yaml): def load_cliques_containing_rxcui(compendium): rx_to_clique = {} - with open(compendium, "r") as infile: + with open(compendium) as infile: for line in infile: if RXCUI not in line: continue @@ -287,7 +287,7 @@ def load_cliques_containing_rxcui(compendium): def build_pubchem_relationships(infile, outfile, metadata_yaml): - with open(infile, "r") as inf: + with open(infile) as inf: document = json.load(inf) with open(outfile, "w") as outf: for annotation in document["Annotations"]["Annotation"]: @@ -341,7 +341,7 @@ def build_conflation( manual_concords_curies = set() manual_concords_predicate_counts = defaultdict(int) manual_concords_curie_prefix_counts = defaultdict(int) - with open(manual_concord_filename, "r") as manualf: + with open(manual_concord_filename) as manualf: csv_reader = csv.DictReader(manualf, dialect=csv.excel_tab) for row in csv_reader: # We're only interested in two fields, so you can add additional files ('comment', 'notes', etc.) as needed. @@ -364,7 +364,7 @@ def build_conflation( type_for_preferred_curie = {} clique_for_preferred_curie = {} for chemical_compendium in chemical_compendia: - with open(chemical_compendium, "r") as compendiumf: + with open(chemical_compendium) as compendiumf: logger.info(f"Loading {chemical_compendium}: {get_memory_usage_summary()}") for line in compendiumf: clique = json.loads(line) @@ -388,7 +388,7 @@ def build_conflation( pairs = [] for concfile in [rxn_concord, umls_concord]: - with open(concfile, "r") as infile: + with open(concfile) as infile: for line in infile: x = line.strip().split("\t") subject = x[0] @@ -417,15 +417,17 @@ def build_conflation( pairs.extend(manual_concords) # We've had some issues with non-chemical types getting conflated, so we filter those out here. 
- biolink_model_toolkit = get_biolink_model_toolkit(config['biolink_version']) - biolink_chemical_types = set(biolink_model_toolkit.get_descendants( - CHEMICAL_ENTITY, - reflexive=True, - formatted=True, - mixin=True, - )) + biolink_model_toolkit = get_biolink_model_toolkit(config["biolink_version"]) + biolink_chemical_types = set( + biolink_model_toolkit.get_descendants( + CHEMICAL_ENTITY, + reflexive=True, + formatted=True, + mixin=True, + ) + ) logging.info(f"Filtering RxCUI pairs to those in these Biolink chemical types: {sorted(biolink_chemical_types)}") - with open(pubchem_rxn_concord, "r") as infile: + with open(pubchem_rxn_concord) as infile: for line in infile: x = line.strip().split("\t") subject = x[0] @@ -493,9 +495,9 @@ def build_conflation( # # So, instead, I'm going to group them by prefix and then to sort it using the ChemicalEntity # prefix sort order. - biolink_model_toolkit = get_biolink_model_toolkit(config['biolink_version']) + biolink_model_toolkit = get_biolink_model_toolkit(config["biolink_version"]) biolink_chemical_entity = biolink_model_toolkit.get_element(CHEMICAL_ENTITY) - conflation_prefix_order = biolink_chemical_entity['id_prefixes'] + conflation_prefix_order = biolink_chemical_entity["id_prefixes"] if not conflation_prefix_order: raise RuntimeError(f"Biolink model {config['biolink_version']} doesn't have a ChemicalEntity prefix order: {biolink_chemical_entity}") @@ -628,9 +630,11 @@ def build_conflation( # The final conflation list won't match the initial list only if some of the Biolink types weren't # chemical types, and so were skipped that way. if set(final_conflation_id_list) != set(normalized_conflation_id_list): - logger.warning("Final conflation ID list does not match the normalized conflation ID list:\n" + - f" - Final conflation ID list: {sorted(final_conflation_id_list)}\n" + - f" - Normalized conflation ID list: {sorted(normalized_conflation_id_list)}") + logger.warning( + "Final conflation ID list does not match the normalized conflation ID list:\n" + + f" - Final conflation ID list: {sorted(final_conflation_id_list)}\n" + + f" - Normalized conflation ID list: {sorted(normalized_conflation_id_list)}" + ) # Write out all the identifiers. 
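The build_conflation() changes above keep the behaviour where the final DrugChemical conflation member list is ordered by the ChemicalEntity id_prefixes list obtained from the Biolink model toolkit. A minimal sketch of that ordering idea; the shortened prefix list and the CURIEs below are placeholders for illustration, not the real Biolink prefix order or a real conflation.

    # Sort CURIEs by where their prefix falls in a preferred-prefix list,
    # pushing unknown prefixes to the end.
    preferred_prefixes = ["PUBCHEM.COMPOUND", "CHEBI", "UNII", "RXCUI", "UMLS"]


    def prefix_rank(curie: str) -> tuple[int, str]:
        prefix = curie.split(":", 1)[0]
        rank = preferred_prefixes.index(prefix) if prefix in preferred_prefixes else len(preferred_prefixes)
        return (rank, curie)


    members = ["UMLS:C0012345", "RXCUI:99999", "PUBCHEM.COMPOUND:11111"]
    print(sorted(members, key=prefix_rank))
    # ['PUBCHEM.COMPOUND:11111', 'RXCUI:99999', 'UMLS:C0012345']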
logger.info(f"Ordered DrugChemical conflation {final_conflation_id_list} with IC values {clique_ics}.") diff --git a/src/createcompendia/gene.py b/src/createcompendia/gene.py index 88bf4a66..d8a2265f 100644 --- a/src/createcompendia/gene.py +++ b/src/createcompendia/gene.py @@ -1,18 +1,14 @@ +import gzip +import json +import logging +import os import re -from src.metadata.provenance import write_concord_metadata -from src.prefixes import OMIM, ENSEMBL, NCBIGENE, WORMBASE, MGI, ZFIN, DICTYBASE, FLYBASE, RGD, SGD, HGNC, UMLS -from src.categories import GENE - import src.datahandlers.umls as umls - -from src.babel_utils import read_identifier_file, glom, write_compendium - -import os -import json -import gzip - -import logging +from src.babel_utils import glom, read_identifier_file, write_compendium +from src.categories import GENE +from src.metadata.provenance import write_concord_metadata +from src.prefixes import DICTYBASE, ENSEMBL, FLYBASE, HGNC, MGI, NCBIGENE, OMIM, RGD, SGD, UMLS, WORMBASE, ZFIN from src.util import LoggingUtil logger = LoggingUtil.init_logging(__name__, level=logging.ERROR) @@ -20,7 +16,7 @@ def write_mods_ids(dd, id, modlist): for mod in modlist: - with open(f"{dd}/{mod}/labels", "r") as inf, open(f"{id}/gene/ids/{mod}", "w") as outf: + with open(f"{dd}/{mod}/labels") as inf, open(f"{id}/gene/ids/{mod}", "w") as outf: for line in inf: x = line.split("\t")[0] outf.write(f"{x}\n") @@ -48,7 +44,7 @@ def build_gene_ensembl_relationships(ensembl_dir, outfile, metadata_yaml): infname = os.path.join(dlpath, "BioMart.tsv") if os.path.exists(infname): # open each ensembl file, find the id column, and put it in the output - with open(infname, "r") as inf: + with open(infname) as inf: wrote = set() h = inf.readline() x = h[:-1].split("\t") @@ -95,7 +91,7 @@ def build_gene_ensembl_relationships(ensembl_dir, outfile, metadata_yaml): def write_zfin_ids(infile, outfile): - with open(infile, "r") as inf, open(outfile, "w") as outf: + with open(infile) as inf, open(outfile, "w") as outf: for line in inf: x = line.strip().split() if "GENE" in x[0]: @@ -103,7 +99,7 @@ def write_zfin_ids(infile, outfile): def write_hgnc_ids(infile, outfile): - with open(infile, "r") as inf: + with open(infile) as inf: hgnc_json = json.load(inf) with open(outfile, "w") as outf: for gene in hgnc_json["response"]["docs"]: @@ -111,7 +107,7 @@ def write_hgnc_ids(infile, outfile): def write_omim_ids(infile, outfile): - with open(infile, "r") as inf, open(outfile, "w") as outf: + with open(infile) as inf, open(outfile, "w") as outf: for line in inf: if line.startswith("#"): continue @@ -137,7 +133,7 @@ def write_umls_ids(mrconso, mrsty, outfile): ] ) umls_keepers = set() - with open(mrsty, "r") as inf: + with open(mrsty) as inf: for line in inf: x = line.strip().split("|") cat = x[2] @@ -145,7 +141,7 @@ def write_umls_ids(mrconso, mrsty, outfile): umls_keepers.add(x[0]) umls_keepers.difference_update(blacklist) # Now filter out OMIM variants - with open(mrconso, "r") as inf: + with open(mrconso) as inf: for line in inf: x = line.strip().split("|") cui = x[0] @@ -174,7 +170,7 @@ def write_umls_ids(mrconso, mrsty, outfile): def read_ncbi_idfile(ncbi_idfile): ncbi_ids = set() - with open(ncbi_idfile, "r") as inf: + with open(ncbi_idfile) as inf: for line in inf: x = line.strip().split("\t")[0] ncbi_ids.add(x) @@ -258,7 +254,7 @@ def build_gene_ncbigene_xrefs(infile, ncbi_idfile, outfile, metadata_yaml): def build_gene_medgen_relationships(infile, outfile, metadata_yaml): - with open(infile, "r") as inf, 
open(outfile, "w") as outf: + with open(infile) as inf, open(outfile, "w") as outf: h = inf.readline() for line in inf: x = line.strip().split("\t") @@ -297,7 +293,7 @@ def write_ensembl_gene_ids(ensembl_dir, outfile): infname = os.path.join(dlpath, "BioMart.tsv") if os.path.exists(infname): # open each ensembl file, find the id column, and put it in the output - with open(infname, "r") as inf: + with open(infname) as inf: wrote = set() h = inf.readline() x = h[:-1].split("\t") @@ -336,7 +332,7 @@ def build_gene_compendia(concordances, metadata_yamls, identifiers, icrdf_filena print(infile) print("loading", infile) pairs = [] - with open(infile, "r") as inf: + with open(infile) as inf: for line in inf: x = line.strip().split("\t") pairs.append(set([x[0], x[2]])) diff --git a/src/createcompendia/genefamily.py b/src/createcompendia/genefamily.py index 7361d5bb..64934700 100644 --- a/src/createcompendia/genefamily.py +++ b/src/createcompendia/genefamily.py @@ -1,7 +1,6 @@ +from src.babel_utils import glom, read_identifier_file, write_compendium from src.categories import GENE_FAMILY -from src.babel_utils import read_identifier_file, glom, write_compendium - def build_compendia(identifiers, metadata_yamls, icrdf_filename): """:concordances: a list of files from which to read relationships diff --git a/src/createcompendia/geneprotein.py b/src/createcompendia/geneprotein.py index 096767d9..5ed8aab6 100644 --- a/src/createcompendia/geneprotein.py +++ b/src/createcompendia/geneprotein.py @@ -1,11 +1,11 @@ -from src.metadata.provenance import write_concord_metadata -from src.prefixes import UNIPROTKB, NCBIGENE -from src.babel_utils import glom +import logging from collections import defaultdict import jsonlines -import logging +from src.babel_utils import glom +from src.metadata.provenance import write_concord_metadata +from src.prefixes import NCBIGENE, UNIPROTKB from src.util import LoggingUtil logger = LoggingUtil.init_logging(__name__, level=logging.ERROR) @@ -16,7 +16,7 @@ def build_uniprotkb_ncbigene_relationships(infile, outfile, metadata_yaml): # Our model is 1 gene, many proteins, so this causes trouble. 
# For the moment, we will not include that have more than one gene per protein mappings = defaultdict(list) - with open(infile, "r") as inf: + with open(infile) as inf: for line in inf: x = line.strip().split() if x[1] == "GeneID": @@ -87,7 +87,7 @@ def build_conflation(geneprotein_concord, gene_compendium, protein_compendium, o collect_valid_ids(protein_compendium, all_ids) conf = {} pairs = [] - with open(geneprotein_concord, "r") as inf: + with open(geneprotein_concord) as inf: for line in inf: x = line.strip().split("\t") if (x[0] in all_ids) and (x[2] in all_ids): @@ -115,7 +115,7 @@ def build_compendium(gene_compendium, protein_compendium, geneprotein_concord, o """ uniprot2ncbi = {} ncbi2uniprot = defaultdict(list) - with open(geneprotein_concord, "r") as inf: + with open(geneprotein_concord) as inf: for line in inf: x = line.strip().split("\t") uniprot2ncbi[x[0]] = x[2] diff --git a/src/createcompendia/leftover_umls.py b/src/createcompendia/leftover_umls.py index 1e264281..323636a0 100644 --- a/src/createcompendia/leftover_umls.py +++ b/src/createcompendia/leftover_umls.py @@ -1,14 +1,14 @@ import json import logging +from pathlib import Path import jsonlines -from pathlib import Path -from src.node import NodeFactory -from src.util import get_biolink_model_toolkit +from src.categories import ACTIVITY, AGENT, DEVICE, DRUG, FOOD, PHYSICAL_ENTITY, PROCEDURE, PUBLICATION, SMALL_MOLECULE from src.datahandlers import umls +from src.node import NodeFactory from src.prefixes import UMLS -from src.categories import ACTIVITY, AGENT, DEVICE, DRUG, FOOD, SMALL_MOLECULE, PHYSICAL_ENTITY, PUBLICATION, PROCEDURE +from src.util import get_biolink_model_toolkit def write_leftover_umls(compendia, umls_labels_filename, mrconso, mrsty, synonyms, umls_compendium, umls_synonyms, report, biolink_version): @@ -56,7 +56,7 @@ def write_leftover_umls(compendia, umls_labels_filename, mrconso, mrsty, synonym logging.info(f"Starting compendium: {compendium}") umls_ids = set() - with open(compendium, "r") as f: + with open(compendium) as f: for row in f: cluster = json.loads(row) for id in cluster["identifiers"]: @@ -75,7 +75,7 @@ def write_leftover_umls(compendia, umls_labels_filename, mrconso, mrsty, synonym preferred_name_by_id = dict() types_by_id = dict() types_by_tui = dict() - with open(mrsty, "r") as inf: + with open(mrsty) as inf: for line in inf: x = line.strip().split("|") umls_id = f"{UMLS}:{x[0]}" @@ -104,7 +104,7 @@ def write_leftover_umls(compendia, umls_labels_filename, mrconso, mrsty, synonym # Create a compendium that consists solely of all MRCONSO entries that haven't been referenced. count_no_umls_type = 0 count_multiple_umls_type = 0 - with open(mrconso, "r") as inf: + with open(mrconso) as inf: for line in inf: if not umls.check_mrconso_line(line): continue @@ -194,7 +194,7 @@ def umls_type_to_biolink_type(umls_tui): # Collected synonyms for all IDs in this compendium. 
synonyms_by_id = dict() - with open(synonyms, "r") as synonymsf: + with open(synonyms) as synonymsf: for line in synonymsf: id, relation, synonym = line.rstrip().split("\t") if id in umls_ids_in_this_compendium: diff --git a/src/createcompendia/macromolecular_complex.py b/src/createcompendia/macromolecular_complex.py index e7d81279..0d0ca54d 100644 --- a/src/createcompendia/macromolecular_complex.py +++ b/src/createcompendia/macromolecular_complex.py @@ -1,7 +1,6 @@ -from src.prefixes import COMPLEXPORTAL +from src.babel_utils import glom, read_identifier_file, write_compendium from src.categories import MACROMOLECULAR_COMPLEX - -from src.babel_utils import read_identifier_file, glom, write_compendium +from src.prefixes import COMPLEXPORTAL def build_compendia(identifiers, metadata_yamls, icrdf_filename): diff --git a/src/createcompendia/processactivitypathway.py b/src/createcompendia/processactivitypathway.py index 859b0fbf..68b2f43f 100644 --- a/src/createcompendia/processactivitypathway.py +++ b/src/createcompendia/processactivitypathway.py @@ -1,18 +1,16 @@ from collections import defaultdict +import src.datahandlers.ec as ec import src.datahandlers.obo as obo import src.datahandlers.reactome as reactome import src.datahandlers.rhea as rhea -import src.datahandlers.ec as ec import src.datahandlers.umls as umls -from src.metadata.provenance import write_concord_metadata - -from src.prefixes import GO, REACT, WIKIPATHWAYS, TCDB +from src.babel_utils import get_prefixes, glom, read_identifier_file, remove_overused_xrefs, write_compendium from src.categories import BIOLOGICAL_PROCESS, MOLECULAR_ACTIVITY, PATHWAY +from src.metadata.provenance import write_concord_metadata +from src.prefixes import GO, REACT, TCDB, WIKIPATHWAYS from src.ubergraph import build_sets -from src.babel_utils import read_identifier_file, glom, remove_overused_xrefs, get_prefixes, write_compendium - def write_obo_ids(irisandtypes, outfile, exclude=[]): order = [PATHWAY, BIOLOGICAL_PROCESS, MOLECULAR_ACTIVITY] @@ -102,7 +100,7 @@ def build_compendia(concordances, metadata_yamls, identifiers, icrdf_filename): # them added. So we want to limit concordances to terms that are already in the dicts. But that's ONLY for the # UMLS concord. We trust the others to retrieve decent identifiers. 
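The comment above describes a pattern that repeats across the compendium builders touched in this patch (anatomy, chemicals, disease, taxon, process/activity/pathway): read concord triples, keep only the subject/object pair, optionally strip overused xrefs, then "glom" the pairs into equivalence sets before writing the compendium. A deliberately simplified sketch of that flow; simple_glom() is a stand-in for glom() in src/babel_utils.py, which additionally enforces unique prefixes, close-match checks, and type tracking, and the CURIEs at the end are placeholders, not real mappings.

    def load_pairs(concord_path: str) -> list[set[str]]:
        pairs = []
        with open(concord_path) as inf:
            for line in inf:
                x = line.strip().split("\t")
                pairs.append({x[0], x[2]})  # x[1] is the predicate and is ignored
        return pairs


    def simple_glom(conc_set: dict[str, set[str]], newgroups: list[set[str]]) -> None:
        # Merge each new group with any existing sets its members belong to,
        # then point every member of the merged set at that same set object.
        for group in newgroups:
            merged = set(group)
            for ident in group:
                merged |= conc_set.get(ident, set())
            for ident in merged:
                conc_set[ident] = merged


    cliques: dict[str, set[str]] = {}
    simple_glom(cliques, [{"UBERON:0000000", "UMLS:C0000000"}])
    simple_glom(cliques, [{"UMLS:C0000000", "MESH:D000000"}])
    print(cliques["UBERON:0000000"])  # all three identifiers end up in one set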
pairs = [] - with open(infile, "r") as inf: + with open(infile) as inf: for line in inf: x = line.strip().split("\t") if infile.endswith("UMLS"): diff --git a/src/createcompendia/protein.py b/src/createcompendia/protein.py index 055a66cd..79f66112 100644 --- a/src/createcompendia/protein.py +++ b/src/createcompendia/protein.py @@ -1,24 +1,21 @@ +import os import re -from src.metadata.provenance import write_concord_metadata -from src.prefixes import ENSEMBL, PR, UNIPROTKB, NCIT, NCBITAXON, MESH, DRUGBANK -from src.categories import PROTEIN - -import src.datahandlers.umls as umls import src.datahandlers.obo as obo +import src.datahandlers.umls as umls +from src.babel_utils import Text, glom, read_identifier_file, write_compendium +from src.categories import PROTEIN +from src.metadata.provenance import write_concord_metadata +from src.prefixes import DRUGBANK, ENSEMBL, MESH, NCBITAXON, NCIT, PR, UNIPROTKB from src.ubergraph import UberGraph - -from src.babel_utils import read_identifier_file, glom, write_compendium, Text - -import os -from src.util import get_memory_usage_summary, get_logger +from src.util import get_logger, get_memory_usage_summary logger = get_logger(__name__) def extract_taxon_ids_from_uniprotkb(idmapping_filename, uniprotkb_taxa_filename): """Extract NCBIGene identifiers from the UniProtKB mapping file.""" - with open(idmapping_filename, "r") as inf, open(uniprotkb_taxa_filename, "w") as outf: + with open(idmapping_filename) as inf, open(uniprotkb_taxa_filename, "w") as outf: for line in inf: x = line.strip().split("\t") if x[1] == "NCBI_TaxID": @@ -51,7 +48,7 @@ def write_ensembl_protein_ids(ensembl_dir, outfile): print(f"write_ensembl_ids for input filename {infname}") if os.path.exists(infname): # open each ensembl file, find the id column, and put it in the output - with open(infname, "r") as inf: + with open(infname) as inf: wrote = set() h = inf.readline() x = h[:-1].split("\t") @@ -97,7 +94,7 @@ def build_pr_uniprot_relationships(outfile, ignore_list=[], metadata_yaml=None): def build_protein_uniprotkb_ensemble_relationships(infile, outfile, metadata_yaml): - with open(infile, "r") as inf, open(outfile, "w") as outf: + with open(infile) as inf, open(outfile, "w") as outf: for line in inf: x = line.strip().split() if x[1] == "Ensembl_PRO": @@ -129,7 +126,7 @@ def build_protein_uniprotkb_ensemble_relationships(infile, outfile, metadata_yam def build_ncit_uniprot_relationships(infile, outfile, metadata_yaml): - with open(infile, "r") as inf, open(outfile, "w") as outf: + with open(infile) as inf, open(outfile, "w") as outf: for line in inf: # These lines are sometimes empty (I think because the # input file can have DOS line endings). 
If so, we can @@ -188,7 +185,7 @@ def build_protein_compendia(concordances, metadata_yamls, identifiers, icrdf_fil for infile in concordances: logger.info(f"Loading concordance file {infile}") pairs = [] - with open(infile, "r") as inf: + with open(infile) as inf: for line_index, line in enumerate(inf): if line_index % 1000000 == 0: logger.info(f"Loading concordance file {infile}: line {line_index:,}") diff --git a/src/createcompendia/publications.py b/src/createcompendia/publications.py index b9d70468..90c4a8fd 100644 --- a/src/createcompendia/publications.py +++ b/src/createcompendia/publications.py @@ -4,15 +4,15 @@ import logging import os import time +import xml.etree.ElementTree as ET from collections import defaultdict from mmap import ACCESS_READ, mmap from pathlib import Path -import xml.etree.ElementTree as ET -from src.babel_utils import pull_via_wget, WgetRecursionOptions, glom, read_identifier_file, write_compendium +from src.babel_utils import WgetRecursionOptions, glom, pull_via_wget, read_identifier_file, write_compendium from src.categories import JOURNAL_ARTICLE, PUBLICATION from src.metadata.provenance import write_concord_metadata -from src.prefixes import PMID, DOI, PMC +from src.prefixes import DOI, PMC, PMID def download_pubmed(download_file, pubmed_base="ftp://ftp.ncbi.nlm.nih.gov/pubmed/", pmc_base="https://ftp.ncbi.nlm.nih.gov/pub/pmc/"): @@ -69,7 +69,7 @@ def verify_pubmed_download_against_md5(pubmed_filename, md5_filename): logging.warning(f"Could not verify {pubmed_filename}: no MD5 file found at {md5_filename}.") return False - with open(md5_filename, "r") as md5f: + with open(md5_filename) as md5f: md5_line = md5f.readline().strip() expected_md5 = md5_line.split("= ")[1] if len(expected_md5) != 32: @@ -273,7 +273,7 @@ def generate_compendium(concordances, metadata_yamls, identifiers, titles, publi print(infile) print("loading", infile) pairs = [] - with open(infile, "r") as inf: + with open(infile) as inf: for line in inf: x = line.strip().split("\t") pairs.append({x[0], x[2]}) @@ -283,7 +283,7 @@ def generate_compendium(concordances, metadata_yamls, identifiers, titles, publi labels = dict() for title_filename in titles: print("loading titles from", title_filename) - with open(title_filename, "r") as titlef: + with open(title_filename) as titlef: for line in titlef: id, title = line.strip().split("\t") if id in labels: diff --git a/src/createcompendia/taxon.py b/src/createcompendia/taxon.py index 57e2c100..275f1e1d 100644 --- a/src/createcompendia/taxon.py +++ b/src/createcompendia/taxon.py @@ -1,13 +1,11 @@ -from src.metadata.provenance import write_concord_metadata -from src.prefixes import NCBITAXON, MESH, UMLS -from src.categories import ORGANISM_TAXON +import logging import src.datahandlers.mesh as mesh import src.datahandlers.umls as umls - -from src.babel_utils import read_identifier_file, glom, write_compendium - -import logging +from src.babel_utils import glom, read_identifier_file, write_compendium +from src.categories import ORGANISM_TAXON +from src.metadata.provenance import write_concord_metadata +from src.prefixes import MESH, NCBITAXON, UMLS from src.util import LoggingUtil logger = LoggingUtil.init_logging(__name__, level=logging.ERROR) @@ -74,7 +72,7 @@ def build_taxon_umls_relationships(mrconso, idfile, outfile, metadata_yaml): def build_relationships(outfile, mesh_ids, metadata_yaml): regis = mesh.pull_mesh_registry() - with open(mesh_ids, "r") as inf: + with open(mesh_ids) as inf: lines = inf.read().strip().split("\n") all_mesh_taxa = 
set([x.split("\t")[0] for x in lines]) with open(outfile, "w") as outf: @@ -118,7 +116,7 @@ def build_compendia(concordances, metadata_yamls, identifiers, icrdf_filename): print(infile) print("loading", infile) pairs = [] - with open(infile, "r") as inf: + with open(infile) as inf: for line in inf: x = line.strip().split("\t") pairs.append(set([x[0], x[2]])) diff --git a/src/datahandlers/chebi.py b/src/datahandlers/chebi.py index 147b6a61..80a2b794 100644 --- a/src/datahandlers/chebi.py +++ b/src/datahandlers/chebi.py @@ -3,7 +3,9 @@ def pull_chebi(): pull_via_ftp("ftp.ebi.ac.uk", "/pub/databases/chebi/SDF", "chebi.sdf.gz", decompress_data=True, outfilename="CHEBI/ChEBI_complete.sdf") - pull_via_ftp("ftp.ebi.ac.uk", "/pub/databases/chebi/flat_files", "database_accession.tsv.gz", decompress_data=True, outfilename="CHEBI/database_accession.tsv") + pull_via_ftp( + "ftp.ebi.ac.uk", "/pub/databases/chebi/flat_files", "database_accession.tsv.gz", decompress_data=True, outfilename="CHEBI/database_accession.tsv" + ) def x(inputfile, labelfile, synfile): diff --git a/src/datahandlers/chembl.py b/src/datahandlers/chembl.py index e0272998..d9ed663a 100644 --- a/src/datahandlers/chembl.py +++ b/src/datahandlers/chembl.py @@ -1,8 +1,10 @@ -from src.prefixes import CHEMBLCOMPOUND -from src.babel_utils import pull_via_ftp import ftplib + import pyoxigraph +from src.babel_utils import pull_via_ftp +from src.prefixes import CHEMBLCOMPOUND + def pull_chembl(moleculefilename): fname = get_latest_chembl_name() diff --git a/src/datahandlers/clo.py b/src/datahandlers/clo.py index de9196d7..7620494a 100644 --- a/src/datahandlers/clo.py +++ b/src/datahandlers/clo.py @@ -1,12 +1,13 @@ import logging import re +import pyoxigraph + +from src.babel_utils import pull_via_urllib +from src.categories import CELL_LINE from src.metadata.provenance import write_download_metadata from src.prefixes import CLO -from src.categories import CELL_LINE -from src.babel_utils import pull_via_urllib -from src.util import Text, LoggingUtil -import pyoxigraph +from src.util import LoggingUtil, Text logger = LoggingUtil.init_logging(__name__, level=logging.WARNING) diff --git a/src/datahandlers/complexportal.py b/src/datahandlers/complexportal.py index b9a289c8..3bc973e8 100644 --- a/src/datahandlers/complexportal.py +++ b/src/datahandlers/complexportal.py @@ -9,7 +9,7 @@ def pull_complexportal(): def make_labels_and_synonyms(infile, labelfile, synfile, metadata_yaml): usedsyns = set() - with open(infile, "r") as inf, open(labelfile, "w") as outl, open(synfile, "w") as outsyn: + with open(infile) as inf, open(labelfile, "w") as outl, open(synfile, "w") as outsyn: next(inf) # skip header for line in inf: sline = line.split("\t") diff --git a/src/datahandlers/datacollect.py b/src/datahandlers/datacollect.py index 9b23aa9d..f5563e67 100644 --- a/src/datahandlers/datacollect.py +++ b/src/datahandlers/datacollect.py @@ -1,7 +1,8 @@ -from src.babel_utils import make_local_name, pull_via_ftp, pull_via_urllib import gzip from json import loads +from src.babel_utils import make_local_name, pull_via_ftp, pull_via_urllib + def pull_pubchem_labels(): print("LABEL PUBCHEM") @@ -64,7 +65,7 @@ def pull_prot(which, refresh): swissprot_labels = {} nlines = 0 maxn = 1000 - with open(swissname, "r") as inf: + with open(swissname) as inf: for line in inf: nlines += 1 if line.startswith(">"): diff --git a/src/datahandlers/doid.py b/src/datahandlers/doid.py index e31539a8..3157b83b 100644 --- a/src/datahandlers/doid.py +++ b/src/datahandlers/doid.py @@ -1,7 
+1,8 @@ -from src.prefixes import DOID, OIO -from src.babel_utils import pull_via_urllib, norm import json +from src.babel_utils import norm, pull_via_urllib +from src.prefixes import DOID, OIO + def pull_doid(): pull_via_urllib("https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/main/src/ontology/", "doid.json", subpath="DOID", decompress=False) @@ -9,7 +10,7 @@ def pull_doid(): def pull_doid_labels_and_synonyms(infile, labelfile, synonymfile): # Everything in DOID is a disease. - with open(infile, "r") as inf: + with open(infile) as inf: j = json.load(inf) with open(labelfile, "w") as labels, open(synonymfile, "w") as syns: for entry in j["graphs"][0]["nodes"]: @@ -30,7 +31,7 @@ def pull_doid_labels_and_synonyms(infile, labelfile, synonymfile): def build_xrefs(infile, xreffile, other_prefixes={}): # Everything in DOID is a disease. - with open(infile, "r") as inf: + with open(infile) as inf: j = json.load(inf) with open(xreffile, "w") as xrefs: for entry in j["graphs"][0]["nodes"]: diff --git a/src/datahandlers/drugbank.py b/src/datahandlers/drugbank.py index e751f2a6..ccadecfb 100644 --- a/src/datahandlers/drugbank.py +++ b/src/datahandlers/drugbank.py @@ -32,7 +32,7 @@ def extract_drugbank_labels_and_synonyms(drugbank_vocab_csv, labels, synonyms): :param synonyms: The file to write synonyms into. """ - with open(drugbank_vocab_csv, "r") as fin, open(labels, "w") as labelsf, open(synonyms, "w") as synonymsf: + with open(drugbank_vocab_csv) as fin, open(labels, "w") as labelsf, open(synonyms, "w") as synonymsf: reader = csv.DictReader(fin) assert "DrugBank ID" in reader.fieldnames assert "Common name" in reader.fieldnames diff --git a/src/datahandlers/drugcentral.py b/src/datahandlers/drugcentral.py index 22dabb0f..c5ace307 100644 --- a/src/datahandlers/drugcentral.py +++ b/src/datahandlers/drugcentral.py @@ -1,6 +1,7 @@ -from src.prefixes import DRUGCENTRAL import psycopg2 +from src.prefixes import DRUGCENTRAL + def pull_drugcentral(structfile, labelfile, xreffile): # DrugCentral is only available as a postgres db, but fortunately they run a public instance. 
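The comment above is the only hint in this hunk about how pull_drugcentral() works: it queries DrugCentral's publicly hosted Postgres instance via psycopg2. A minimal, generic sketch of that access pattern follows; the helper name, connection parameters, and query are placeholders, not the actual DrugCentral endpoint, schema, or repository code.

    import csv

    import psycopg2


    def dump_query_to_tsv(query: str, outfile: str, **connect_kwargs) -> None:
        # connect_kwargs would carry host/port/dbname/user/password for the server.
        conn = psycopg2.connect(**connect_kwargs)
        try:
            with conn.cursor() as cur:
                cur.execute(query)
                with open(outfile, "w", newline="") as outf:
                    writer = csv.writer(outf, delimiter="\t")
                    for row in cur:
                        writer.writerow(row)
        finally:
            conn.close()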
diff --git a/src/datahandlers/ec.py b/src/datahandlers/ec.py index f4ef3ecd..469afa2f 100644 --- a/src/datahandlers/ec.py +++ b/src/datahandlers/ec.py @@ -1,9 +1,9 @@ -from src.prefixes import EC -from src.categories import MOLECULAR_ACTIVITY -from src.babel_utils import pull_via_urllib -from src.babel_utils import make_local_name import pyoxigraph +from src.babel_utils import make_local_name, pull_via_urllib +from src.categories import MOLECULAR_ACTIVITY +from src.prefixes import EC + def pull_ec(): outputfile = pull_via_urllib("https://ftp.expasy.org/databases/enzyme/", "enzyme.rdf", subpath="EC", decompress=False) diff --git a/src/datahandlers/efo.py b/src/datahandlers/efo.py index 808d8a93..64613f23 100644 --- a/src/datahandlers/efo.py +++ b/src/datahandlers/efo.py @@ -1,12 +1,12 @@ import logging import re +import pyoxigraph + +from src.babel_utils import pull_via_urllib from src.metadata.provenance import write_concord_metadata from src.prefixes import EFO, ORPHANET -from src.babel_utils import pull_via_urllib -from src.babel_utils import make_local_name -from src.util import Text, LoggingUtil -import pyoxigraph +from src.util import LoggingUtil, Text logger = LoggingUtil.init_logging(__name__, level=logging.WARNING) @@ -31,7 +31,7 @@ def __init__(self, efo_owl_file_path): logger.info(f"Loading EFO from {efo_owl_file_path}.") start = dt.now() self.m = pyoxigraph.Store() - with open(efo_owl_file_path, "r") as inf: + with open(efo_owl_file_path) as inf: self.m.bulk_load(input=inf, format=pyoxigraph.RdfFormat.RDF_XML, base_iri="http://example.org/") end = dt.now() logger.info(f"EFO loading complete in {end - start}.") @@ -168,7 +168,7 @@ def make_ids(roots, owlfile, idfname): def make_concords(owlfile, idfilename, outfilename, provenance_metadata=None): """Given a list of identifiers, find out all of the equivalent identifiers from the owl""" m = EFOgraph(owlfile) - with open(idfilename, "r") as inf, open(outfilename, "w") as concfile: + with open(idfilename) as inf, open(outfilename, "w") as concfile: for line in inf: efo_id = line.split("\t")[0] nexacts = m.get_exacts(efo_id, concfile) diff --git a/src/datahandlers/ensembl.py b/src/datahandlers/ensembl.py index 8f8606d3..582a695c 100644 --- a/src/datahandlers/ensembl.py +++ b/src/datahandlers/ensembl.py @@ -1,10 +1,11 @@ import json - -from src.util import get_config -from apybiomart import find_datasets, query, find_attributes import logging import os +from apybiomart import find_attributes, find_datasets, query + +from src.util import get_config + # As per https://support.bioconductor.org/p/39744/#39751, more attributes than this result in an # error from BioMart: Too many attributes selected for External References # This is the real MAX minus one: for every batch, we'll query the ensembl_gene_id so that we can diff --git a/src/datahandlers/gtopdb.py b/src/datahandlers/gtopdb.py index 19da92d6..efe033b9 100644 --- a/src/datahandlers/gtopdb.py +++ b/src/datahandlers/gtopdb.py @@ -1,8 +1,8 @@ -from src.prefixes import GTOPDB -from src.babel_utils import pull_via_urllib - from bs4 import BeautifulSoup +from src.babel_utils import pull_via_urllib +from src.prefixes import GTOPDB + def pull_gtopdb_ligands(): pull_via_urllib("https://www.guidetopharmacology.org/DATA/", "ligands.tsv", decompress=False, subpath="GTOPDB") @@ -27,7 +27,7 @@ def make_labels_and_synonyms(inputfile, labelfile, synfile): idcol = 0 labelcol = 1 syncol = 13 - with open(inputfile, "r") as inf, open(labelfile, "w") as lf, open(synfile, "w") as sf: + with 
open(inputfile) as inf, open(labelfile, "w") as lf, open(synfile, "w") as sf: h = inf.readline() # Everything in this file is double quoted, hence all the [1:-1] stuff for line in inf: diff --git a/src/datahandlers/hgnc.py b/src/datahandlers/hgnc.py index fbbac905..3d89bdd3 100644 --- a/src/datahandlers/hgnc.py +++ b/src/datahandlers/hgnc.py @@ -1,6 +1,7 @@ -from src.babel_utils import make_local_name, pull_via_urllib import json +from src.babel_utils import make_local_name, pull_via_urllib + def pull_hgnc(): # As per the "quick links" from https://www.genenames.org/download/archive/ @@ -8,7 +9,7 @@ def pull_hgnc(): def pull_hgnc_labels_and_synonyms(infile): - with open(infile, "r") as data: + with open(infile) as data: hgnc_json = json.load(data) lname = make_local_name("labels", subpath="HGNC") sname = make_local_name("synonyms", subpath="HGNC") diff --git a/src/datahandlers/hgncfamily.py b/src/datahandlers/hgncfamily.py index f7a79700..95cd233f 100644 --- a/src/datahandlers/hgncfamily.py +++ b/src/datahandlers/hgncfamily.py @@ -1,6 +1,5 @@ import csv - from src.babel_utils import pull_via_urllib from src.metadata.provenance import write_metadata from src.prefixes import HGNCFAMILY @@ -14,7 +13,7 @@ def pull_hgncfamily(): def pull_labels(infile, labelsfile, descriptionsfile, metadata_yaml): - with open(infile, "r") as inf, open(labelsfile, "w") as labelsf, open(descriptionsfile, "w") as descriptionsf: + with open(infile) as inf, open(labelsfile, "w") as labelsf, open(descriptionsfile, "w") as descriptionsf: reader = csv.DictReader(inf) for row in reader: curie = f"{HGNCFAMILY}:{row['id']}" diff --git a/src/datahandlers/hmdb.py b/src/datahandlers/hmdb.py index 2ebcc7d3..7ef8bb30 100644 --- a/src/datahandlers/hmdb.py +++ b/src/datahandlers/hmdb.py @@ -1,9 +1,11 @@ -from zipfile import ZipFile from os import path -from src.prefixes import HMDB -from src.babel_utils import pull_via_urllib +from zipfile import ZipFile + import xmltodict +from src.babel_utils import pull_via_urllib +from src.prefixes import HMDB + def pull_hmdb(): dname = pull_via_urllib("https://hmdb.ca/system/downloads/current/", "hmdb_metabolites.zip", decompress=False, subpath="HMDB") @@ -31,7 +33,7 @@ def handle_metabolite(metabolite, lfile, synfile, smifile): def make_labels_and_synonyms_and_smiles(inputfile, labelfile, synfile, smifile): - with open(inputfile, "r") as inf: + with open(inputfile) as inf: xml = inf.read() parsed = xmltodict.parse(xml) metabolites = parsed["hmdb"]["metabolite"] diff --git a/src/datahandlers/kegg.py b/src/datahandlers/kegg.py index 64f69572..4cc5ee0e 100644 --- a/src/datahandlers/kegg.py +++ b/src/datahandlers/kegg.py @@ -1,8 +1,9 @@ import re +import traceback import requests -import traceback from more_itertools import chunked + from src.prefixes import KEGGCOMPOUND ### diff --git a/src/datahandlers/mesh.py b/src/datahandlers/mesh.py index d4bf18ac..53058c85 100644 --- a/src/datahandlers/mesh.py +++ b/src/datahandlers/mesh.py @@ -1,6 +1,8 @@ -from src.babel_utils import make_local_name, pull_via_ftp -import pyoxigraph from collections import defaultdict + +import pyoxigraph + +from src.babel_utils import make_local_name, pull_via_ftp from src.prefixes import MESH diff --git a/src/datahandlers/mods.py b/src/datahandlers/mods.py index 41a9c699..41f90f63 100644 --- a/src/datahandlers/mods.py +++ b/src/datahandlers/mods.py @@ -1,8 +1,9 @@ -from src.prefixes import WORMBASE -from src.babel_utils import pull_via_urllib import json import os +from src.babel_utils import pull_via_urllib +from 
src.prefixes import WORMBASE + mods = ["WB", "FB", "ZFIN", "MGI", "RGD", "SGD"] modmap = {x: x for x in mods} modmap["WB"] = WORMBASE @@ -29,7 +30,7 @@ def pull_mods(): def write_labels(dd): for mod, prefix in modmap.items(): - with open(f"{dd}/{prefix}/GENE-DESCRIPTION-JSON_{prefix}.json", "r") as inf: + with open(f"{dd}/{prefix}/GENE-DESCRIPTION-JSON_{prefix}.json") as inf: j = json.load(inf) with open(f"{dd}/{prefix}/labels", "w") as outf: for gene in j["data"]: diff --git a/src/datahandlers/ncbigene.py b/src/datahandlers/ncbigene.py index 1fe8e10d..c02b00c2 100644 --- a/src/datahandlers/ncbigene.py +++ b/src/datahandlers/ncbigene.py @@ -1,6 +1,7 @@ -from src.babel_utils import pull_via_urllib import gzip +from src.babel_utils import pull_via_urllib + def pull_ncbigene(filenames): for fn in filenames: diff --git a/src/datahandlers/ncbitaxon.py b/src/datahandlers/ncbitaxon.py index b9a8bab6..7d10d3ea 100644 --- a/src/datahandlers/ncbitaxon.py +++ b/src/datahandlers/ncbitaxon.py @@ -1,9 +1,9 @@ import gzip import logging +import tarfile from src.babel_utils import pull_via_ftp from src.prefixes import NCBITAXON -import tarfile def pull_ncbitaxon(): diff --git a/src/datahandlers/obo.py b/src/datahandlers/obo.py index c078c5d3..a502ebb9 100644 --- a/src/datahandlers/obo.py +++ b/src/datahandlers/obo.py @@ -1,11 +1,10 @@ import json import logging +import os +from collections import defaultdict from pathlib import Path from src.ubergraph import UberGraph -from collections import defaultdict -import os - from src.util import Text diff --git a/src/datahandlers/orphanet.py b/src/datahandlers/orphanet.py index 0b45deb8..3c132e66 100644 --- a/src/datahandlers/orphanet.py +++ b/src/datahandlers/orphanet.py @@ -1,9 +1,9 @@ -from src.prefixes import OIO, ORPHANET -from src.babel_utils import pull_via_urllib -from zipfile import ZipFile - # ugh XML import xml.etree.ElementTree as ET +from zipfile import ZipFile + +from src.babel_utils import pull_via_urllib +from src.prefixes import OIO, ORPHANET def pull_orphanet(): diff --git a/src/datahandlers/pantherfamily.py b/src/datahandlers/pantherfamily.py index 4a54d3f9..88e62773 100644 --- a/src/datahandlers/pantherfamily.py +++ b/src/datahandlers/pantherfamily.py @@ -15,7 +15,7 @@ def pull_labels(infile, outfile, metadata_yaml): MAINFAMILY_NAME_COLUMN = 4 SUBFAMILY_NAME_COLUMN = 5 done = set() - with open(infile, "r") as inf, open(outfile, "w") as labelf: + with open(infile) as inf, open(outfile, "w") as labelf: for raw_line in inf: line = raw_line.strip() parts = line.split("\t") diff --git a/src/datahandlers/pantherpathways.py b/src/datahandlers/pantherpathways.py index b464f076..4a1948ae 100644 --- a/src/datahandlers/pantherpathways.py +++ b/src/datahandlers/pantherpathways.py @@ -1,5 +1,5 @@ -from src.prefixes import PANTHERPATHWAY from src.babel_utils import pull_via_urllib +from src.prefixes import PANTHERPATHWAY def pull_panther_pathways(): @@ -9,7 +9,7 @@ def pull_panther_pathways(): def make_pathway_labels(infile, outfile): - with open(infile, "r") as inf: + with open(infile) as inf: data = inf.read() lines = data.split("\n") labels = {} diff --git a/src/datahandlers/pubchem.py b/src/datahandlers/pubchem.py index e22748b8..eac6e6d5 100644 --- a/src/datahandlers/pubchem.py +++ b/src/datahandlers/pubchem.py @@ -1,9 +1,11 @@ -from src.prefixes import PUBCHEMCOMPOUND -from src.babel_utils import pull_via_wget import gzip -import requests import json +import requests + +from src.babel_utils import pull_via_wget +from src.prefixes import 
PUBCHEMCOMPOUND + def pull_pubchem(): files = ["CID-MeSH", "CID-Synonym-filtered.gz", "CID-Title.gz"] diff --git a/src/datahandlers/reactome.py b/src/datahandlers/reactome.py index 1f9b071a..25473653 100644 --- a/src/datahandlers/reactome.py +++ b/src/datahandlers/reactome.py @@ -1,8 +1,10 @@ -from src.prefixes import REACT -from src.categories import PATHWAY, BIOLOGICAL_PROCESS, MOLECULAR_ACTIVITY -import requests import json +import requests + +from src.categories import BIOLOGICAL_PROCESS, MOLECULAR_ACTIVITY, PATHWAY +from src.prefixes import REACT + # Reactome doesn't have a great download, but it does have a decent service that lets you get the files you could have # downloaded. In reactome, there are "events" which have subclasses of "pathway" and "reaction like event". @@ -19,7 +21,7 @@ def pull_reactome(outfile): def make_labels(infile, labelfile): - with open(infile, "r") as inf: + with open(infile) as inf: elements = json.load(inf) with open(labelfile, "w") as labels: for element in elements: @@ -37,7 +39,7 @@ def parse_element_for_labels(e, lfile): def write_ids(infile, idfile): - with open(infile, "r") as inf: + with open(infile) as inf: elements = json.load(inf) with open(idfile, "w") as outf: for element in elements: diff --git a/src/datahandlers/rhea.py b/src/datahandlers/rhea.py index 46abe244..c25572ce 100644 --- a/src/datahandlers/rhea.py +++ b/src/datahandlers/rhea.py @@ -1,9 +1,9 @@ -from src.metadata.provenance import write_concord_metadata -from src.prefixes import RHEA, EC -from src.babel_utils import pull_via_urllib -from src.babel_utils import make_local_name import pyoxigraph +from src.babel_utils import make_local_name, pull_via_urllib +from src.metadata.provenance import write_concord_metadata +from src.prefixes import EC, RHEA + def pull_rhea(): outputfile = pull_via_urllib("https://ftp.expasy.org/databases/rhea/rdf/", "rhea.rdf.gz", subpath="RHEA", decompress=True) diff --git a/src/datahandlers/smpdb.py b/src/datahandlers/smpdb.py index d24ccff8..f46a8843 100644 --- a/src/datahandlers/smpdb.py +++ b/src/datahandlers/smpdb.py @@ -1,7 +1,8 @@ -from zipfile import ZipFile from os import path -from src.prefixes import SMPDB +from zipfile import ZipFile + from src.babel_utils import pull_via_urllib +from src.prefixes import SMPDB def pull_smpdb(): @@ -13,7 +14,7 @@ def pull_smpdb(): def make_labels(inputfile, labelfile): """Get the SMPDB file. It's not good - there are \n and commas, and commas are also the delimiter. 
I mean, what?""" - with open(inputfile, "r") as inf, open(labelfile, "w") as outf: + with open(inputfile) as inf, open(labelfile, "w") as outf: h = inf.readline() for line in inf: if "," not in line: diff --git a/src/datahandlers/umls.py b/src/datahandlers/umls.py index 958815b2..408edf55 100644 --- a/src/datahandlers/umls.py +++ b/src/datahandlers/umls.py @@ -1,15 +1,16 @@ -from src.metadata.provenance import write_concord_metadata -from src.prefixes import UMLS, RXCUI -from src.babel_utils import make_local_name -from src.categories import DRUG, CHEMICAL_ENTITY, MOLECULAR_MIXTURE - +import logging +import os +import re import shutil +from collections import defaultdict from zipfile import ZipFile + import requests -from collections import defaultdict -import os -import re -import logging + +from src.babel_utils import make_local_name +from src.categories import CHEMICAL_ENTITY, DRUG, MOLECULAR_MIXTURE +from src.metadata.provenance import write_concord_metadata +from src.prefixes import RXCUI, UMLS def check_mrconso_line(line): @@ -80,7 +81,7 @@ def write_umls_ids(mrsty, category_map, umls_output, prefix=UMLS, blocklist_umls output_lines = defaultdict(list) semantic_type_trees = defaultdict(set) tree_names = defaultdict(set) - with open(mrsty, "r") as inf, open(umls_output, "w") as outf: + with open(mrsty) as inf, open(umls_output, "w") as outf: for line in inf: x = line.strip().split("|") cat = x[2] @@ -159,7 +160,7 @@ def write_rxnorm_ids(category_map, bad_categories, infile, outfile, prefix=RXCUI If there is an IN or PIN TTY, then it's a ChemicalEntity, otherwise a Drug. """ rxnconso = infile # os.path.join('input_data', 'private', "RXNCONSO.RRF") - with open(rxnconso, "r") as inf, open(outfile, "w") as outf: + with open(rxnconso) as inf, open(outfile, "w") as outf: current_id = None current_ttys = set() has_rxnorm = False @@ -227,7 +228,7 @@ def build_sets( acceptable_drugbank_tty = set(["IN", "PIN", "MIN"]) pairs = set() # test_cui = 'C0026827' - with open(mrconso, "r") as inf, open(umls_output, "w") as concordfile: + with open(mrconso) as inf, open(umls_output, "w") as concordfile: for line in inf: if not check_mrconso_line(line): continue @@ -279,7 +280,7 @@ def build_sets( def read_umls_priority(): mrp = os.path.join("input_data", "umls_precedence.txt") pris = [] - with open(mrp, "r") as inf: + with open(mrp) as inf: h = inf.readline() for line in inf: x = line.strip().split() @@ -398,7 +399,7 @@ def pull_umls(mrconso): priority = read_umls_priority() snomed_label_name = make_local_name("labels", subpath="SNOMEDCT") snomed_syn_name = make_local_name("synonyms", subpath="SNOMEDCT") - with open(mrconso, "r") as inf, open(snomed_label_name, "w") as snolabels, open(snomed_syn_name, "w") as snosyns: + with open(mrconso) as inf, open(snomed_label_name, "w") as snolabels, open(snomed_syn_name, "w") as snosyns: for line in inf: if not check_mrconso_line(line): continue diff --git a/src/datahandlers/unichem.py b/src/datahandlers/unichem.py index 3680fed8..e81a9dc5 100644 --- a/src/datahandlers/unichem.py +++ b/src/datahandlers/unichem.py @@ -1,7 +1,7 @@ import gzip -from src.babel_utils import pull_via_urllib, pull_via_wget -from src.prefixes import CHEMBLCOMPOUND, DRUGCENTRAL, DRUGBANK, GTOPDB, KEGGCOMPOUND, CHEBI, UNII, HMDB, PUBCHEMCOMPOUND +from src.babel_utils import pull_via_urllib +from src.prefixes import CHEBI, CHEMBLCOMPOUND, DRUGBANK, DRUGCENTRAL, GTOPDB, HMDB, KEGGCOMPOUND, PUBCHEMCOMPOUND, UNII # global for this file data_sources: dict = { @@ -19,14 +19,18 @@ def 
pull_unichem(): """Download UniChem files.""" - pull_via_urllib("http://ftp.ebi.ac.uk/pub/databases/chembl/UniChem/data/table_dumps/", "structure.tsv.gz", decompress=False, subpath="UNICHEM", verify_gzip=True) - pull_via_urllib("http://ftp.ebi.ac.uk/pub/databases/chembl/UniChem/data/table_dumps/", "reference.tsv.gz", decompress=False, subpath="UNICHEM", verify_gzip=True) + pull_via_urllib( + "http://ftp.ebi.ac.uk/pub/databases/chembl/UniChem/data/table_dumps/", "structure.tsv.gz", decompress=False, subpath="UNICHEM", verify_gzip=True + ) + pull_via_urllib( + "http://ftp.ebi.ac.uk/pub/databases/chembl/UniChem/data/table_dumps/", "reference.tsv.gz", decompress=False, subpath="UNICHEM", verify_gzip=True + ) def filter_unichem(ref_file, ref_filtered): """Filter UniChem reference file to those sources we're interested in.""" srclist = [str(k) for k in data_sources.keys()] - with gzip.open(ref_file, "rt") as rf, open(ref_filtered, "wt") as ref_filtered: + with gzip.open(ref_file, "rt") as rf, open(ref_filtered, "w") as ref_filtered: header_line = rf.readline() assert header_line == "UCI\tSRC_ID\tSRC_COMPOUND_ID\tASSIGNMENT\n", f"Incorrect header line in {ref_file}: {header_line}" ref_filtered.write(header_line) diff --git a/src/datahandlers/unii.py b/src/datahandlers/unii.py index 5fc1ad38..7615f199 100644 --- a/src/datahandlers/unii.py +++ b/src/datahandlers/unii.py @@ -1,5 +1,5 @@ +from os import listdir, path, rename from zipfile import ZipFile -from os import path, listdir, rename import requests @@ -39,7 +39,7 @@ def make_labels_and_synonyms(inputfile, labelfile, synfile): syncol = 0 wrotelabels = set() wrotesyns = set() - with open(inputfile, "r", encoding="latin-1") as inf, open(labelfile, "w") as lf, open(synfile, "w") as sf: + with open(inputfile, encoding="latin-1") as inf, open(labelfile, "w") as lf, open(synfile, "w") as sf: h = inf.readline() for line in inf: parts = line.strip().split("\t") diff --git a/src/datahandlers/uniprotkb.py b/src/datahandlers/uniprotkb.py index 1a1a8ad5..d04cb38e 100644 --- a/src/datahandlers/uniprotkb.py +++ b/src/datahandlers/uniprotkb.py @@ -10,7 +10,7 @@ def readlabels(which): swissname = make_local_name(f"UniProtKB/uniprot_{which}.fasta") swissprot_labels = {} - with open(swissname, "r") as inf: + with open(swissname) as inf: for line in inf: if line.startswith(">"): # example fasta line: @@ -58,7 +58,7 @@ def download_umls_gene_protein_mappings(umls_uniprotkb_raw_url, umls_uniprotkb_f os.makedirs(os.path.dirname(umls_protein_concords), exist_ok=True) count_rows = 0 - with open(umls_uniprotkb_filename, "r") as f, open(umls_gene_concords, "w") as genef, open(umls_protein_concords, "w") as proteinf: + with open(umls_uniprotkb_filename) as f, open(umls_gene_concords, "w") as genef, open(umls_protein_concords, "w") as proteinf: csv_reader = csv.DictReader(f, dialect="excel-tab") for row in csv_reader: count_rows += 1 diff --git a/src/eutil.py b/src/eutil.py index 18ecda3a..57e85532 100644 --- a/src/eutil.py +++ b/src/eutil.py @@ -1,5 +1,6 @@ import itertools import os + from src.babel_utils import ThrottledRequester diff --git a/src/exporters/duckdb_exporters.py b/src/exporters/duckdb_exporters.py index b466d0f1..37fd9fe8 100644 --- a/src/exporters/duckdb_exporters.py +++ b/src/exporters/duckdb_exporters.py @@ -13,6 +13,7 @@ MIN_FILE_SIZE_FOR_SPLITTING_LOAD = 44_000_000_000 CHUNK_LINE_SIZE = 60_000_000 + def setup_duckdb(duckdb_filename, duckdb_config=None): """ Set up a DuckDB instance using the settings in the config. 
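The duckdb_config dicts handed to setup_duckdb() by the Snakemake rules later in this diff (memory_limit, threads, preserve_insertion_order) are ordinary DuckDB settings. A rough sketch of one way such a dict can be applied at connect time, assuming nothing beyond those settings is needed; setup_duckdb() itself also applies Babel-wide options such as temp_directory.

import duckdb


def open_duckdb(duckdb_filename, settings=None):
    # The DuckDB Python API accepts configuration options as a dict at connect time,
    # e.g. {"memory_limit": "512G", "threads": 2, "preserve_insertion_order": False}.
    return duckdb.connect(duckdb_filename, config=settings or {})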
@@ -28,7 +29,7 @@ def setup_duckdb(duckdb_filename, duckdb_config=None): # Apply some Babel-wide settings to DuckDB. config = get_config() - if 'tmp_directory' in config: + if "tmp_directory" in config: db.execute(f"SET temp_directory = '{config['tmp_directory']}'") db.execute("SET max_temp_directory_size = '500GB';") @@ -75,7 +76,8 @@ def export_compendia_to_parquet(compendium_filename, clique_parquet_filename, du if compendium_filesize < MIN_FILE_SIZE_FOR_SPLITTING_LOAD: # This seems to be around the threshold where 500G is inadequate on Hatteras. So let's try splitting it. logger.info(f"Loading {compendium_filename} into DuckDB (size {compendium_filesize}) in a single direct ingest.") - db.execute("""INSERT INTO Node + db.execute( + """INSERT INTO Node WITH extracted AS ( SELECT json_extract_string(identifier_row.value, ['i', 'l', 'd', 't']) AS extracted_list FROM read_json($1, format='newline_delimited') AS clique, @@ -87,14 +89,16 @@ def export_compendia_to_parquet(compendium_filename, clique_parquet_filename, du LOWER(label) AS label_lc, extracted_list[3] AS description, extracted_list[4] AS taxa - FROM extracted""", [compendium_filename]) + FROM extracted""", + [compendium_filename], + ) else: logger.info(f"Loading {compendium_filename} into DuckDB (size {compendium_filesize}) in multiple chunks of {CHUNK_LINE_SIZE:,} lines:") chunk_filenames = [] lines_added = 0 lines_added_file = 0 output_file = None - with open(compendium_filename, "r", encoding="utf-8") as inf: + with open(compendium_filename, encoding="utf-8") as inf: for line in inf: if output_file is None: output_file = tempfile.NamedTemporaryFile(delete=False, mode="w", encoding="utf-8") @@ -115,7 +119,8 @@ def export_compendia_to_parquet(compendium_filename, clique_parquet_filename, du logger.info(f"Loaded {len(chunk_filenames)} containing {lines_added:,} lines into chunk files.") for chunk_filename in chunk_filenames: # TODO: maybe add the PREFIX in a different column here so we can SELECT on that later? 
- db.execute("""INSERT INTO Node + db.execute( + """INSERT INTO Node WITH extracted AS ( SELECT json_extract_string(identifier_row.value, ['i', 'l', 'd', 't']) AS extracted_list FROM read_json($1, format='newline_delimited') AS clique, @@ -127,7 +132,9 @@ def export_compendia_to_parquet(compendium_filename, clique_parquet_filename, du LOWER(label) AS label_lc, extracted_list[3] AS description, extracted_list[4] AS taxa - FROM extracted""", [chunk_filename]) + FROM extracted""", + [chunk_filename], + ) logger.info(f" - Loaded chunk file {chunk_filename} into DuckDB.") os.remove(chunk_filename) logger.info(f" - Deleted chunk file {chunk_filename}.") @@ -135,7 +142,7 @@ def export_compendia_to_parquet(compendium_filename, clique_parquet_filename, du logger.info(f"Completed loading {compendium_filename} into DuckDB.") logger.info(f" - Line count: {lines_added:,}.") - node_count = db.execute('SELECT COUNT(*) FROM Node').fetchone()[0] + node_count = db.execute("SELECT COUNT(*) FROM Node").fetchone()[0] logger.info(f" - Identifier count: {node_count:,}.") db.table("Node").write_parquet(node_parquet_filename) @@ -144,22 +151,28 @@ def export_compendia_to_parquet(compendium_filename, clique_parquet_filename, du db.sql("""CREATE TABLE Clique (clique_leader STRING, preferred_name STRING, clique_identifier_count INT, biolink_type STRING, information_content FLOAT)""") - db.execute("""INSERT INTO Clique SELECT + db.execute( + """INSERT INTO Clique SELECT json_extract_string(identifiers, '$[0].i') AS clique_leader, preferred_name, len(identifiers) AS clique_identifier_count, type AS biolink_type, ic AS information_content - FROM read_json(?, format='newline_delimited')""", [compendium_filename]) + FROM read_json(?, format='newline_delimited')""", + [compendium_filename], + ) db.table("Clique").write_parquet(clique_parquet_filename) # Step 2. Create an Edge table with all the clique/CURIE relationships from this file. db.sql("CREATE TABLE Edge (clique_leader STRING, curie STRING, conflation STRING)") - db.execute("""INSERT INTO Edge SELECT + db.execute( + """INSERT INTO Edge SELECT json_extract_string(identifiers, '$[0].i') AS clique_leader, UNNEST(json_extract_string(identifiers, '$[*].i')) AS curie, 'None' AS conflation - FROM read_json(?, format='newline_delimited')""", [compendium_filename]) + FROM read_json(?, format='newline_delimited')""", + [compendium_filename], + ) db.table("Edge").write_parquet(edge_parquet_filename) diff --git a/src/exporters/kgx.py b/src/exporters/kgx.py index 551374f4..c6954b9f 100644 --- a/src/exporters/kgx.py +++ b/src/exporters/kgx.py @@ -5,10 +5,10 @@ import gzip import hashlib import json +import logging import os from itertools import combinations -import logging from src.util import LoggingUtil, get_memory_usage_summary # Default logger for this file. @@ -46,7 +46,7 @@ def convert_compendium_to_kgx(compendium_filename, kgx_nodes_filename, kgx_edges os.makedirs(os.path.dirname(kgx_edges_filename), exist_ok=True) # Open the compendium file for reading. - with open(compendium_filename, "r", encoding="utf-8") as compendium: + with open(compendium_filename, encoding="utf-8") as compendium: # Open the nodes and edges files for writing. 
with gzip.open(kgx_nodes_filename, "wt", encoding="utf-8") as node_file, gzip.open(kgx_edges_filename, "wt", encoding="utf-8") as edge_file: # set the flag for suppressing the first ",\n" in the written data diff --git a/src/exporters/sapbert.py b/src/exporters/sapbert.py index 3b7a521a..84bfbb68 100644 --- a/src/exporters/sapbert.py +++ b/src/exporters/sapbert.py @@ -10,12 +10,11 @@ import gzip import itertools import json +import logging import os import random import re -import logging - from src.util import LoggingUtil # Default logger for this file. diff --git a/src/make_cliques.py b/src/make_cliques.py index 12234905..770660d4 100644 --- a/src/make_cliques.py +++ b/src/make_cliques.py @@ -1,5 +1,6 @@ -import json import ast +import json + # Starting with a conflation file, and a set of compendia, create a new compendium merging conflated cliques. @@ -9,7 +10,7 @@ def get_conflation_ids(conffilename): return a set of all the ids in the file. """ ids = set() - with open(conffilename, "r") as inf: + with open(conffilename) as inf: for line in inf: ids.update(ast.literal_eval(line.strip())) if "RXCUI:1092396" in ids: @@ -30,7 +31,7 @@ def get_compendia_names(cdir, compendia, ids): """ id2name = {} for compendium in compendia: - with open(f"{cdir}/{compendium}", "r") as inf: + with open(f"{cdir}/{compendium}") as inf: print(compendium) for line in inf: row = json.loads(line) @@ -56,7 +57,7 @@ def label_cliques(conflation_fname, id2name): [{"i": "RXCUI:1092396", "l": "Acetinophem"}, {"i": "RXCUI:849078", "l": "100 mg Tylenol"}, ...] """ print(len(id2name)) - with open("labeled.txt", "w") as outf, open(conflation_fname, "r") as conflation: + with open("labeled.txt", "w") as outf, open(conflation_fname) as conflation: for line in conflation: clique = [] ids = ast.literal_eval(line) diff --git a/src/metadata/provenance.py b/src/metadata/provenance.py index 7505543b..e375a8aa 100644 --- a/src/metadata/provenance.py +++ b/src/metadata/provenance.py @@ -23,7 +23,7 @@ def write_concord_metadata(filename, *, name, concord_filename, url="", descript distinct_curies = set() predicate_counts = defaultdict(int) curie_prefix_counts = defaultdict(int) - with open(concord_filename, "r") as concordf: + with open(concord_filename) as concordf: for line in concordf: row = line.strip().split("\t") if len(row) != 3: @@ -71,7 +71,7 @@ def write_combined_metadata( ) combined_from_filenames = [combined_from_filenames] for metadata_yaml in combined_from_filenames: - with open(metadata_yaml, "r") as metaf: + with open(metadata_yaml) as metaf: metadata_block = yaml.safe_load(metaf) if metadata_block is None or metadata_block == {}: raise ValueError("Metadata file {metadata_yaml} is empty.") diff --git a/src/node.py b/src/node.py index 4d18584f..e52fce13 100644 --- a/src/node.py +++ b/src/node.py @@ -7,16 +7,16 @@ import curies +from src.LabeledID import LabeledID +from src.prefixes import PUBCHEMCOMPOUND from src.util import ( Text, - get_config, get_biolink_model_toolkit, get_biolink_prefix_map, + get_config, get_logger, get_memory_usage_summary, ) -from src.LabeledID import LabeledID -from src.prefixes import PUBCHEMCOMPOUND logger = get_logger(__name__) @@ -52,7 +52,7 @@ def __init__(self, syndir): for common_synonyms_file in self.config["common"]["synonyms"]: common_synonyms_path = os.path.join(self.config["download_directory"], "common", common_synonyms_file) count_common_file_synonyms = 0 - with open(common_synonyms_path, "r") as synonymsf: + with open(common_synonyms_path) as synonymsf: # Note that these 
files may contain ANY prefix -- we should only fallback to this if we have no other # option. for line in synonymsf: @@ -70,14 +70,14 @@ def load_synonyms(self, prefix): count_labels = 0 count_synonyms = 0 if os.path.exists(labelfname): - with open(labelfname, "r") as inf: + with open(labelfname) as inf: for line in inf: x = line.strip().split("\t") lbs[x[0]].add(("http://www.geneontology.org/formats/oboInOwl#hasExactSynonym", x[1])) count_labels += 1 synfname = os.path.join(self.synonym_dir, prefix, "synonyms") if os.path.exists(synfname): - with open(synfname, "r") as inf: + with open(synfname) as inf: for line in inf: x = line.strip().split("\t") if len(x) < 3: @@ -113,7 +113,7 @@ def __init__(self, rootdir): for common_descriptions_file in self.config["common"]["descriptions"]: common_descriptions_path = os.path.join(self.config["download_directory"], "common", common_descriptions_file) count_common_file_descriptions = 0 - with open(common_descriptions_path, "r") as descriptionsf: + with open(common_descriptions_path) as descriptionsf: # Note that these files may contain ANY CURIE -- we should only fallback to this if we have no other # option. for line in descriptionsf: @@ -130,7 +130,7 @@ def load_descriptions(self, prefix): descfname = os.path.join(self.root_dir, prefix, "descriptions") desc_count = 0 if os.path.exists(descfname): - with open(descfname, "r") as inf: + with open(descfname) as inf: for line in inf: x = line.strip().split("\t") descs[x[0]].add("\t".join(x[1:])) @@ -230,7 +230,7 @@ def load_prefix(self, prefix): logger.info(f"Reading records from {tsv_filename} into memory to load into SQLite: {get_memory_usage_summary()}") records = [] record_count = 0 - with open(tsv_filename, "r") as inf: + with open(tsv_filename) as inf: for line in inf: x = line.strip().split("\t", maxsplit=1) records.append([x[0].upper(), x[1]]) @@ -344,7 +344,7 @@ def __init__(self, ic_file): ubergraph_iri_stem_to_prefix_map = curies.Converter.from_reverse_prefix_map(config["ubergraph_iri_stem_to_prefix_map"]) count_by_prefix = defaultdict(int) - with open(ic_file, "r") as inf: + with open(ic_file) as inf: for line in inf: x = line.strip().split("\t") # We talk in CURIEs, but the infores download is in URLs. We can use the Biolink @@ -502,7 +502,7 @@ def load_extra_labels(self, prefix): labelfname = os.path.join(self.label_dir, prefix, "labels") lbs = {} if os.path.exists(labelfname): - with open(labelfname, "r") as inf: + with open(labelfname) as inf: for line in inf: x = line.strip().split("\t") lbs[x[0]] = x[1] @@ -518,7 +518,7 @@ def apply_labels(self, input_identifiers, labels): for common_labels_file in config["common"]["labels"]: common_labels_path = os.path.join(config["download_directory"], "common", common_labels_file) count_common_file_labels = 0 - with open(common_labels_path, "r") as labelf: + with open(common_labels_path) as labelf: # Note that these files may contain ANY prefix -- we should only fallback to this if we have no other # option. 
for line in labelf: diff --git a/src/reports/compendia_per_file_reports.py b/src/reports/compendia_per_file_reports.py index cf54dd86..349492c5 100644 --- a/src/reports/compendia_per_file_reports.py +++ b/src/reports/compendia_per_file_reports.py @@ -58,7 +58,7 @@ def generate_content_report_for_compendium(compendium_path, report_path): """ with open(report_path, "w") as report_file: - with open(compendium_path, "r") as compendium_file: + with open(compendium_path) as compendium_file: # This is a JSONL file, so we need to read each line as a JSON object. # Track CURIE breakdowns for this compendium. @@ -148,7 +148,7 @@ def summarize_content_report_for_compendia(compendia_report_paths, summary_path) # Read all the summary reports -- these are small, so we can just read them all in. for report_path in compendia_report_paths: - with open(report_path, "r") as report_file: + with open(report_path) as report_file: report = json.load(report_file) # name = report['name'] diff --git a/src/reports/duckdb_reports.py b/src/reports/duckdb_reports.py index 50a89bfa..09b2a29c 100644 --- a/src/reports/duckdb_reports.py +++ b/src/reports/duckdb_reports.py @@ -1,13 +1,13 @@ -import csv import json import os -from collections import Counter, defaultdict +from collections import defaultdict from src import util from src.exporters.duckdb_exporters import setup_duckdb logger = util.get_logger(__name__) + def check_for_identically_labeled_cliques(parquet_root, duckdb_filename, identically_labeled_cliques_tsv, duckdb_config=None): """ Generate a list of identically labeled cliques. @@ -174,7 +174,7 @@ def generate_curie_report(parquet_root, duckdb_filename, curie_report_json, duck # Add total counts back in. for curie_prefix in by_curie_prefix_results.keys(): - by_curie_prefix_results[curie_prefix]['_totals'] = prefix_totals_report_by_curie_prefix[curie_prefix] + by_curie_prefix_results[curie_prefix]["_totals"] = prefix_totals_report_by_curie_prefix[curie_prefix] with open(curie_report_json, "w") as fout: json.dump(by_curie_prefix_results, fout, indent=2, sort_keys=True) @@ -260,16 +260,18 @@ def generate_clique_leaders_report(parquet_root, duckdb_filename, by_clique_repo # Step 3. Add total counts back in. for filename, clique_leader_prefix_entries in clique_leaders_by_filename.items(): if filename in clique_totals_by_curie_prefix: - clique_leaders_by_filename[filename]['_totals'] = clique_totals_by_curie_prefix[filename] + clique_leaders_by_filename[filename]["_totals"] = clique_totals_by_curie_prefix[filename] # Step 4. Write out by-clique report in JSON. with open(by_clique_report_json, "w") as fout: - json.dump(clique_leaders_by_filename, + json.dump( + clique_leaders_by_filename, fout, indent=2, sort_keys=True, ) + def get_label_distribution(duckdb_filename, output_filename): db = setup_duckdb(duckdb_filename) diff --git a/src/reports/report_tables.py b/src/reports/report_tables.py index fc4f375e..758f60e5 100644 --- a/src/reports/report_tables.py +++ b/src/reports/report_tables.py @@ -15,7 +15,6 @@ # import csv import json -from collections import defaultdict def generate_prefix_table(prefix_report_json: str, prefix_report_table_csv: str): @@ -26,7 +25,7 @@ def generate_prefix_table(prefix_report_json: str, prefix_report_table_csv: str) :param prefix_report_table_csv: The report table CSV file to generate. 
""" - with open(prefix_report_json, 'r') as f: + with open(prefix_report_json) as f: prefix_report = json.load(f) curie_entries = [] @@ -38,130 +37,112 @@ def generate_prefix_table(prefix_report_json: str, prefix_report_table_csv: str) raise ValueError(f"Duplicate filename {filename} for prefix {prefix}!") filename_entries[filename] = { - 'prefix': prefix, - 'curie_count': entry['curie_count'], - 'curie_distinct_count': entry['curie_distinct_count'], + "prefix": prefix, + "curie_count": entry["curie_count"], + "curie_distinct_count": entry["curie_distinct_count"], } - if '_totals' not in filename_entries: + if "_totals" not in filename_entries: raise ValueError(f"No totals entry for prefix {prefix}!") - sorted_entries = sorted(filename_entries.items(), key=lambda x: x[1]['curie_distinct_count'], reverse=True) + sorted_entries = sorted(filename_entries.items(), key=lambda x: x[1]["curie_distinct_count"], reverse=True) filename_rows = [] for filename, entry in sorted_entries: - if filename == '_totals': + if filename == "_totals": continue - if entry['curie_count'] == entry['curie_distinct_count']: + if entry["curie_count"] == entry["curie_distinct_count"]: filename_rows.append(f"- {filename}: {entry['curie_count']:,} CURIEs") else: filename_rows.append(f"- {filename}: {entry['curie_count']:,} CURIEs ({entry['curie_distinct_count']:,} distinct)") - curie_entries.append({ - 'prefix': prefix, - 'curie_count': filename_entries['_totals']['curie_count'], - 'curie_distinct_count': filename_entries['_totals']['curie_distinct_count'], - 'filenames': "\n".join(filename_rows), - }) + curie_entries.append( + { + "prefix": prefix, + "curie_count": filename_entries["_totals"]["curie_count"], + "curie_distinct_count": filename_entries["_totals"]["curie_distinct_count"], + "filenames": "\n".join(filename_rows), + } + ) # Before writing it out, sort by distinct CURIE count descending. - with open(prefix_report_table_csv, 'w') as f: - writer = csv.DictWriter(f, [ - 'Prefix', - 'CURIE count', - 'Distinct CURIE count', - 'Filenames' - ]) + with open(prefix_report_table_csv, "w") as f: + writer = csv.DictWriter(f, ["Prefix", "CURIE count", "Distinct CURIE count", "Filenames"]) writer.writeheader() - for entry in sorted(curie_entries, key=lambda x: x['curie_distinct_count'], reverse=True): + for entry in sorted(curie_entries, key=lambda x: x["curie_distinct_count"], reverse=True): row = { - 'Prefix': entry['prefix'], - 'CURIE count': "{:,}".format(entry['curie_count']), - 'Distinct CURIE count': "{:,}".format(entry['curie_distinct_count']), - 'Filenames': entry['filenames'], + "Prefix": entry["prefix"], + "CURIE count": "{:,}".format(entry["curie_count"]), + "Distinct CURIE count": "{:,}".format(entry["curie_distinct_count"]), + "Filenames": entry["filenames"], } writer.writerow(row) + def generate_cliques_table(cliques_report_json: str, cliques_table_csv: str): - with open(cliques_report_json, 'r') as f: + with open(cliques_report_json) as f: cliques_report = json.load(f) # To improve the table somewhat, we'll include pipeline descriptions that group filenames. 
pipeline_descriptions = { - 'Anatomy': { - 'description': 'Anatomical entities at all scales, from brains to endothelium to pancreatic beta cells', - 'filenames': [ - 'AnatomicalEntity', - 'Cell', - 'CellularComponent', - 'GrossAnatomicalStructure' - ], + "Anatomy": { + "description": "Anatomical entities at all scales, from brains to endothelium to pancreatic beta cells", + "filenames": ["AnatomicalEntity", "Cell", "CellularComponent", "GrossAnatomicalStructure"], }, - 'CellLine': { - 'description': 'Cell lines from different species', - 'filenames': ['CellLine'], + "CellLine": { + "description": "Cell lines from different species", + "filenames": ["CellLine"], }, - 'Chemicals': { - 'description': 'All kinds of chemicals, including drugs, small molecules, molecular mixtures, and so on', - 'filenames': [ - 'MolecularMixture', - 'SmallMolecule', - 'Polypeptide', - 'ComplexMolecularMixture', - 'ChemicalEntity', - 'ChemicalMixture', - 'Drug' - ], + "Chemicals": { + "description": "All kinds of chemicals, including drugs, small molecules, molecular mixtures, and so on", + "filenames": ["MolecularMixture", "SmallMolecule", "Polypeptide", "ComplexMolecularMixture", "ChemicalEntity", "ChemicalMixture", "Drug"], }, - 'DiseasePhenotype': { - 'description': 'Conflation of drugs with their active ingredients as chemicals', - 'filenames': [ - 'Disease', - 'PhenotypicFeature' - ], + "DiseasePhenotype": { + "description": "Diseases and phenotypic features", + "filenames": ["Disease", "PhenotypicFeature"], }, - 'DrugChemical': { - 'description': 'Conflation of drugs with their active ingredients as chemicals', - 'filenames': [], + "DrugChemical": { + "description": "Conflation of drugs with their active ingredients as chemicals", + "filenames": [], }, - 'Gene': { - 'description': 'Genes from all species', - 'filenames': ['Gene'], + "Gene": { + "description": "Genes from all species", + "filenames": ["Gene"], }, - 'GeneFamily': { - 'description': 'Families of genes', - 'filenames': ['GeneFamily'], + "GeneFamily": { + "description": "Families of genes", + "filenames": ["GeneFamily"], }, - 'GeneProtein': { - 'description': 'Conflation of genes with the proteins they code for.', - 'filenames': [], + "GeneProtein": { + "description": "Conflation of genes with the proteins they code for.", + "filenames": [], }, - 'Leftover UMLS': { - 'description': 'A special pipeline that adds every UMLS concept not already added elsewhere in Babel', - 'filenames': ['umls'], + "Leftover UMLS": { + "description": "A special pipeline that adds every UMLS concept not already added elsewhere in Babel", + "filenames": ["umls"], }, - 'Macromolecular Complex': { - 'description': '', - 'filenames': ['MacromolecularComplex'], + "Macromolecular Complex": { + "description": "", + "filenames": ["MacromolecularComplex"], }, - 'ProcessActivityPathway': { - 'description': 'Biological processes, activities and pathways', - 'filenames': ['Pathway', 'BiologicalProcess', 'MolecularActivity'], + "ProcessActivityPathway": { + "description": "Biological processes, activities and pathways", + "filenames": ["Pathway", "BiologicalProcess", "MolecularActivity"], }, - 'Protein': { - 'description': 'Proteins from all species', - 'filenames': ['Protein'], + "Protein": { + "description": "Proteins from all species", + "filenames": ["Protein"], }, - 'Publications': { - 'description': 'All publications from PubMed', - 'filenames': ['Publication'], + "Publications": { + "description": "All publications from PubMed", +
"filenames": ["Publication"], + }, + "Taxon": { + "description": "Taxonomic entities, including species, genera, families, and so on from the NCBI Taxonomy", + "filenames": ["OrganismTaxon"], }, - 'Taxon': { - 'description': 'Taxonomic entities, including species, genera, families, and so on from the NCBI Taxonomy', - 'filenames': ['OrganismTaxon'], - } } clique_leader_entries = {} @@ -171,18 +152,16 @@ def generate_cliques_table(cliques_report_json: str, cliques_table_csv: str): totals = {} for clique_leader_prefix, inner2 in inner.items(): - if clique_leader_prefix == '_totals': + if clique_leader_prefix == "_totals": totals = inner2 continue clique_leader_prefixes.add(clique_leader_prefix) for curie_prefix, entry in inner2.items(): - curie_prefix_entries.append({ - 'curie_prefix': curie_prefix, - 'curie_count': entry['curie_count'], - 'distinct_curie_count': entry['distinct_curie_count'] - }) + curie_prefix_entries.append( + {"curie_prefix": curie_prefix, "curie_count": entry["curie_count"], "distinct_curie_count": entry["distinct_curie_count"]} + ) if not totals: raise ValueError(f"No totals entry for filename {filename}!") @@ -190,71 +169,80 @@ def generate_cliques_table(cliques_report_json: str, cliques_table_csv: str): if filename in clique_leader_entries: raise ValueError(f"Duplicate filename {filename}!") - curie_prefixes = map(lambda e: f"{e['curie_prefix']}", sorted(curie_prefix_entries, key=lambda x: x['distinct_curie_count'], reverse=True)) + curie_prefixes = map(lambda e: f"{e['curie_prefix']}", sorted(curie_prefix_entries, key=lambda x: x["distinct_curie_count"], reverse=True)) unique_curie_prefixes = [] for prefix in curie_prefixes: if prefix not in unique_curie_prefixes: unique_curie_prefixes.append(prefix) clique_leader_entries[filename] = { - 'curie_count': totals['curie_count'], - 'distinct_curie_count': totals['distinct_curie_count'], - 'total_synonyms': '', - 'clique_leader_prefixes': ", ".join(sorted(clique_leader_prefixes)), - 'curie_prefixes': ", ".join(unique_curie_prefixes), + "curie_count": totals["curie_count"], + "distinct_curie_count": totals["distinct_curie_count"], + "total_synonyms": "", + "clique_leader_prefixes": ", ".join(sorted(clique_leader_prefixes)), + "curie_prefixes": ", ".join(unique_curie_prefixes), } filenames_not_written = set(clique_leader_entries.keys()) - with open(cliques_table_csv, 'w') as f: - writer = csv.DictWriter(f, [ - 'Pipeline', - 'Description', - 'Biolink Types', - 'Number of CURIEs', - 'Number of distinct CURIEs', - 'Clique leader prefixes', - 'CURIE prefixes', - ]) + with open(cliques_table_csv, "w") as f: + writer = csv.DictWriter( + f, + [ + "Pipeline", + "Description", + "Biolink Types", + "Number of CURIEs", + "Number of distinct CURIEs", + "Clique leader prefixes", + "CURIE prefixes", + ], + ) writer.writeheader() for pipeline, entry in pipeline_descriptions.items(): - description = entry['description'] + description = entry["description"] - filenames = entry.get('filenames', []) + filenames = entry.get("filenames", []) if len(filenames) == 0: - writer.writerow({ - 'Pipeline': pipeline, - 'Description': description, - 'Biolink Types': 'N/A', - 'Number of CURIEs': '', - 'Number of distinct CURIEs': '', - 'Clique leader prefixes': '', - 'CURIE prefixes': '', - }) + writer.writerow( + { + "Pipeline": pipeline, + "Description": description, + "Biolink Types": "N/A", + "Number of CURIEs": "", + "Number of distinct CURIEs": "", + "Clique leader prefixes": "", + "CURIE prefixes": "", + } + ) for filename in filenames: if filename 
not in clique_leader_entries: raise ValueError(f"Pipeline {pipeline} references filename {filename} that isn't in clique_leader_entries!") - writer.writerow({ - 'Pipeline': pipeline, - 'Description': description, - 'Biolink Types': filename, - 'Number of CURIEs': "{:,}".format(clique_leader_entries[filename]['curie_count']), - 'Number of distinct CURIEs': "{:,}".format(clique_leader_entries[filename]['distinct_curie_count']), - 'Clique leader prefixes': clique_leader_entries[filename]['clique_leader_prefixes'], - 'CURIE prefixes': clique_leader_entries[filename]['curie_prefixes'], - }) + writer.writerow( + { + "Pipeline": pipeline, + "Description": description, + "Biolink Types": filename, + "Number of CURIEs": "{:,}".format(clique_leader_entries[filename]["curie_count"]), + "Number of distinct CURIEs": "{:,}".format(clique_leader_entries[filename]["distinct_curie_count"]), + "Clique leader prefixes": clique_leader_entries[filename]["clique_leader_prefixes"], + "CURIE prefixes": clique_leader_entries[filename]["curie_prefixes"], + } + ) filenames_not_written.remove(filename) for filename in sorted(filenames_not_written): - writer.writerow({ - 'Pipeline': '**NONE**', - 'Description': '', - 'Biolink Types': filename, - 'Number of CURIEs': "{:,}".format(clique_leader_entries[filename]['curie_count']), - 'Number of distinct CURIEs': "{:,}".format(clique_leader_entries[filename]['distinct_curie_count']), - 'Clique leader prefixes': clique_leader_entries[filename]['clique_leader_prefixes'], - 'CURIE prefixes': clique_leader_entries[filename]['curie_prefixes'], - }) + writer.writerow( + { + "Pipeline": "**NONE**", + "Description": "", + "Biolink Types": filename, + "Number of CURIEs": "{:,}".format(clique_leader_entries[filename]["curie_count"]), + "Number of distinct CURIEs": "{:,}".format(clique_leader_entries[filename]["distinct_curie_count"]), + "Clique leader prefixes": clique_leader_entries[filename]["clique_leader_prefixes"], + "CURIE prefixes": clique_leader_entries[filename]["curie_prefixes"], + } + ) diff --git a/src/sdfreader.py b/src/sdfreader.py index 7c991403..ef1fd861 100644 --- a/src/sdfreader.py +++ b/src/sdfreader.py @@ -1,7 +1,7 @@ def read_sdf(infile, interesting_keys): """Given an sdf file name and a set of keys that we'd like to extract, return a dictionary going chebiid -> {properties} where the properties are chosen from the interesting keys""" - with open(infile, "r") as inf: + with open(infile) as inf: chebisdf = inf.read() lines = chebisdf.split("\n") chunk = [] diff --git a/src/snakefiles/anatomy.snakefile b/src/snakefiles/anatomy.snakefile index 7edbbc93..49e166bf 100644 --- a/src/snakefiles/anatomy.snakefile +++ b/src/snakefiles/anatomy.snakefile @@ -52,7 +52,7 @@ rule anatomy_umls_ids: rule get_anatomy_obo_relationships: - retries: 10 # Ubergraph sometimes fails mid-download, and then we need to retry. + retries: 10 # Ubergraph sometimes fails mid-download, and then we need to retry. output: config["intermediate_directory"] + "/anatomy/concords/UBERON", config["intermediate_directory"] + "/anatomy/concords/CL", diff --git a/src/snakefiles/chemical.snakefile b/src/snakefiles/chemical.snakefile index 8a09ac93..dfbb101f 100644 --- a/src/snakefiles/chemical.snakefile +++ b/src/snakefiles/chemical.snakefile @@ -101,7 +101,7 @@ rule chemical_drugcentral_ids: rule chemical_chebi_ids: - retries: 10 # Ubergraph sometimes fails mid-download, and then we need to retry. + retries: 10 # Ubergraph sometimes fails mid-download, and then we need to retry. 
output: outfile=config["intermediate_directory"] + "/chemicals/ids/CHEBI", run: diff --git a/src/snakefiles/datacollect.snakefile b/src/snakefiles/datacollect.snakefile index 44540911..5d210e79 100644 --- a/src/snakefiles/datacollect.snakefile +++ b/src/snakefiles/datacollect.snakefile @@ -212,7 +212,7 @@ rule get_obo_labels: download_directory=config["download_directory"], prefix=config["generate_dirs_for_labels_and_synonyms_prefixes"], ), - retries: 10 # Ubergraph sometimes fails mid-download, and then we need to retry. + retries: 10 # Ubergraph sometimes fails mid-download, and then we need to retry. run: obo.pull_uber_labels(output.obo_labels, output.generated_labels) @@ -228,7 +228,7 @@ rule get_obo_synonyms: download_directory=config["download_directory"], prefix=config["generate_dirs_for_labels_and_synonyms_prefixes"], ), - retries: 10 # Ubergraph sometimes fails mid-download, and then we need to retry. + retries: 10 # Ubergraph sometimes fails mid-download, and then we need to retry. run: obo.pull_uber_synonyms(output.obo_synonyms, output.generated_synonyms) @@ -236,10 +236,11 @@ rule get_obo_synonyms: rule get_obo_descriptions: output: obo_descriptions=config["download_directory"] + "/common/ubergraph/descriptions.jsonl", - retries: 10 # Ubergraph sometimes fails mid-download, and then we need to retry. + retries: 10 # Ubergraph sometimes fails mid-download, and then we need to retry. run: obo.pull_uber_descriptions(output.obo_descriptions) + rule get_icrdf: input: # Ideally, we would correctly mark all the dependencies for Ubergraph labels, synonyms and descriptions @@ -251,7 +252,7 @@ rule get_icrdf: config["download_directory"] + "/common/ubergraph/descriptions.jsonl", output: icrdf_filename=config["download_directory"] + "/icRDF.tsv", - retries: 10 # Ubergraph sometimes fails mid-download, and then we need to retry. + retries: 10 # Ubergraph sometimes fails mid-download, and then we need to retry. 
run: obo.pull_uber_icRDF(output.icrdf_filename) diff --git a/src/snakefiles/duckdb.snakefile b/src/snakefiles/duckdb.snakefile index 8c0474ac..ec7a1534 100644 --- a/src/snakefiles/duckdb.snakefile +++ b/src/snakefiles/duckdb.snakefile @@ -87,11 +87,16 @@ rule check_for_identically_labeled_cliques: duckdb_filename=temp(config["output_directory"] + "/duckdb/duckdbs/identically_labeled_clique.duckdb"), identically_labeled_cliques_tsv=config["output_directory"] + "/reports/duckdb/identically_labeled_cliques.tsv.gz", run: - src.reports.duckdb_reports.check_for_identically_labeled_cliques(params.parquet_dir, output.duckdb_filename, output.identically_labeled_cliques_tsv, { - 'memory_limit': '512G', - 'threads': 2, - 'preserve_insertion_order': False, - }) + src.reports.duckdb_reports.check_for_identically_labeled_cliques( + params.parquet_dir, + output.duckdb_filename, + output.identically_labeled_cliques_tsv, + { + "memory_limit": "512G", + "threads": 2, + "preserve_insertion_order": False, + }, + ) rule check_for_duplicate_curies: @@ -106,11 +111,16 @@ rule check_for_duplicate_curies: duckdb_filename=temp(config["output_directory"] + "/duckdb/duckdbs/duplicate_curies.duckdb"), duplicate_curies=config["output_directory"] + "/reports/duckdb/duplicate_curies.tsv", run: - src.reports.duckdb_reports.check_for_duplicate_curies(params.parquet_dir, output.duckdb_filename, output.duplicate_curies, { - 'memory_limit': '1500G', - 'threads': 1, - 'preserve_insertion_order': False, - }) + src.reports.duckdb_reports.check_for_duplicate_curies( + params.parquet_dir, + output.duckdb_filename, + output.duplicate_curies, + { + "memory_limit": "1500G", + "threads": 1, + "preserve_insertion_order": False, + }, + ) rule check_for_duplicate_clique_leaders: @@ -125,11 +135,17 @@ rule check_for_duplicate_clique_leaders: duckdb_filename=temp(config["output_directory"] + "/duckdb/duckdbs/duplicate_clique_leaders.duckdb"), duplicate_clique_leaders_tsv=config["output_directory"] + "/reports/duckdb/duplicate_clique_leaders.tsv", run: - src.reports.duckdb_reports.check_for_duplicate_clique_leaders(params.parquet_dir, output.duckdb_filename, output.duplicate_clique_leaders_tsv, { - 'memory_limit': '512G', - 'threads': 2, - 'preserve_insertion_order': False, - }) + src.reports.duckdb_reports.check_for_duplicate_clique_leaders( + params.parquet_dir, + output.duckdb_filename, + output.duplicate_clique_leaders_tsv, + { + "memory_limit": "512G", + "threads": 2, + "preserve_insertion_order": False, + }, + ) + rule generate_curie_report: resources: @@ -144,12 +160,17 @@ rule generate_curie_report: duckdb_filename=temp(config["output_directory"] + "/duckdb/duckdbs/curie_report.duckdb"), curie_report_json=config["output_directory"] + "/reports/duckdb/curie_report.json", run: - src.reports.duckdb_reports.generate_curie_report(params.parquet_dir, output.duckdb_filename, output.curie_report_json, { - # 'memory_limit': '20G', -- this actually worked! - 'memory_limit': '100G', - 'threads': 5, - 'preserve_insertion_order': False, - }) + src.reports.duckdb_reports.generate_curie_report( + params.parquet_dir, + output.duckdb_filename, + output.curie_report_json, + { + # 'memory_limit': '20G', -- this actually worked! 
+ "memory_limit": "100G", + "threads": 5, + "preserve_insertion_order": False, + }, + ) rule generate_clique_leader_report: @@ -164,11 +185,16 @@ rule generate_clique_leader_report: duckdb_filename=temp(config["output_directory"] + "/duckdb/duckdbs/clique_leaders.duckdb"), clique_leaders_json=config["output_directory"] + "/reports/duckdb/clique_leaders.json", run: - src.reports.duckdb_reports.generate_clique_leaders_report(params.parquet_dir, output.duckdb_filename, output.clique_leaders_json, { - 'memory_limit': '20G', - 'threads': 3, - 'preserve_insertion_order': False, - }) + src.reports.duckdb_reports.generate_clique_leaders_report( + params.parquet_dir, + output.duckdb_filename, + output.clique_leaders_json, + { + "memory_limit": "20G", + "threads": 3, + "preserve_insertion_order": False, + }, + ) rule all_duckdb_reports: diff --git a/src/snakefiles/reports.snakefile b/src/snakefiles/reports.snakefile index 1999319e..116fe3f7 100644 --- a/src/snakefiles/reports.snakefile +++ b/src/snakefiles/reports.snakefile @@ -80,10 +80,12 @@ rule generate_summary_content_report_for_compendia: run: summarize_content_report_for_compendia(input.expected_content_reports, output.report_path) + # # REPORT TABLES # + # Generate a prefix table. rule generate_prefix_table: input: @@ -93,6 +95,7 @@ rule generate_prefix_table: run: report_tables.generate_prefix_table(input.curie_report, output.prefix_table) + # Generate a cliques table. rule generate_cliques_table: input: @@ -102,6 +105,7 @@ rule generate_cliques_table: run: report_tables.generate_cliques_table(input.cliques_report, output.cliques_table) + # Check that all the reports were built correctly. rule all_reports: input: diff --git a/src/snakefiles/util.py b/src/snakefiles/util.py index 1967b59e..77735809 100644 --- a/src/snakefiles/util.py +++ b/src/snakefiles/util.py @@ -1,6 +1,6 @@ # Shared code used by Snakemake files -import shutil import gzip +import shutil import src.util diff --git a/src/synonyms/synonymconflation.py b/src/synonyms/synonymconflation.py index 174b205c..5ac4eb52 100644 --- a/src/synonyms/synonymconflation.py +++ b/src/synonyms/synonymconflation.py @@ -14,6 +14,7 @@ logger = util.get_logger(__name__) + # click.command() # click.option('--conflation-file', multiple=True, type=click.Path(exists=True)) # click.option('--output', type=click.Path(exists=False), default='-') @@ -41,7 +42,7 @@ def conflate_synonyms(synonym_files_gz, compendia_files, conflation_file, output # Step 1. Load all the conflations. We only need to work on these identifiers, so that simplifies our work. 
for conflation_filename in conflation_file: logger.info(f"Reading conflation file {conflation_filename}") - with open(conflation_filename, "r") as conflationf: + with open(conflation_filename) as conflationf: count_primary = 0 count_secondary = 0 for line in conflationf: @@ -74,7 +75,7 @@ def conflate_synonyms(synonym_files_gz, compendia_files, conflation_file, output for compendium_filename in compendia_files: logger.info(f"Reading compendium file {compendium_filename}") - with open(compendium_filename, "r") as compendiumf: + with open(compendium_filename) as compendiumf: for line in compendiumf: clique = json.loads(line) identifiers = clique.get("identifiers", []) diff --git a/src/triplestore.py b/src/triplestore.py index 4031658c..4bf832e3 100644 --- a/src/triplestore.py +++ b/src/triplestore.py @@ -1,14 +1,15 @@ +import logging import os -from src.util import LoggingUtil -from SPARQLWrapper import SPARQLWrapper2, JSON, POSTDIRECTLY, POST from string import Template -import logging +from SPARQLWrapper import JSON, POST, POSTDIRECTLY, SPARQLWrapper2 + +from src.util import LoggingUtil logger = LoggingUtil.init_logging(__name__, logging.ERROR) -class TripleStore(object): +class TripleStore: """Connect to a SPARQL endpoint and provide services for loading and executing queries.""" def __init__(self, hostname): @@ -21,8 +22,8 @@ def get_template(self, query_name): def get_template_text(self, query_name): """Get the text of a template given its name""" query = None - fn = os.path.join(os.path.dirname(__file__), "query", "{0}.sparql".format(query_name)) - with open(fn, "r") as stream: + fn = os.path.join(os.path.dirname(__file__), "query", f"{query_name}.sparql") + with open(fn) as stream: query = stream.read() return query diff --git a/src/ubergraph.py b/src/ubergraph.py index a03438d4..7dee22f2 100644 --- a/src/ubergraph.py +++ b/src/ubergraph.py @@ -1,12 +1,12 @@ import logging +from collections import defaultdict from time import sleep +from src.babel_utils import norm from src.triplestore import TripleStore from src.util import Text -from collections import defaultdict -from src.babel_utils import norm -SLEEP_BETWEEN_UBERGRAPH_QUERIES = 5 # seconds +SLEEP_BETWEEN_UBERGRAPH_QUERIES = 5 # seconds class UberGraph: @@ -301,7 +301,7 @@ def get_subclasses_and_xrefs(self, iri): # Sometimes we're getting back just strings that aren't curies, skip those (but complain) try: dcurie = Text.opt_to_curie(row["descendent"]) - results[dcurie].add((Text.opt_to_curie(row["xref"]))) + results[dcurie].add(Text.opt_to_curie(row["xref"])) except ValueError as verr: print(f"Bad XREF from {row['descendent']} to {row['xref']}: {verr}") continue @@ -402,7 +402,7 @@ def get_subclasses_and_close(self, iri): results[desc] += [] else: try: - results[desc].append((Text.opt_to_curie(row["match"]))) + results[desc].append(Text.opt_to_curie(row["match"])) except ValueError as verr: # Sometimes, if there are no exact_matches, we'll get some kind of blank node id # like 't19830198'. Want to filter those out. 
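Two defensive habits recur around the Ubergraph code in this diff: the download rules set retries: 10 because Ubergraph sometimes fails mid-download, and ubergraph.py paces successive queries with SLEEP_BETWEEN_UBERGRAPH_QUERIES. A minimal sketch of that retry-plus-pause idea, using a stand-in callable rather than the UberGraph class's real SPARQL call:

from time import sleep

SLEEP_BETWEEN_UBERGRAPH_QUERIES = 5  # seconds, as in src/ubergraph.py
MAX_ATTEMPTS = 10  # mirrors the retries: 10 on the Snakemake download rules


def query_with_retries(run_query, sparql):
    # Retry a flaky query, pausing between attempts; run_query is a placeholder callable.
    last_error = None
    for attempt in range(MAX_ATTEMPTS):
        if attempt > 0:
            sleep(SLEEP_BETWEEN_UBERGRAPH_QUERIES)
        try:
            return run_query(sparql)
        except Exception as err:  # narrow to the endpoint's real error types in practice
            last_error = err
    raise last_error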
diff --git a/src/util.py b/src/util.py index 90f77b3e..276f2b7b 100644 --- a/src/util.py +++ b/src/util.py @@ -1,22 +1,21 @@ -import logging +import copy import json +import logging import os import sys +from collections import namedtuple +from logging.handlers import RotatingFileHandler from time import gmtime import curies -import yaml import psutil -from collections import namedtuple -import copy -from logging.handlers import RotatingFileHandler - +import yaml from bmt import Toolkit from humanfriendly import format_size -from src.LabeledID import LabeledID -from src.prefixes import OMIM, OMIMPS, UMLS, SNOMEDCT, KEGGPATHWAY, KEGGREACTION, NCIT, ICD10, ICD10CM, ICD11FOUNDATION import src.prefixes as prefixes +from src.LabeledID import LabeledID +from src.prefixes import ICD10, ICD10CM, ICD11FOUNDATION, KEGGPATHWAY, KEGGREACTION, NCIT, OMIM, OMIMPS, SNOMEDCT, UMLS def get_logger(name, loglevel=logging.INFO): @@ -46,7 +45,7 @@ def get_logger(name, loglevel=logging.INFO): # loggers = {} -class LoggingUtil(object): +class LoggingUtil: """Logging utility controlling format and setting initial logging level""" @staticmethod @@ -101,7 +100,7 @@ def init_logging(name, level=logging.INFO, format="short", logFilePath=None, log return logger -class Munge(object): +class Munge: @staticmethod def gene(gene): return gene.split("/")[-1:][0] if gene.startswith("http://") else gene @@ -255,14 +254,14 @@ def get_resource_path(resource_name): @staticmethod def load_json(path): result = None - with open(path, "r") as stream: + with open(path) as stream: result = json.loads(stream.read()) return result @staticmethod def load_yaml(path): result = None - with open(path, "r") as stream: + with open(path) as stream: result = yaml.load(stream.read()) return result @@ -345,7 +344,7 @@ def get_config(): return config_yaml cname = os.path.join(os.path.dirname(__file__), "..", "config.yaml") - with open(cname, "r") as yaml_file: + with open(cname) as yaml_file: config_yaml = yaml.safe_load(yaml_file) return config_yaml diff --git a/tests/datahandlers/test_ensembl.py b/tests/datahandlers/test_ensembl.py index a10aaae7..2a22c89f 100644 --- a/tests/datahandlers/test_ensembl.py +++ b/tests/datahandlers/test_ensembl.py @@ -79,7 +79,7 @@ def test_pull_ensembl(tmp_path): split_tsv = download_as_splits / "choffmanni_gene_ensembl" / "BioMart.tsv" assert unsplit_tsv.exists() assert split_tsv.exists() - with open(unsplit_tsv, "r") as unsplit_file, open(split_tsv, "r") as split_file: + with open(unsplit_tsv) as unsplit_file, open(split_tsv) as split_file: # So we can't compare these files directly, because rows with the same ensembl_gene_id shows up in an # undetermined order. So we need to load them, group them by ENSEMBL gene ID, and then compare those sets. unsplit_rows = list(read_biomart_file(unsplit_file)) diff --git a/tests/test_ThrottledRequester.py b/tests/test_ThrottledRequester.py index 87c194c0..333a3eaa 100644 --- a/tests/test_ThrottledRequester.py +++ b/tests/test_ThrottledRequester.py @@ -1,5 +1,6 @@ from datetime import datetime as dt from datetime import timedelta + from src.babel_utils import ThrottledRequester diff --git a/tests/test_ftp.py b/tests/test_ftp.py index c9dac12c..a6cc4bbd 100644 --- a/tests/test_ftp.py +++ b/tests/test_ftp.py @@ -1,6 +1,8 @@ +import gzip + import pytest + from src.babel_utils import pull_via_ftp -import gzip # FTP doesn't play nicely with travis-ci, so these are marked so they can be excluded. 
 # See: https://blog.travis-ci.com/2018-07-23-the-tale-of-ftp-at-travis-ci
@@ -21,7 +23,7 @@ def test_pull_text_to_file():
     """Pull a text file into local file"""
     ofname = "test_text"
     outname = pull_via_ftp("ftp.ncbi.nlm.nih.gov", "gene/DATA/", "stopwords_gene", outfilename=ofname)
-    with open(outname, "r") as inf:
+    with open(outname) as inf:
         lines = inf.read().split("\n")
     assert len(lines) > 100
     assert lines[0] == "a"
@@ -41,7 +43,7 @@ def test_pull_gzip_to_uncompressed_file():
     """Pull a gzipped file into memory, decompressed"""
     ofname = "test_gz_text"
     outname = pull_via_ftp("ftp.ncbi.nlm.nih.gov", "gene/DATA/", "gene_group.gz", decompress_data=True, outfilename=ofname)
-    with open(outname, "r") as inf:
+    with open(outname) as inf:
         lines = inf.read().split("\n")
     assert len(lines) > 1000
     assert lines[0].startswith("#tax_id")
diff --git a/tests/test_geneproteiny.py b/tests/test_geneproteiny.py
index e1029468..326e7053 100644
--- a/tests/test_geneproteiny.py
+++ b/tests/test_geneproteiny.py
@@ -1,6 +1,7 @@
-from src.createcompendia.geneprotein import build_compendium
 import os
 
+from src.createcompendia.geneprotein import build_compendium
+
 
 def test_gp():
     here = os.path.abspath(os.path.dirname(__file__))
@@ -9,7 +10,7 @@ def test_gp():
     geneprotein_concord = os.path.join(here, "testdata", "gp_UniProtNCBI.txt")
     outfile = os.path.join(here, "testdata", "gp_output.txt")
     build_compendium(gene_compendium, protein_compendium, geneprotein_concord, outfile)
-    with open(outfile, "r") as inf:
+    with open(outfile) as inf:
         x = inf.read()
     assert len(x) > 0
     print(x)
diff --git a/tests/test_node_factory.py b/tests/test_node_factory.py
index 223af0c8..24959eff 100644
--- a/tests/test_node_factory.py
+++ b/tests/test_node_factory.py
@@ -1,7 +1,8 @@
 import os
-from src.node import NodeFactory
-from src.LabeledID import LabeledID
+
 import src.prefixes as pref
+from src.LabeledID import LabeledID
+from src.node import NodeFactory
 
 
 def test_get_ancestors():
diff --git a/uv.lock b/uv.lock
index df33f81d..56bdd516 100644
--- a/uv.lock
+++ b/uv.lock
@@ -215,6 +215,7 @@ dependencies = [
 
 [package.dev-dependencies]
 dev = [
+    { name = "ruff" },
     { name = "snakefmt" },
 ]
 
@@ -249,7 +250,10 @@ requires-dist = [
 ]
 
 [package.metadata.requires-dev]
-dev = [{ name = "snakefmt", specifier = ">=0.11.2" }]
+dev = [
+    { name = "ruff", specifier = ">=0.14.9" },
+    { name = "snakefmt", specifier = ">=0.11.2" },
+]
 
 [[package]]
 name = "bcp47"
@@ -2614,6 +2618,32 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ce/08/4349bdd5c64d9d193c360aa9db89adeee6f6682ab8825dca0a3f535f434f/rpds_py-0.27.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:dc23e6820e3b40847e2f4a7726462ba0cf53089512abe9ee16318c366494c17a", size = 556523, upload-time = "2025-08-27T12:16:12.188Z" },
 ]
 
+[[package]]
+name = "ruff"
+version = "0.14.9"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f6/1b/ab712a9d5044435be8e9a2beb17cbfa4c241aa9b5e4413febac2a8b79ef2/ruff-0.14.9.tar.gz", hash = "sha256:35f85b25dd586381c0cc053f48826109384c81c00ad7ef1bd977bfcc28119d5b", size = 5809165, upload-time = "2025-12-11T21:39:47.381Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b8/1c/d1b1bba22cffec02351c78ab9ed4f7d7391876e12720298448b29b7229c1/ruff-0.14.9-py3-none-linux_armv6l.whl", hash = "sha256:f1ec5de1ce150ca6e43691f4a9ef5c04574ad9ca35c8b3b0e18877314aba7e75", size = 13576541, upload-time = "2025-12-11T21:39:14.806Z" },
+    { url = "https://files.pythonhosted.org/packages/94/ab/ffe580e6ea1fca67f6337b0af59fc7e683344a43642d2d55d251ff83ceae/ruff-0.14.9-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ed9d7417a299fc6030b4f26333bf1117ed82a61ea91238558c0268c14e00d0c2", size = 13779363, upload-time = "2025-12-11T21:39:20.29Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/f8/2be49047f929d6965401855461e697ab185e1a6a683d914c5c19c7962d9e/ruff-0.14.9-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d5dc3473c3f0e4a1008d0ef1d75cee24a48e254c8bed3a7afdd2b4392657ed2c", size = 12925292, upload-time = "2025-12-11T21:39:38.757Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/e9/08840ff5127916bb989c86f18924fd568938b06f58b60e206176f327c0fe/ruff-0.14.9-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84bf7c698fc8f3cb8278830fb6b5a47f9bcc1ed8cb4f689b9dd02698fa840697", size = 13362894, upload-time = "2025-12-11T21:39:02.524Z" },
+    { url = "https://files.pythonhosted.org/packages/31/1c/5b4e8e7750613ef43390bb58658eaf1d862c0cc3352d139cd718a2cea164/ruff-0.14.9-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:aa733093d1f9d88a5d98988d8834ef5d6f9828d03743bf5e338bf980a19fce27", size = 13311482, upload-time = "2025-12-11T21:39:17.51Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/3a/459dce7a8cb35ba1ea3e9c88f19077667a7977234f3b5ab197fad240b404/ruff-0.14.9-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6a1cfb04eda979b20c8c19550c8b5f498df64ff8da151283311ce3199e8b3648", size = 14016100, upload-time = "2025-12-11T21:39:41.948Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/31/f064f4ec32524f9956a0890fc6a944e5cf06c63c554e39957d208c0ffc45/ruff-0.14.9-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1e5cb521e5ccf0008bd74d5595a4580313844a42b9103b7388eca5a12c970743", size = 15477729, upload-time = "2025-12-11T21:39:23.279Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/6d/f364252aad36ccd443494bc5f02e41bf677f964b58902a17c0b16c53d890/ruff-0.14.9-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd429a8926be6bba4befa8cdcf3f4dd2591c413ea5066b1e99155ed245ae42bb", size = 15122386, upload-time = "2025-12-11T21:39:33.125Z" },
+    { url = "https://files.pythonhosted.org/packages/20/02/e848787912d16209aba2799a4d5a1775660b6a3d0ab3944a4ccc13e64a02/ruff-0.14.9-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ab208c1b7a492e37caeaf290b1378148f75e13c2225af5d44628b95fd7834273", size = 14497124, upload-time = "2025-12-11T21:38:59.33Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/51/0489a6a5595b7760b5dbac0dd82852b510326e7d88d51dbffcd2e07e3ff3/ruff-0.14.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72034534e5b11e8a593f517b2f2f2b273eb68a30978c6a2d40473ad0aaa4cb4a", size = 14195343, upload-time = "2025-12-11T21:39:44.866Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/53/3bb8d2fa73e4c2f80acc65213ee0830fa0c49c6479313f7a68a00f39e208/ruff-0.14.9-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:712ff04f44663f1b90a1195f51525836e3413c8a773574a7b7775554269c30ed", size = 14346425, upload-time = "2025-12-11T21:39:05.927Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/04/bdb1d0ab876372da3e983896481760867fc84f969c5c09d428e8f01b557f/ruff-0.14.9-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a111fee1db6f1d5d5810245295527cda1d367c5aa8f42e0fca9a78ede9b4498b", size = 13258768, upload-time = "2025-12-11T21:39:08.691Z" },
+    { url = "https://files.pythonhosted.org/packages/40/d9/8bf8e1e41a311afd2abc8ad12be1b6c6c8b925506d9069b67bb5e9a04af3/ruff-0.14.9-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8769efc71558fecc25eb295ddec7d1030d41a51e9dcf127cbd63ec517f22d567", size = 13326939, upload-time = "2025-12-11T21:39:53.842Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/56/a213fa9edb6dd849f1cfbc236206ead10913693c72a67fb7ddc1833bf95d/ruff-0.14.9-py3-none-musllinux_1_2_i686.whl", hash = "sha256:347e3bf16197e8a2de17940cd75fd6491e25c0aa7edf7d61aa03f146a1aa885a", size = 13578888, upload-time = "2025-12-11T21:39:35.988Z" },
+    { url = "https://files.pythonhosted.org/packages/33/09/6a4a67ffa4abae6bf44c972a4521337ffce9cbc7808faadede754ef7a79c/ruff-0.14.9-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:7715d14e5bccf5b660f54516558aa94781d3eb0838f8e706fb60e3ff6eff03a8", size = 14314473, upload-time = "2025-12-11T21:39:50.78Z" },
+    { url = "https://files.pythonhosted.org/packages/12/0d/15cc82da5d83f27a3c6b04f3a232d61bc8c50d38a6cd8da79228e5f8b8d6/ruff-0.14.9-py3-none-win32.whl", hash = "sha256:df0937f30aaabe83da172adaf8937003ff28172f59ca9f17883b4213783df197", size = 13202651, upload-time = "2025-12-11T21:39:26.628Z" },
+    { url = "https://files.pythonhosted.org/packages/32/f7/c78b060388eefe0304d9d42e68fab8cffd049128ec466456cef9b8d4f06f/ruff-0.14.9-py3-none-win_amd64.whl", hash = "sha256:c0b53a10e61df15a42ed711ec0bda0c582039cf6c754c49c020084c55b5b0bc2", size = 14702079, upload-time = "2025-12-11T21:39:11.954Z" },
+    { url = "https://files.pythonhosted.org/packages/26/09/7a9520315decd2334afa65ed258fed438f070e31f05a2e43dd480a5e5911/ruff-0.14.9-py3-none-win_arm64.whl", hash = "sha256:8e821c366517a074046d92f0e9213ed1c13dbc5b37a7fc20b07f79b64d62cc84", size = 13744730, upload-time = "2025-12-11T21:39:29.659Z" },
+]
+
 [[package]]
 name = "semsql"
 version = "0.4.0"