diff --git a/docs/source/reference/api/handlers/wags_tails.utils.downloads.rst b/docs/source/reference/api/utils/wags_tails.utils.downloads.rst similarity index 100% rename from docs/source/reference/api/handlers/wags_tails.utils.downloads.rst rename to docs/source/reference/api/utils/wags_tails.utils.downloads.rst diff --git a/docs/source/reference/api/handlers/wags_tails.utils.storage.rst b/docs/source/reference/api/utils/wags_tails.utils.storage.rst similarity index 100% rename from docs/source/reference/api/handlers/wags_tails.utils.storage.rst rename to docs/source/reference/api/utils/wags_tails.utils.storage.rst diff --git a/docs/source/reference/api/handlers/wags_tails.utils.versioning.rst b/docs/source/reference/api/utils/wags_tails.utils.versioning.rst similarity index 100% rename from docs/source/reference/api/handlers/wags_tails.utils.versioning.rst rename to docs/source/reference/api/utils/wags_tails.utils.versioning.rst diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst index dfa45b0..1b12cf6 100644 --- a/docs/source/reference/index.rst +++ b/docs/source/reference/index.rst @@ -58,13 +58,3 @@ Utilities wags_tails.utils.downloads wags_tails.utils.storage wags_tails.utils.versioning - -Miscellany ----------- - -.. autosummary:: - :nosignatures: - :toctree: api/misc/ - :template: module_summary.rst - - wags_tails.logging diff --git a/docs/source/usage.rst b/docs/source/usage.rst index ba10855..aa8545b 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -28,7 +28,9 @@ Additional parameters are available to force usage of the most recent locally-av Configuration ------------- -All data is stored within source-specific subdirectories of a designated ``wags-tails`` data directory. By default, this location is ``~/.local/share/wags_tails/``, but it can be configured by passing a Path directly to a data class on initialization, via the ``$WAGS_TAILS_DIR`` environment variable, or via `XDG data environment variables `_. 
This is explicated in full in the :py:meth:`~wags_tails.utils.storage.get_data_dir()` method description. +All data is stored within source-specific subdirectories of a designated ``wags-tails`` data directory. By default, this location is ``~/.local/share/wags_tails/``, but it can be configured by passing a Path directly to a data class on initialization, via the ``$WAGS_TAILS_DIR`` environment variable, or via `XDG data environment variables `_. This is explicated in full in the :py:meth:`wags_tails.utils.storage.get_data_dir()` method description. + +``wags-tails`` will attempt to create the resolved data directory if it does not already exist. This behavior can be disabled by passing ``readonly=True`` to the :py:meth:`wags_tails.utils.storage.get_data_dir()` method, or by setting the environment variable ``WAGS_TAILS_READONLY`` to ``"TRUE"``. If read-only mode is not engaged, ``get_data_dir()`` will perform cursory checks to assess whether the resolved directory appears writeable, and raise a :py:class:`wags_tails.utils.storage.WagsTailsDirWriteError` if those checks fail. If read-only mode is enabled but the resolved data directory is not available, a :py:class:`wags_tails.utils.storage.WagsTailsDirNotAvailableError` will be raised. .. _custom_data_source: diff --git a/src/wags_tails/utils/storage.py b/src/wags_tails/utils/storage.py index 308e41f..0a8d623 100644 --- a/src/wags_tails/utils/storage.py +++ b/src/wags_tails/utils/storage.py @@ -7,42 +7,144 @@ _logger = logging.getLogger(__name__) -def get_data_dir() -> Path: +class WagsTailsDirWriteError(Exception): + """Raise for cases where resolved data dir appears to be unwriteable by current process""" + + +class WagsTailsDirNotAvailableError(Exception): + """Raise for cases where resolved data dir cannot be used (e.g. 
doesn't exist and write mode is disabled)""" + + +def _check_write(data_dir: Path) -> None: + """Perform writeability checks + + * os.access() is a relatively OS-agnostic check for permissions on a specific location + * os.statvfs(), if available, can check for some cases like read-only mounted filesystems + * a mkdir() call should catch most cases but won't tell us anything if the directory + already exists + + :param data_dir: wags-tails data dir + :raise WagsTailsDirWriteError: if any checks fail + """ + base_failure_msg = f"wags-tails get_data_dir() writeability assertion failed for path `{data_dir}`. INSERT_SPECIFIC_HERE Ensure wags-tails directory is configured to be a writeable location, or use `get_data_dir(writeable=False)` or env var `WAGS_TAILS_READONLY=True` if read-only mode is intended. See docs entry on data dir configuration: https://wags-tails.readthedocs.io/latest/usage.html#configuration" + + # since we might be making multiple new subdirectories, use the nearest existing directory + probe = data_dir + while not probe.exists(): + parent = probe.parent + if parent == probe: + break + probe = parent + + if not probe.exists() or not probe.is_dir(): + msg = base_failure_msg.replace( + "INSERT_SPECIFIC_HERE", + "Parent directory doesn't exist or isn't a directory.", + ) + _logger.error(msg) + raise WagsTailsDirWriteError(msg) + if not os.access(probe, os.W_OK | os.X_OK): + msg = base_failure_msg.replace( + "INSERT_SPECIFIC_HERE", + "os.access() check indicates user lacks write permissions.", + ) + _logger.error(msg) + raise WagsTailsDirWriteError(msg) + if hasattr(os, "statvfs"): + try: + if os.statvfs(probe).f_flag & getattr(os, "ST_RDONLY", 1): + msg = base_failure_msg.replace( + "INSERT_SPECIFIC_HERE", "Filesystem appears to be read-only." 
+ ) + _logger.error(msg) + raise WagsTailsDirWriteError(msg) + except OSError: + pass + try: + data_dir.mkdir(exist_ok=True, parents=True) + except PermissionError as e: + msg = base_failure_msg.replace("INSERT_SPECIFIC_HERE", f"mkdir() failed: {e}.") + _logger.exception(msg) + raise WagsTailsDirWriteError(msg) from e + + +def get_data_dir(readonly: bool | None = None) -> Path: """Get base wags-tails data storage location. By default, conform to `XDG Base Directory Specification `_, unless a directory is specified otherwise: 1) check env var ``"WAGS_TAILS_DIR"`` - 2) check env var ``"XDG_DATA_HOME"`` - 3) check env var ``"XDG_DATA_DIRS"`` for a colon-separated list, skipping any - that can't be used (i.e. they're already a file) - 4) otherwise, use ``~/.local/share/`` + 2) check env var ``"XDG_DATA_HOME"``. If set, use ``${XDG_DATA_HOME}/wags_tails/`` + 3) check env var ``"XDG_DATA_DIRS"`` for a colon-separated list, looking for an + element that contains a ``wags_tails/`` subdirectory (only available in read-only mode) + 4) otherwise, use ``~/.local/share/wags_tails`` + + Enable read-only mode by calling with ``readonly=True`` or setting the env var ``WAGS_TAILS_READONLY=TRUE``. + + * If read-only is enabled, ``$XDG_DATA_DIRS`` can be used to provide a data directory, but only if + a ``wags-tails`` subdirectory already exists within it. Otherwise, an individual directory entry + is skipped. If unable to resolve to a directory that exists, raises ``WagsTailsDirNotAvailableError``. + * If read-only is not enabled, ``$XDG_DATA_DIRS`` is ignored, and cursory checks are performed + to assess writeability of the resolved data directory. If they fail, then a + ``WagsTailsDirWriteError`` is raised. + :param readonly: whether to enable read-only mode. If left unset, checks the ``$WAGS_TAILS_READONLY`` + env var, and otherwise defaults to ``False``. 
:return: path to base data directory + :raise WagsTailsDirWriteError: if writeable assertion check fails + :raise WagsTailsDirNotAvailableError: if read-only mode enabled but resolved directory doesn't exist """ - spec_wagstails_dir = os.environ.get("WAGS_TAILS_DIR") - if spec_wagstails_dir: + if readonly is None: + if env_var_value := os.environ.get("WAGS_TAILS_READONLY"): + if env_var_value.upper() == "TRUE": + readonly = True + elif env_var_value.upper() == "FALSE": + readonly = False + else: + _logger.warning( + "Unrecognized `WAGS_TAILS_READONLY` value: %s. Defaulting to readonly=False.", + env_var_value, + ) + readonly = False + else: + readonly = False + default_name = "wags_tails" + + data_base_dir = None + if spec_wagstails_dir := os.environ.get("WAGS_TAILS_DIR"): data_base_dir = Path(spec_wagstails_dir) else: - xdg_data_home = os.environ.get("XDG_DATA_HOME") - if xdg_data_home: - data_base_dir = Path(xdg_data_home) / "wags_tails" - else: - xdg_data_dirs = os.environ.get("XDG_DATA_DIRS") - if xdg_data_dirs: - dirs = os.environ["XDG_DATA_DIRS"].split(":") + if xdg_data_home := os.environ.get("XDG_DATA_HOME"): + data_base_dir = Path(xdg_data_home) / default_name + elif readonly: # noqa: SIM102 + if xdg_data_dirs := os.environ.get("XDG_DATA_DIRS"): + dirs = xdg_data_dirs.split(":") for directory in dirs: - dir_path = Path(directory) / "wags_tails" - if not dir_path.is_file(): + dir_path = Path(directory) / default_name + if dir_path.exists() and not dir_path.is_file(): data_base_dir = dir_path break - else: - data_base_dir = Path.home() / ".local" / "share" / "wags_tails" - else: - data_base_dir = Path.home() / ".local" / "share" / "wags_tails" + if data_base_dir is None: + data_base_dir = Path.home() / ".local" / "share" / default_name + try: + data_base_dir = data_base_dir.expanduser() + except RuntimeError: + msg = f"Unable to expand user prefix for path {data_base_dir}" + _logger.warning(msg) + + if readonly: + if not data_base_dir.exists(): + msg = 
f"Resolved wags-tails dir location `{data_base_dir}` does not exist, but write mode is disabled so it cannot be created or used" + _logger.error(msg) + raise WagsTailsDirNotAvailableError(msg) + if not data_base_dir.is_dir(): + msg = f"Resolved wags-tails dir location `{data_base_dir}` is not a directory." + _logger.error(msg) + raise WagsTailsDirNotAvailableError(msg) + else: + _check_write(data_base_dir) - data_base_dir.mkdir(exist_ok=True, parents=True) return data_base_dir diff --git a/tests/conftest.py b/tests/conftest.py index e42f3c2..54f91db 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,6 @@ """Provide core testing utilities.""" +import contextlib import logging import shutil from pathlib import Path @@ -49,4 +50,5 @@ def base_data_dir(): if path.exists(): # make sure it's empty shutil.rmtree(str(path.absolute())) yield path - shutil.rmtree(str(path.absolute())) # clean up afterward + with contextlib.suppress(FileNotFoundError): + shutil.rmtree(str(path.absolute())) # clean up afterward diff --git a/tests/test_base.py b/tests/test_base.py index e4e64f8..42ac58e 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -1,7 +1,6 @@ """Test base class functions.""" import os -import tempfile from pathlib import Path import pytest @@ -9,40 +8,12 @@ from wags_tails.mondo import MondoData -@pytest.fixture -def _config_teardown(): - """Make sure environment variables are unset after running `test_config_directory`""" - yield - for varname in ("XDG_DATA_DIRS", "XDG_DATA_HOME", "WAGS_TAILS_DIR"): - if varname in os.environ: - del os.environ[varname] - - -@pytest.mark.usefixtures("_config_teardown") -def test_config_directory(base_data_dir: Path): - """Basic tests of directory configuration that shouldn't affect non-temporary files.""" +def test_initialization(base_data_dir: Path): m = MondoData(base_data_dir) assert m.data_dir == base_data_dir assert m.data_dir.exists() assert m.data_dir.is_dir() - tempdir = Path(tempfile.gettempdir()) - - 
data_dirs_dir = tempdir / "xdg_data_dirs" - os.environ["XDG_DATA_DIRS"] = str(data_dirs_dir) - m = MondoData() - assert m.data_dir == data_dirs_dir / "wags_tails" / "mondo" - - data_home_dir = tempdir / "xdg_data_home" - os.environ["XDG_DATA_HOME"] = str(data_home_dir) - m = MondoData() - assert m.data_dir == data_home_dir / "wags_tails" / "mondo" - - wags_dir = tempdir / "wags_tails_dir" - os.environ["WAGS_TAILS_DIR"] = str(wags_dir) - m = MondoData() - assert m.data_dir == wags_dir / "mondo" - @pytest.mark.skipif( os.environ.get("WAGS_TAILS_TEST_ENV", "").lower() != "true", reason="Not in CI" diff --git a/tests/test_storage_utils.py b/tests/test_storage_utils.py new file mode 100644 index 0000000..690099c --- /dev/null +++ b/tests/test_storage_utils.py @@ -0,0 +1,74 @@ +import os +import tempfile +from pathlib import Path + +import pytest + +from wags_tails.utils.storage import WagsTailsDirNotAvailableError, get_data_dir + + +@pytest.fixture(autouse=True) +def env_var_teardown(): + """Make sure environment variables and directories are cleaned up after each test""" + yield + for varname in ( + "XDG_DATA_DIRS", + "XDG_DATA_HOME", + "WAGS_TAILS_DIR", + "WAGS_TAILS_READONLY", + ): + if varname in os.environ: + del os.environ[varname] + + +@pytest.fixture +def default_data_dir() -> Path: + return Path("~/.local/share/wags_tails/").expanduser() + + +def test_default(default_data_dir: Path): + assert get_data_dir() == default_data_dir + + +def test_xdg_data_home_variable(): + with tempfile.TemporaryDirectory() as td: + xdg_data_home = Path(td) + os.environ["XDG_DATA_HOME"] = str(xdg_data_home) + assert get_data_dir() == xdg_data_home / "wags_tails" + + +def test_handle_create_subdirectories(): + with tempfile.TemporaryDirectory() as td: + xdg_data_home = Path(td) / "a" / "b" / "c" + os.environ["XDG_DATA_HOME"] = str(xdg_data_home) + assert get_data_dir() == xdg_data_home / "wags_tails" + + +def test_xdg_data_dirs_variable(default_data_dir: Path): + with 
tempfile.TemporaryDirectory() as td: + tempdir = Path(td) + bad_dir = tempdir / "bad_location" + bad_dir.mkdir(exist_ok=True, parents=True) + data_dirs_dir = tempdir / "my_data" + (data_dirs_dir / "wags_tails").mkdir(parents=True, exist_ok=True) + os.environ["XDG_DATA_DIRS"] = f"{bad_dir}:{data_dirs_dir}" + assert get_data_dir() == default_data_dir # should skip when writeable + assert get_data_dir(readonly=True) == data_dirs_dir / "wags_tails" + + +def test_wags_tails_dir_variable(): + with tempfile.TemporaryDirectory() as td: + wtd = Path(td) / "wags_tails_dir" + os.environ["WAGS_TAILS_DIR"] = str(wtd) + assert get_data_dir() == wtd + + +def test_readonly_mode_settings(): + with tempfile.TemporaryDirectory() as td: + new_directory = Path(td) / "fake_directory" + os.environ["WAGS_TAILS_DIR"] = str(new_directory) + with pytest.raises(WagsTailsDirNotAvailableError): + get_data_dir(readonly=True) + os.environ["WAGS_TAILS_READONLY"] = "TRUE" + with pytest.raises(WagsTailsDirNotAvailableError): + get_data_dir()