Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions docs/source/reference/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,3 @@ Utilities
wags_tails.utils.downloads
wags_tails.utils.storage
wags_tails.utils.versioning

Miscellany
----------

.. autosummary::
:nosignatures:
:toctree: api/misc/
:template: module_summary.rst

wags_tails.logging
4 changes: 3 additions & 1 deletion docs/source/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ Additional parameters are available to force usage of the most recent locally-av
Configuration
-------------

All data is stored within source-specific subdirectories of a designated ``wags-tails`` data directory. By default, this location is ``~/.local/share/wags_tails/``, but it can be configured by passing a Path directly to a data class on initialization, via the ``$WAGS_TAILS_DIR`` environment variable, or via `XDG data environment variables <https://specifications.freedesktop.org/basedir-spec/basedir-spec-0.6.html>`_. This is explicated in full in the :py:meth:`~wags_tails.utils.storage.get_data_dir()` method description.
All data is stored within source-specific subdirectories of a designated ``wags-tails`` data directory. By default, this location is ``~/.local/share/wags_tails/``, but it can be configured by passing a Path directly to a data class on initialization, via the ``$WAGS_TAILS_DIR`` environment variable, or via `XDG data environment variables <https://specifications.freedesktop.org/basedir-spec/basedir-spec-0.6.html>`_. This is explicated in full in the :py:meth:`wags_tails.utils.storage.get_data_dir()` method description.

``wags-tails`` will attempt to create the resolved data directory if it does not already exist. This behavior can be disabled by passing ``readonly=True`` to the :py:meth:`wags_tails.utils.storage.get_data_dir()` method, or by setting the environment variable ``WAGS_TAILS_READONLY`` to ``"TRUE"``. If read-only mode is not engaged, ``get_data_dir()`` will perform cursory checks to assess whether the resolved directory appears writeable, and raise a :py:class:`wags_tails.utils.storage.WagsTailsDirWriteError` if those checks fail. If read-only mode is enabled but the resolved data directory is not available, a :py:class:`wags_tails.utils.storage.WagsTailsDirNotAvailableError` will be raised.

.. _custom_data_source:

Expand Down
144 changes: 123 additions & 21 deletions src/wags_tails/utils/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,42 +7,144 @@
_logger = logging.getLogger(__name__)


def get_data_dir() -> Path:
class WagsTailsDirWriteError(Exception):
    """Signal that the resolved data directory does not appear writeable by the current process."""


class WagsTailsDirNotAvailableError(Exception):
    """Signal that the resolved data directory cannot be used.

    For example, the directory doesn't exist and write mode is disabled.
    """


def _check_write(data_dir: Path) -> None:
    """Perform cursory writeability checks, then create the data directory.

    The checks are layered because no single one is conclusive:

    * ``os.access()`` is a relatively OS-agnostic check for permissions on a
      specific location
    * ``os.statvfs()``, if available, can check for some cases like read-only
      mounted filesystems
    * a ``mkdir()`` call should catch most cases but won't tell us anything if
      the directory already exists

    If every check passes, ``data_dir`` (including any missing parents) exists
    when this function returns.

    :param data_dir: wags-tails data dir
    :raise WagsTailsDirWriteError: if any checks fail
    """

    def _failure_msg(specific: str) -> str:
        # Single place to assemble the user-facing message; the remediation hint
        # must name the real ``get_data_dir`` keyword (``readonly``).
        return (
            "wags-tails get_data_dir() writeability assertion failed for path "
            f"`{data_dir}`. {specific} Ensure wags-tails directory is configured "
            "to be a writeable location, or use `get_data_dir(readonly=True)` or "
            "env var `WAGS_TAILS_READONLY=True` if read-only mode is intended. "
            "See docs entry on data dir configuration: "
            "https://wags-tails.readthedocs.io/latest/usage.html#configuration"
        )

    # since we might be making multiple new subdirectories, use the nearest
    # existing directory
    probe = data_dir
    while not probe.exists():
        parent = probe.parent
        if parent == probe:  # reached the filesystem root (or a bare relative name)
            break
        probe = parent

    if not probe.exists() or not probe.is_dir():
        msg = _failure_msg("Parent directory doesn't exist or isn't a directory.")
        _logger.error(msg)
        raise WagsTailsDirWriteError(msg)

    # W_OK to create entries, X_OK to traverse into the directory
    if not os.access(probe, os.W_OK | os.X_OK):
        msg = _failure_msg("os.access() check indicates user lacks write permissions.")
        _logger.error(msg)
        raise WagsTailsDirWriteError(msg)

    if hasattr(os, "statvfs"):
        try:
            mount_flags = os.statvfs(probe).f_flag
        except OSError:
            # best-effort check only: an unreadable statvfs result is not a failure
            mount_flags = 0
        if mount_flags & getattr(os, "ST_RDONLY", 1):
            msg = _failure_msg("Filesystem appears to be read-only.")
            _logger.error(msg)
            raise WagsTailsDirWriteError(msg)

    try:
        data_dir.mkdir(exist_ok=True, parents=True)
    except OSError as e:
        # PermissionError is the expected case, but any OS-level failure here
        # (e.g. a read-only filesystem that the checks above couldn't detect)
        # means the directory is not usable for writing.
        msg = _failure_msg(f"mkdir() failed: {e}.")
        _logger.exception(msg)
        raise WagsTailsDirWriteError(msg) from e


def get_data_dir(readonly: bool | None = None) -> Path:
"""Get base wags-tails data storage location.
By default, conform to `XDG Base Directory Specification <https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html>`_,
unless a directory is specified otherwise:
1) check env var ``"WAGS_TAILS_DIR"``
2) check env var ``"XDG_DATA_HOME"``
3) check env var ``"XDG_DATA_DIRS"`` for a colon-separated list, skipping any
that can't be used (i.e. they're already a file)
4) otherwise, use ``~/.local/share/``
2) check env var ``"XDG_DATA_HOME"``. If set, use ``${XDG_DATA_HOME}/wags_tails/``
3) check env var ``"XDG_DATA_DIRS"`` for a colon-separated list, looking for an
element that contains a ``wags_tails/`` subdirectory (only available in read-only mode)
4) otherwise, use ``~/.local/share/wags_tails``
Enable read-only mode by calling with ``readonly=True`` or setting the env var ``WAGS_TAILS_READONLY=TRUE``.
* If read-only is enabled, ``$XDG_DATA_DIRS`` can be used to provide a data directory, but only if
a ``wags_tails`` subdirectory already exists within it. Otherwise, an individual directory entry
is skipped. If unable to resolve to a directory that exists, raises ``WagsTailsDirNotAvailableError``.
Comment on lines +85 to +87

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this extra criteria needed? Only if read-only is enabled, and only if the subdirectory already exists?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I've been chasing my tail on this a bit. When read-only is on, a nonexistent directory is useless, but I think it begs the question of what to do when $WAGS_TAILS_READONLY=true and $WAGS_TAILS_DIR is defined but doesn't exist. I guess a simpler way forward would be to not skip and fail once the process tries to write a file to the data subdirectory, because presumably if you can define $WAGS_TAILS_READONLY then you can also be smart enough to point $WAGS_TAILS_DIR to a real directory.

* If read-only is not enabled, ``$XDG_DATA_DIRS`` is ignored, and cursory checks are performed
to assess writeability of the resolved data directory. If they fail, then a
``WagsTailsDirWriteError`` is raised.
:param readonly: whether to enable read-only mode. If left unset, checks the ``$WAGS_TAILS_READONLY``
env var, and otherwise defaults to ``False``.
:return: path to base data directory
:raise WagsTailsDirWriteError: if writeable assertion check fails
:raise WagsTailsDirNotAvailableError: if read-only mode enabled but resolved directory doesn't exist
"""
spec_wagstails_dir = os.environ.get("WAGS_TAILS_DIR")
if spec_wagstails_dir:
if readonly is None:
if env_var_value := os.environ.get("WAGS_TAILS_READONLY"):
if env_var_value.upper() == "TRUE":
readonly = True
elif env_var_value.upper() == "FALSE":
readonly = False
else:
_logger.warning(
"Unrecognized `WAGS_TAILS_READONLY` value: %s. Defaulting to readonly=False.",
env_var_value,
)
readonly = False
else:
readonly = False
default_name = "wags_tails"

data_base_dir = None
if spec_wagstails_dir := os.environ.get("WAGS_TAILS_DIR"):
data_base_dir = Path(spec_wagstails_dir)
else:
xdg_data_home = os.environ.get("XDG_DATA_HOME")
if xdg_data_home:
data_base_dir = Path(xdg_data_home) / "wags_tails"
else:
xdg_data_dirs = os.environ.get("XDG_DATA_DIRS")
if xdg_data_dirs:
dirs = os.environ["XDG_DATA_DIRS"].split(":")
if xdg_data_home := os.environ.get("XDG_DATA_HOME"):
data_base_dir = Path(xdg_data_home) / default_name
elif readonly: # noqa: SIM102
if xdg_data_dirs := os.environ.get("XDG_DATA_DIRS"):
dirs = xdg_data_dirs.split(":")
for directory in dirs:
dir_path = Path(directory) / "wags_tails"
if not dir_path.is_file():
dir_path = Path(directory) / default_name
if dir_path.exists() and not dir_path.is_file():
data_base_dir = dir_path
break
else:
data_base_dir = Path.home() / ".local" / "share" / "wags_tails"
else:
data_base_dir = Path.home() / ".local" / "share" / "wags_tails"
if data_base_dir is None:
data_base_dir = Path.home() / ".local" / "share" / default_name
try:
data_base_dir = data_base_dir.expanduser()
except RuntimeError:
msg = f"Unable to expand user prefix for path {data_base_dir}"
_logger.warning(msg)

if readonly:
if not data_base_dir.exists():
msg = f"Resolved wags-tails dir location `{data_base_dir}` does not exist, but write mode is disabled so it cannot be created or used"
_logger.error(msg)
raise WagsTailsDirNotAvailableError(msg)
if not data_base_dir.is_dir():
msg = f"Resolved wags-tails dir location `{data_base_dir}` is not a directory."
_logger.error(msg)
raise WagsTailsDirNotAvailableError(msg)
else:
_check_write(data_base_dir)

data_base_dir.mkdir(exist_ok=True, parents=True)
return data_base_dir


Expand Down
4 changes: 3 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Provide core testing utilities."""

import contextlib
import logging
import shutil
from pathlib import Path
Expand Down Expand Up @@ -49,4 +50,5 @@ def base_data_dir():
if path.exists(): # make sure it's empty
shutil.rmtree(str(path.absolute()))
yield path
shutil.rmtree(str(path.absolute())) # clean up afterward
with contextlib.suppress(FileNotFoundError):
shutil.rmtree(str(path.absolute())) # clean up afterward
31 changes: 1 addition & 30 deletions tests/test_base.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,19 @@
"""Test base class functions."""

import os
import tempfile
from pathlib import Path

import pytest

from wags_tails.mondo import MondoData


@pytest.fixture
def _config_teardown():
"""Make sure environment variables are unset after running `test_config_directory`"""
yield
for varname in ("XDG_DATA_DIRS", "XDG_DATA_HOME", "WAGS_TAILS_DIR"):
if varname in os.environ:
del os.environ[varname]


@pytest.mark.usefixtures("_config_teardown")
def test_config_directory(base_data_dir: Path):
"""Basic tests of directory configuration that shouldn't affect non-temporary files."""
def test_initialization(base_data_dir: Path):
m = MondoData(base_data_dir)
assert m.data_dir == base_data_dir
assert m.data_dir.exists()
assert m.data_dir.is_dir()

tempdir = Path(tempfile.gettempdir())

data_dirs_dir = tempdir / "xdg_data_dirs"
os.environ["XDG_DATA_DIRS"] = str(data_dirs_dir)
m = MondoData()
assert m.data_dir == data_dirs_dir / "wags_tails" / "mondo"

data_home_dir = tempdir / "xdg_data_home"
os.environ["XDG_DATA_HOME"] = str(data_home_dir)
m = MondoData()
assert m.data_dir == data_home_dir / "wags_tails" / "mondo"

wags_dir = tempdir / "wags_tails_dir"
os.environ["WAGS_TAILS_DIR"] = str(wags_dir)
m = MondoData()
assert m.data_dir == wags_dir / "mondo"


@pytest.mark.skipif(
os.environ.get("WAGS_TAILS_TEST_ENV", "").lower() != "true", reason="Not in CI"
Expand Down
74 changes: 74 additions & 0 deletions tests/test_storage_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import os
import tempfile
from pathlib import Path

import pytest

from wags_tails.utils.storage import WagsTailsDirNotAvailableError, get_data_dir


@pytest.fixture(autouse=True)
def env_var_teardown():
    """Unset the data-directory environment variables after each test has run."""
    yield
    managed_vars = (
        "XDG_DATA_DIRS",
        "XDG_DATA_HOME",
        "WAGS_TAILS_DIR",
        "WAGS_TAILS_READONLY",
    )
    for env_name in managed_vars:
        # pop with a default is a no-op when the variable was never set
        os.environ.pop(env_name, None)


@pytest.fixture
def default_data_dir() -> Path:
    """Provide the fallback ``wags_tails`` location under the user's home directory."""
    return Path.home() / ".local" / "share" / "wags_tails"


def test_default(default_data_dir: Path):
    """With no environment overrides, the default location is returned."""
    resolved = get_data_dir()
    assert resolved == default_data_dir


def test_xdg_data_home_variable():
    """``$XDG_DATA_HOME`` should resolve to its ``wags_tails`` subdirectory."""
    with tempfile.TemporaryDirectory() as tmp:
        home_root = Path(tmp)
        expected = home_root / "wags_tails"
        os.environ["XDG_DATA_HOME"] = str(home_root)
        assert get_data_dir() == expected


def test_handle_create_subdirectories():
    """A data home nested under not-yet-existing parents should still resolve."""
    with tempfile.TemporaryDirectory() as tmp:
        nested_home = Path(tmp).joinpath("a", "b", "c")
        expected = nested_home / "wags_tails"
        os.environ["XDG_DATA_HOME"] = str(nested_home)
        assert get_data_dir() == expected


def test_xdg_data_dirs_variable(default_data_dir: Path):
    """``$XDG_DATA_DIRS`` is only consulted in read-only mode."""
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)

        # first entry has no wags_tails subdirectory
        unusable = root / "bad_location"
        unusable.mkdir(exist_ok=True, parents=True)

        # second entry already contains a wags_tails subdirectory
        usable = root / "my_data"
        expected_readonly = usable / "wags_tails"
        expected_readonly.mkdir(parents=True, exist_ok=True)

        os.environ["XDG_DATA_DIRS"] = ":".join((str(unusable), str(usable)))

        assert get_data_dir() == default_data_dir  # should skip when writeable
        assert get_data_dir(readonly=True) == expected_readonly


def test_wags_tails_dir_variable():
    """``$WAGS_TAILS_DIR`` should be used verbatim as the data directory."""
    with tempfile.TemporaryDirectory() as tmp:
        configured_dir = Path(tmp) / "wags_tails_dir"
        os.environ["WAGS_TAILS_DIR"] = str(configured_dir)
        resolved = get_data_dir()
        assert resolved == configured_dir


def test_readonly_mode_settings():
    """Read-only mode must reject a configured directory that doesn't exist."""
    with tempfile.TemporaryDirectory() as tmp:
        missing_dir = Path(tmp) / "fake_directory"
        os.environ["WAGS_TAILS_DIR"] = str(missing_dir)

        # read-only requested through the argument
        with pytest.raises(WagsTailsDirNotAvailableError):
            get_data_dir(readonly=True)

        # read-only requested through the environment
        os.environ["WAGS_TAILS_READONLY"] = "TRUE"
        with pytest.raises(WagsTailsDirNotAvailableError):
            get_data_dir()