diff --git a/.devcontainer/Dockerfile.dev b/.devcontainer/Dockerfile.dev
new file mode 100644
index 00000000..ea04d0aa
--- /dev/null
+++ b/.devcontainer/Dockerfile.dev
@@ -0,0 +1,46 @@
+# USAGE:
+# This file will be used by the VS Code Dev Containers extension
+# to create a development environment for the mdio-python project.
+# HOW TO MANUALLY BUILD AND DEBUG THE CONTAINER:
+# docker build -t mdio-dev -f .devcontainer/Dockerfile.dev .
+# docker run -it --rm --entrypoint /bin/bash --name mdio-dev mdio-dev
+# NOTES:
+# 1. The container runs as the non-root user 'vscode' with UID 1000.
+# 2. The virtual environment will be set up at /home/vscode/.venv
+# 3. The project source code will be host-mounted at /workspaces/mdio-python
+ARG PYTHON_VERSION="3.13"
+ARG LINUX_DISTRO="bookworm"
+ARG UV_VERSION="0.6.11"
+FROM mcr.microsoft.com/devcontainers/python:1-${PYTHON_VERSION}-${LINUX_DISTRO}
+
+ENV USERNAME="vscode"
+USER $USERNAME
+
+COPY --chown=$USERNAME:$USERNAME ./ /workspaces/mdio-python
+
+WORKDIR /workspaces/mdio-python
+
+ARG UV_VERSION
+# Install uv as described in https://devblogs.microsoft.com/ise/dockerizing-uv/
+RUN python3 -m pip install --no-cache-dir uv==${UV_VERSION}
+# Prevent uv from trying to create hard links, which do not work in a container
+# that mounts local file systems (e.g. VS Code Dev Containers)
+ENV UV_LINK_MODE=copy
+# Set the user base and add its bin directory to PATH
+ENV PYTHONUSERBASE=/home/$USERNAME/.local
+ENV PATH="$PYTHONUSERBASE/bin:$PATH"
+
+# Initialize the virtual environment in the container
+ENV VIRTUAL_ENV="/home/$USERNAME/.venv"
+ENV UV_PROJECT_ENVIRONMENT=$VIRTUAL_ENV
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+RUN uv venv $VIRTUAL_ENV
+
+# Install the project in editable mode
+# https://setuptools.pypa.io/en/latest/userguide/development_mode.html
+# This allows for live reloading of the code during development
+RUN uv pip install -e .
+# Install the "dev" dependency group (development dependencies) from pyproject.toml
+RUN uv sync --group dev
+# Now one can run:
+# pre-commit run --all-files
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index b618a526..cc26bcf2 100755
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -2,12 +2,12 @@
 // README at: https://github.com/devcontainers/templates/tree/main/src/python
 {
   "build": {
-    "dockerfile": "Dockerfile",
+    "dockerfile": "Dockerfile.dev",
     "context": ".."
   },
   // Use 'postCreateCommand' to run commands after the container is created.
   "postCreateCommand": {
-    "post_create_script": "bash ./.devcontainer/post-install.sh"
+    // "post_create_script": "bash ./.devcontainer/post-install.sh"
   },
   // Forward 8787 to enable us to view dask dashboard
   "forwardPorts": [8787],
@@ -16,8 +16,9 @@
   // Configure properties specific to VS Code.
   "vscode": {
     "settings": {
-      "python.terminal.activateEnvInCurrentTerminal": true,
-      "python.defaultInterpreterPath": "/opt/venv/bin/python"
+      "python.testing.pytestArgs": ["tests"],
+      "python.testing.unittestEnabled": false,
+      "python.testing.pytestEnabled": true
     },
     "extensions": [
       "ms-python.python",
@@ -27,17 +28,19 @@
       "ms-toolsai.jupyter-renderers",
       "vscode-icons-team.vscode-icons",
       "wayou.vscode-todo-highlight",
-      "streetsidesoftware.code-spell-checker"
+      "streetsidesoftware.code-spell-checker",
+      "eamodio.gitlens",
+      "visualstudioexptteam.vscodeintellicode",
+      "richie5um2.vscode-sort-json"
     ]
   }
 },
 // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root", "updateRemoteUserUID": true, + "workspaceMount": "source=${localWorkspaceFolder},target=/workspaces/mdio-python,type=bind", + "workspaceFolder": "/workspaces/mdio-python", "mounts": [ - // Re-use local Git configuration - "source=${localEnv:HOME}/.gitconfig,target=/home/vscode/.gitconfig_tmp,type=bind,consistency=cached", - "source=${localEnv:HOME}/.gitconfig,target=/root/.gitconfig_tmp,type=bind,consistency=cached", - "source=${localEnv:SCRATCH_DIR}/${localEnv:USER},target=/scratch/,type=bind,consistency=cached" + // "source=${localWorkspaceFolder}/../DATA/,target=/DATA/,type=bind,consistency=cached" ] } diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 56509d66..9320babb 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,7 @@ jobs: matrix: include: - { python: "3.13", os: "ubuntu-latest", session: "pre-commit" } - - { python: "3.13", os: "ubuntu-latest", session: "safety" } + # - { python: "3.13", os: "ubuntu-latest", session: "safety" } # - { python: "3.13", os: "ubuntu-latest", session: "mypy" } # - { python: "3.12", os: "ubuntu-latest", session: "mypy" } # - { python: "3.11", os: "ubuntu-latest", session: "mypy" } diff --git a/docs/conf.py b/docs/conf.py index 89990324..aeb59636 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -17,6 +17,7 @@ "sphinx.ext.napoleon", "sphinx.ext.intersphinx", "sphinx.ext.autosummary", + "sphinxcontrib.autodoc_pydantic", "sphinx.ext.autosectionlabel", "sphinx_click", "sphinx_copybutton", @@ -38,6 +39,7 @@ intersphinx_mapping = { "python": ("https://docs.python.org/3", None), "numpy": ("https://numpy.org/doc/stable/", None), + "pydantic": ("https://docs.pydantic.dev/latest/", None), "zarr": ("https://zarr.readthedocs.io/en/stable/", None), } @@ -50,6 +52,14 @@ autoclass_content = "class" autosectionlabel_prefix_document = True +autodoc_pydantic_field_list_validators = False +autodoc_pydantic_field_swap_name_and_alias = True +autodoc_pydantic_field_show_alias = False +autodoc_pydantic_model_show_config_summary = False +autodoc_pydantic_model_show_validator_summary = False +autodoc_pydantic_model_show_validator_members = False +autodoc_pydantic_model_show_field_summary = False + html_theme = "furo" myst_number_code_blocks = ["python"] diff --git a/docs/data_models/chunk_grids.md b/docs/data_models/chunk_grids.md new file mode 100644 index 00000000..5f178a04 --- /dev/null +++ b/docs/data_models/chunk_grids.md @@ -0,0 +1,154 @@ +```{eval-rst} +:tocdepth: 3 +``` + +```{currentModule} mdio.schemas.chunk_grid + +``` + +# Chunk Grid Models + +```{article-info} +:author: Altay Sansal +:date: "{sub-ref}`today`" +:read-time: "{sub-ref}`wordcount-minutes` min read" +:class-container: sd-p-0 sd-outline-muted sd-rounded-3 sd-font-weight-light +``` + +The variables in MDIO data model can represent different types of chunk grids. +These grids are essential for managing multi-dimensional data arrays efficiently. +In this breakdown, we will explore four distinct data models within the MDIO schema, +each serving a specific purpose in data handling and organization. 
+
+MDIO implements data models following the guidelines of the Zarr v3 spec and ZEPs:
+
+- [Zarr core specification (version 3)](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html)
+- [ZEP 1 — Zarr specification version 3](https://zarr.dev/zeps/accepted/ZEP0001.html)
+- [ZEP 3 — Variable chunking](https://zarr.dev/zeps/draft/ZEP0003.html)
+
+## Regular Grid
+
+The regular grid models are designed to represent a rectangular and regularly
+spaced chunk grid.
+
+```{eval-rst}
+.. autosummary::
+   RegularChunkGrid
+   RegularChunkShape
+```
+
+For a 1D array with `size = 31`{l=python}, we can divide it into 5 equally sized
+chunks. Note that the last chunk will be truncated to match the size of the array.
+
+`{ "name": "regular", "configuration": { "chunkShape": [7] } }`{l=json}
+
+Using the above schema, the resulting array chunks will look like this:
+
+```bash
+ ←─ 7 ─→ ←─ 7 ─→ ←─ 7 ─→ ←─ 7 ─→ ↔ 3
+┌───────┬───────┬───────┬───────┬───┐
+└───────┴───────┴───────┴───────┴───┘
+```
+
+For a 2D array with shape `rows, cols = (7, 17)`{l=python}, we can divide it into 9
+equally sized chunks.
+
+`{ "name": "regular", "configuration": { "chunkShape": [3, 7] } }`{l=json}
+
+Using the above schema, the resulting 2D array chunks will look like below.
+Note that the rows and columns are conceptual and visually not to scale.
+
+```bash
+ ←─ 7 ─→ ←─ 7 ─→ ↔ 3
+┌───────┬───────┬───┐
+│       ╎       ╎   │ ↑
+│       ╎       ╎   │ 3
+│       ╎       ╎   │ ↓
+├╶╶╶╶╶╶╶┼╶╶╶╶╶╶╶┼╶╶╶┤
+│       ╎       ╎   │ ↑
+│       ╎       ╎   │ 3
+│       ╎       ╎   │ ↓
+├╶╶╶╶╶╶╶┼╶╶╶╶╶╶╶┼╶╶╶┤
+│       ╎       ╎   │ ↕ 1
+└───────┴───────┴───┘
+```
+
+## Rectilinear Grid
+
+The [RectilinearChunkGrid](RectilinearChunkGrid) model extends
+the concept of chunk grids to accommodate rectangular and irregularly spaced chunks.
+This model is useful in data structures where non-uniform chunk sizes are necessary.
+[RectilinearChunkShape](RectilinearChunkShape) specifies the chunk sizes for each
+dimension as a list, allowing for irregular intervals.
+
+```{eval-rst}
+.. autosummary::
+   RectilinearChunkGrid
+   RectilinearChunkShape
+```
+
+:::{note}
+It's important to ensure that the sum of the irregular spacings specified
+in the `chunkShape` matches the size of the respective array dimension.
+:::
+
+For a 1D array with `size = 39`{l=python}, we can divide it into 5 irregularly sized
+chunks.
+
+`{ "name": "rectilinear", "configuration": { "chunkShape": [[10, 7, 5, 7, 10]] } }`{l=json}
+
+Using the above schema, the resulting array chunks will look like this:
+
+```bash
+ ←── 10 ──→ ←─ 7 ─→ ← 5 → ←─ 7 ─→ ←── 10 ──→
+┌──────────┬───────┬─────┬───────┬──────────┐
+└──────────┴───────┴─────┴───────┴──────────┘
+```
+
+For a 2D array with shape `rows, cols = (7, 25)`{l=python}, we can divide it into 12
+rectilinear (rectangular but irregular) chunks. Note that the rows and columns are
+conceptual and visually not to scale.
+
+`{ "name": "rectilinear", "configuration": { "chunkShape": [[3, 1, 3], [10, 5, 7, 3]] } }`{l=json}
+
+```bash
+ ←── 10 ──→ ← 5 → ←─ 7 ─→ ↔ 3
+┌──────────┬─────┬───────┬───┐
+│          ╎     ╎       ╎   │ ↑
+│          ╎     ╎       ╎   │ 3
+│          ╎     ╎       ╎   │ ↓
+├╶╶╶╶╶╶╶╶╶╶┼╶╶╶╶╶┼╶╶╶╶╶╶╶┼╶╶╶┤
+│          ╎     ╎       ╎   │ ↕ 1
+├╶╶╶╶╶╶╶╶╶╶┼╶╶╶╶╶┼╶╶╶╶╶╶╶┼╶╶╶┤
+│          ╎     ╎       ╎   │ ↑
+│          ╎     ╎       ╎   │ 3
+│          ╎     ╎       ╎   │ ↓
+└──────────┴─────┴───────┴───┘
+```
+
+## Model Reference
+
+:::{dropdown} RegularChunkGrid
+:animate: fade-in-slide-down
+
+```{eval-rst}
+.. autopydantic_model:: RegularChunkGrid
+
+----------
+
+.. autopydantic_model:: RegularChunkShape
+```
+
+:::
+:::{dropdown} RectilinearChunkGrid
+:animate: fade-in-slide-down
+
+```{eval-rst}
+.. autopydantic_model:: RectilinearChunkGrid
+
+----------
+
+.. autopydantic_model:: RectilinearChunkShape
+```
+
+:::
diff --git a/docs/data_models/compressors.md b/docs/data_models/compressors.md
new file mode 100644
index 00000000..17a1afd8
--- /dev/null
+++ b/docs/data_models/compressors.md
@@ -0,0 +1,100 @@
+```{eval-rst}
+:tocdepth: 3
+```
+
+```{currentModule} mdio.schemas.compressors
+
+```
+
+# Compressors
+
+```{article-info}
+:author: Altay Sansal
+:date: "{sub-ref}`today`"
+:read-time: "{sub-ref}`wordcount-minutes` min read"
+:class-container: sd-p-0 sd-outline-muted sd-rounded-3 sd-font-weight-light
+```
+
+## Dataset Compression
+
+MDIO relies on [numcodecs] for data compression. We provide good defaults for each
+compressor based on opinionated, limited heuristics for various energy datasets.
+However, using these data models, the compression can be customized.
+
+[Numcodecs] is a project that provides a convenient interface to different compression
+libraries. We selected the [Blosc] and [ZFP] compressors for lossless and lossy
+compression of energy data.
+
+## Blosc
+
+A high-performance compressor optimized for binary data, combining fast compression
+with a byte-shuffle filter for enhanced efficiency, particularly effective with
+numerical arrays in multi-threaded environments.
+
+For more details about compression modes, see [Blosc Documentation].
+
+```{eval-rst}
+.. autosummary::
+   Blosc
+```
+
+## ZFP
+
+ZFP is a compression algorithm tailored for floating-point and integer arrays, offering
+lossy and lossless compression with customizable precision, well-suited for large
+scientific datasets with a focus on balancing data fidelity and compression ratio.
+
+For more details about compression modes, see [ZFP Documentation].
+
+```{eval-rst}
+.. autosummary::
+   ZFP
+```
+
+[numcodecs]: https://github.com/zarr-developers/numcodecs
+[blosc]: https://github.com/Blosc/c-blosc
+[blosc documentation]: https://www.blosc.org/python-blosc/python-blosc.html
+[zfp]: https://github.com/LLNL/zfp
+[zfp documentation]: https://computing.llnl.gov/projects/zfp
+
+## Model Reference
+
+:::{dropdown} Blosc
+:animate: fade-in-slide-down
+
+```{eval-rst}
+.. autopydantic_model:: Blosc
+
+----------
+
+.. autoclass:: BloscAlgorithm()
+   :members:
+   :undoc-members:
+   :member-order: bysource
+
+----------
+
+.. autoclass:: BloscShuffle()
+   :members:
+   :undoc-members:
+   :member-order: bysource
+```
+
+:::
+
+:::{dropdown} ZFP
+:animate: fade-in-slide-down
+
+```{eval-rst}
+.. autopydantic_model:: ZFP
+
+----------
+
+.. autoclass:: ZFPMode()
+   :members:
+   :undoc-members:
+   :member-order: bysource
+```
+
+:::
diff --git a/docs/data_models/data_types.md b/docs/data_models/data_types.md
new file mode 100644
index 00000000..b58ff535
--- /dev/null
+++ b/docs/data_models/data_types.md
@@ -0,0 +1,259 @@
+```{eval-rst}
+:tocdepth: 3
+```
+
+```{currentModule} mdio.schemas.dtype
+
+```
+
+# Data Types
+
+```{article-info}
+:author: Altay Sansal
+:date: "{sub-ref}`today`"
+:read-time: "{sub-ref}`wordcount-minutes` min read"
+:class-container: sd-p-0 sd-outline-muted sd-rounded-3 sd-font-weight-light
+```
+
+## Scalar Type
+
+Scalar types are used to represent numbers and boolean values in MDIO arrays.
+
+```{eval-rst}
+.. autosummary::
+   :nosignatures:
+
+   ScalarType
+```
+
+These numbers can be integers (whole numbers without a decimal
+point, like 1, -15, 204) or floating-point numbers (numbers with a fractional
+part, like 3.14, -0.001, 2.71828) in various 16-64 bit formats such as `float32`.
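+
+A short sketch of how a scalar type is referenced in code (the enum members such
+as `ScalarType.FLOAT32` appear elsewhere in this codebase; the exact string
+values are assumed to be the lowercase type names):
+
+```python
+from mdio.schemas.dtype import ScalarType
+
+dtype = ScalarType.FLOAT32  # pick a 32-bit floating-point scalar type
+print(dtype)
+```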
+
+It is important to choose the right type for the content of the data for type safety,
+memory efficiency, performance, and accuracy of the numbers represented. Most scientific
+datasets are `float16`, `float32`, or `float64` values. However, there are many good
+use cases for integer and complex values as well.
+
+The [`ScalarType`](#ScalarType)s MDIO supports can be viewed below with the tabs.
+
+:::::{tab-set}
+
+::::{tab-item} Boolean
+:::{table}
+:widths: auto
+:align: center
+
+| Data Type | Options         | Example Value |
+| --------- | --------------- | ------------- |
+| `bool`    | `False`, `True` | `True`        |
+
+:::
+::::
+
+::::{tab-item} Integers
+:::{table}
+:widths: auto
+:align: center
+
+| Data Type | Range                                                       | Example Value |
+| --------- | ----------------------------------------------------------- | ------------- |
+| `int8`    | `-128` to `127`                                             | `45`          |
+| `int16`   | `-32,768` to `32,767`                                       | `1,234`       |
+| `int32`   | `-2,147,483,648` to `2,147,483,647`                         | `2,024`       |
+| `int64`   | `-9,223,372,036,854,775,808` to `9,223,372,036,854,775,807` | `987,654,321` |
+
+:::
+::::
+
+::::{tab-item} Unsigned Integers
+:::{table}
+:widths: auto
+:align: center
+
+| Data Type | Range                               | Example Value   |
+| --------- | ----------------------------------- | --------------- |
+| `uint8`   | `0` to `255`                        | `200`           |
+| `uint16`  | `0` to `65,535`                     | `50,000`        |
+| `uint32`  | `0` to `4,294,967,295`              | `3,000,000`     |
+| `uint64`  | `0` to `18,446,744,073,709,551,615` | `5,000,000,000` |
+
+:::
+::::
+
+::::{tab-item} Floating Point
+:::{table}
+:widths: auto
+:align: center
+
+| Data Type | Range                                                   | Example Value        |
+| --------- | ------------------------------------------------------- | -------------------- |
+| `float16` | `-65,504` to `65,504`                                   | `10.10`              |
+| `float32` | `-3.4028235e+38` to `3.4028235e+38`                     | `0.1234567`          |
+| `float64` | `-1.7976931348623157e+308` to `1.7976931348623157e+308` | `3.1415926535897932` |
+
+:::
+
+**Precision**
+
+- `float16`: ~3 significant decimal digits
+- `float32`: ~7 significant decimal digits
+- `float64`: ~16 significant decimal digits
+
+::::
+
+::::{tab-item} Complex Numbers
+:::{table}
+:widths: auto
+:align: center
+
+| Data Type    | Range                                                   | Example Value      |
+| ------------ | ------------------------------------------------------- | ------------------ |
+| `complex64`  | `-3.4028235e+38` to `3.4028235e+38`                     | `3.14+2.71j`       |
+| `complex128` | `-1.7976931348623157e+308` to `1.7976931348623157e+308` | `2.71828+3.14159j` |
+
+:::
+Ranges are for both real and imaginary parts.
+::::
+
+:::::
+
+## Structured Type
+
+A structured data type organizes and stores data in a fixed arrangement, allowing
+memory-efficient access and manipulation.
+
+```{eval-rst}
+.. autosummary::
+   :nosignatures:
+
+   StructuredType
+   StructuredField
+```
+
+Structured data types are an essential component in handling complex data structures,
+particularly in specialized domains like seismic data processing for subsurface
+imaging applications. These data types allow for the organization of heterogeneous
+data into a single, structured format.
+
+They are designed to be memory-efficient, which is vital for handling large seismic
+datasets. Structured data types are adaptable, allowing for the addition or
+modification of fields.
+
+A [`StructuredType`](#StructuredType) consists of [`StructuredField`](#StructuredField)s.
+Fields can be different [numeric types](#numeric-types), and each represents a specific
+attribute of the seismic data, such as coordinates, line numbers, and time stamps.
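+
+For intuition, the packed layout of a structured type corresponds to a NumPy
+structured dtype with no padding. A minimal sketch for the four-field header
+example shown below (the field names are illustrative):
+
+```python
+import numpy as np
+
+# Packed struct of four 4-byte integers, mirroring the "headers" example below.
+header_dtype = np.dtype(
+    [("cdp-x", "int32"), ("cdp-y", "int32"), ("inline", "int32"), ("crossline", "int32")]
+)
+assert header_dtype.itemsize == 16  # four int32 fields, no padding
+```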
+
+Each [`StructuredField`](#StructuredField) must specify a `name` and a data format
+(`format`).
+
+All structured fields are packed, with no gaps between them.
+
+## Examples
+
+The examples below illustrate how [ScalarType](#ScalarType)s appear in variable
+definitions, with an example of each type.
+
+Variable `foo` with type `float32`.
+
+```json
+{
+  "name": "foo",
+  "dataType": "float32",
+  "dimensions": ["x", "y"]
+}
+```
+
+Variable `bar` with type `uint8`.
+
+```json
+{
+  "name": "bar",
+  "dataType": "uint8",
+  "dimensions": ["x", "y"]
+}
+```
+
+Below are a couple of examples of [StructuredType](#StructuredType) with varying lengths.
+
+We can specify a variable named `headers` that holds a 16-byte struct with
+four `int32` values.
+
+```json
+{
+  "name": "headers",
+  "dataType": {
+    "fields": [
+      { "name": "cdp-x", "format": "int32" },
+      { "name": "cdp-y", "format": "int32" },
+      { "name": "inline", "format": "int32" },
+      { "name": "crossline", "format": "int32" }
+    ]
+  },
+  "dimensions": ["inline", "crossline"]
+}
+```
+
+This will yield an in-memory or on-disk struct that looks like this (for each element):
+
+```bash
+ ←─ 4 ─→ ←─ 4 ─→ ←─ 4 ─→ ←─ 4 ─→ = 16-bytes
+┌───────┬───────┬───────┬───────┐
+│ int32 ╎ int32 ╎ int32 ╎ int32 │ ⋯ (next sample)
+└───────┴───────┴───────┴───────┘
+ └→ cdp-x  └→ cdp-y  └→ inline  └→crossline
+```
+
+The example below shows mixing different data types.
+
+```json
+{
+  "name": "headers",
+  "dataType": {
+    "fields": [
+      { "name": "cdp", "format": "uint32" },
+      { "name": "offset", "format": "int16" },
+      { "name": "cdp-x", "format": "float64" },
+      { "name": "cdp-y", "format": "float64" }
+    ]
+  },
+  "dimensions": ["inline", "crossline"]
+}
```

+
+This will yield an in-memory or on-disk struct that looks like this (for each element):
+
+```bash
+ ←── 4 ──→ ← 2 → ←─── 8 ───→ ←─── 8 ───→ = 22-bytes
+┌─────────┬─────┬───────────┬───────────┐
+│ uint32  ╎int16╎  float64  ╎  float64  │ ⋯ (next sample)
+└─────────┴─────┴───────────┴───────────┘
+ └→ cdp     └→ offset └→ cdp-x   └→ cdp-y
+```
+
+## Model Reference
+
+:::{dropdown} Scalar Types
+:animate: fade-in-slide-down
+
+```{eval-rst}
+.. autoclass:: ScalarType()
+   :members:
+   :undoc-members:
+   :member-order: bysource
+```
+
+:::
+
+:::{dropdown} Structured Type
+:animate: fade-in-slide-down
+
+```{eval-rst}
+.. autopydantic_model:: StructuredType
+
+----------
+
+.. autopydantic_model:: StructuredField
+```
+
+:::
diff --git a/docs/data_models/dimensions.md b/docs/data_models/dimensions.md
new file mode 100644
index 00000000..c04b9206
--- /dev/null
+++ b/docs/data_models/dimensions.md
@@ -0,0 +1,33 @@
+```{eval-rst}
+:tocdepth: 3
+```
+
+```{currentModule} mdio.schemas.dimension
+
+```
+
+# Dimensions
+
+```{article-info}
+:author: Altay Sansal
+:date: "{sub-ref}`today`"
+:read-time: "{sub-ref}`wordcount-minutes` min read"
+:class-container: sd-p-0 sd-outline-muted sd-rounded-3 sd-font-weight-light
+```
+
+## Intro
+
+```{eval-rst}
+.. autosummary:: NamedDimension
+```
+
+## Reference
+
+:::{dropdown} Dimension
+:open:
+
+```{eval-rst}
+.. autopydantic_model:: NamedDimension
+```
+
+:::
diff --git a/docs/data_models/index.md b/docs/data_models/index.md
new file mode 100644
index 00000000..191f937b
--- /dev/null
+++ b/docs/data_models/index.md
@@ -0,0 +1,10 @@
+# Dataset Models
+
+This section contains the data models for the MDIO format.
+ +```{toctree} +:maxdepth: 2 + +version_0 +version_1 +``` diff --git a/docs/data_models/version_0.md b/docs/data_models/version_0.md new file mode 100644 index 00000000..bac21ff6 --- /dev/null +++ b/docs/data_models/version_0.md @@ -0,0 +1,55 @@ +```{eval-rst} +:tocdepth: 3 +``` + +```{currentModule} mdio.schemas.v0.dataset + +``` + +# MDIO v0 + +```{article-info} +:author: Altay Sansal +:date: "{sub-ref}`today`" +:read-time: "{sub-ref}`wordcount-minutes` min read" +:class-container: sd-p-0 sd-outline-muted sd-rounded-3 sd-font-weight-light +``` + +## Intro + +```{eval-rst} +.. autosummary:: + + DatasetModelV0 + VariableModelV0 + DatasetMetadataModelV0 + DimensionModelV0 +``` + +## Reference + +:::{dropdown} Dataset +:open: + +```{eval-rst} +.. autopydantic_model:: DatasetModelV0 + :inherited-members: BaseModel + +.. autopydantic_model:: DatasetMetadataModelV0 + :inherited-members: BaseModel + +.. autopydantic_model:: DimensionModelV0 + :inherited-members: BaseModel +``` + +::: + +:::{dropdown} Variable +:open: + +```{eval-rst} +.. autopydantic_model:: VariableModelV0 + :inherited-members: BaseModel +``` + +::: diff --git a/docs/data_models/version_1.md b/docs/data_models/version_1.md new file mode 100644 index 00000000..2fd39753 --- /dev/null +++ b/docs/data_models/version_1.md @@ -0,0 +1,134 @@ +```{eval-rst} +:tocdepth: 3 +``` + +```{currentModule} mdio.schemas.v1.dataset + +``` + +# MDIO v1 + +```{article-info} +:author: Altay Sansal +:date: "{sub-ref}`today`" +:read-time: "{sub-ref}`wordcount-minutes` min read" +:class-container: sd-p-0 sd-outline-muted sd-rounded-3 sd-font-weight-light +``` + +## Intro + +```{eval-rst} +.. autosummary:: Dataset +.. autosummary:: DatasetMetadata +``` + +## Reference + +:::{dropdown} Dataset +:open: + +```{eval-rst} +.. autopydantic_model:: Dataset + :inherited-members: BaseModel + +.. autopydantic_model:: DatasetMetadata + :inherited-members: BaseModel +``` + +::: +:::{dropdown} Variable + +```{eval-rst} +.. autopydantic_model:: mdio.schemas.v1.variable.Variable + :inherited-members: BaseModel + +.. autopydantic_model:: mdio.schemas.v1.variable.Coordinate + :inherited-members: BaseModel + +.. autopydantic_model:: mdio.schemas.v1.variable.CoordinateMetadata + :inherited-members: BaseModel + +.. automodule:: mdio.schemas.metadata + :members: UserAttributes + +.. autopydantic_model:: mdio.schemas.v1.variable.VariableMetadata + :inherited-members: BaseModel +``` + +::: + +:::{dropdown} Units + +```{eval-rst} +.. autopydantic_model:: mdio.schemas.v1.units.AllUnits +``` + +```{eval-rst} +.. automodule:: mdio.schemas.v1.units + :members: LengthUnitModel, + TimeUnitModel, + AngleUnitModel, + DensityUnitModel, + SpeedUnitModel, + FrequencyUnitModel, + VoltageUnitModel +``` + +::: + +:::{dropdown} Stats + +```{eval-rst} +.. autopydantic_model:: mdio.schemas.v1.stats.StatisticsMetadata + +.. autopydantic_model:: mdio.schemas.v1.stats.SummaryStatistics + +.. autopydantic_model:: mdio.schemas.v1.stats.EdgeDefinedHistogram + :inherited-members: BaseModel + +.. autopydantic_model:: mdio.schemas.v1.stats.CenteredBinHistogram + :inherited-members: BaseModel +``` + +::: + +:::{dropdown} Enums + +```{eval-rst} +.. autoclass:: mdio.schemas.v1.units.AngleUnitEnum() + :members: + :undoc-members: + :member-order: bysource + +.. autoclass:: mdio.schemas.v1.units.DensityUnitEnum() + :members: + :undoc-members: + :member-order: bysource + +.. autoclass:: mdio.schemas.v1.units.FrequencyUnitEnum() + :members: + :undoc-members: + :member-order: bysource + +.. 
autoclass:: mdio.schemas.v1.units.LengthUnitEnum() + :members: + :undoc-members: + :member-order: bysource + +.. autoclass:: mdio.schemas.v1.units.SpeedUnitEnum() + :members: + :undoc-members: + :member-order: bysource + +.. autoclass:: mdio.schemas.v1.units.TimeUnitEnum() + :members: + :undoc-members: + :member-order: bysource + +.. autoclass:: mdio.schemas.v1.units.VoltageUnitEnum() + :members: + :undoc-members: + :member-order: bysource +``` + +::: diff --git a/docs/index.md b/docs/index.md index 9b674107..da8408e3 100644 --- a/docs/index.md +++ b/docs/index.md @@ -26,6 +26,18 @@ tutorials/index api_reference ``` +```{toctree} +:hidden: +:caption: Core Concepts and Structures + +data_models/index +data_models/dimensions +data_models/chunk_grids +data_models/data_types +data_models/compressors +template_registry +``` + ```{toctree} :hidden: :caption: Community and Contribution diff --git a/docs/requirements.txt b/docs/requirements.txt index 158a4ef5..935a0fee 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,4 @@ +autodoc-pydantic==2.2.0 furo==2025.7.19 linkify-it-py==2.0.3 myst-nb==1.3.0 diff --git a/docs/template_registry.md b/docs/template_registry.md new file mode 100644 index 00000000..3b51de81 --- /dev/null +++ b/docs/template_registry.md @@ -0,0 +1,254 @@ +# Template Registry Singleton + +A thread-safe singleton registry for managing dataset templates in MDIO applications. + +## Overview + +The `TemplateRegistry` implements the singleton pattern to ensure there's only one instance managing all dataset templates throughout the application lifecycle. This provides a centralized registry for template management with thread-safe operations. + +## Features + +- **Singleton Pattern**: Ensures only one registry instance exists +- **Thread Safety**: All operations are thread-safe using locks +- **Global Access**: Convenient global functions for common operations +- **Advanced Support**: Reset functionality for environment re-usability. 
+- **Default Templates**: The registry is instantiated with the default set of templates: + - PostStack2DTime + - PostStack3DTime + - PreStackCdpGathers3DTime + - PreStackShotGathers3DTime + - PostStack2DDepth + - PostStack3DDepth + - PreStackCdpGathers3DDepth + - PreStackShotGathers3DDepth + +## Usage + +### Basic Usage + +```python +from mdio.schemas.v1.templates.template_registry import TemplateRegistry + +# Get the singleton instance +registry = TemplateRegistry() + +# Or use the class method +registry = TemplateRegistry.get_instance() + +# Register a template +template = MyDatasetTemplate() +template_name=registry.register(template) +print(f"Registered template named {template_name}") + +# Retrieve a template using a well-known name +template = registry.get("my_template") +# Retrieve a template using the name returned when the template was registered +template = registry.get(template_name) + +# Check if template exists +if registry.is_registered("my_template"): + print("Template is registered") + +# List all templates +template_names = registry.list_all_templates() +``` + +### Global Functions + +For convenience, you can use global functions that operate on the singleton instance: + +```python +from mdio.schemas.v1.templates.template_registry import ( + register_template, + get_template, + is_template_registered, + list_templates +) + +# Register a template globally +register_template(Seismic3DTemplate()) + +# Get a template +template = get_template("seismic_3d") + +# Check registration +if is_template_registered("seismic_3d"): + print("Template available") + +# List all registered templates +templates = list_templates() +``` + +### Multiple Instantiation + +The singleton pattern ensures all instantiations return the same object: + +```python +registry1 = TemplateRegistry() +registry2 = TemplateRegistry() +registry3 = TemplateRegistry.get_instance() + +# All variables point to the same instance +assert registry1 is registry2 is registry3 +``` + +## API Reference + +### Core Methods + +#### `register(instance: AbstractDatasetTemplate) -> str` + +Registers a template instance and returns its normalized name. + +- **Parameters:** + - `instance`: Template instance implementing `AbstractDatasetTemplate` +- **Returns:** The template name +- **Raises:** `ValueError` if template name is already registered + +#### `get(template_name: str) -> AbstractDatasetTemplate` + +Retrieves a registered template by name. + +- **Parameters:** + - `template_name`: Name of the template (case-insensitive) +- **Returns:** The registered template instance +- **Raises:** `KeyError` if template is not registered + +#### `unregister(template_name: str) -> None` + +Removes a template from the registry. + +- **Parameters:** + - `template_name`: Name of the template to remove +- **Raises:** `KeyError` if template is not registered + +#### `is_registered(template_name: str) -> bool` + +Checks if a template is registered. + +- **Parameters:** + - `template_name`: Name of the template to check +- **Returns:** `True` if template is registered, `False` otherwise + +#### `list_all_templates() -> List[str]` + +Returns a list of all registered template names. + +- **Returns:** List of template names + +#### `clear() -> None` + +Removes all registered templates. Useful for testing. + +### Class Methods + +#### `get_instance() -> TemplateRegistry` + +Alternative way to get the singleton instance. 
+
+- **Returns:** The singleton registry instance
+
+### Global Functions
+
+#### `get_template_registry() -> TemplateRegistry`
+
+Returns the global singleton registry instance.
+
+#### `register_template(template: AbstractDatasetTemplate) -> str`
+
+Registers a template in the global registry.
+
+#### `get_template(name: str) -> AbstractDatasetTemplate`
+
+Gets a template from the global registry.
+
+#### `is_template_registered(name: str) -> bool`
+
+Checks if a template is registered in the global registry.
+
+#### `list_templates() -> List[str]`
+
+Lists all templates in the global registry.
+
+## Thread Safety
+
+All operations on the registry are thread-safe:
+
+```python
+import threading
+
+def register_templates():
+    registry = TemplateRegistry()
+    for i in range(10):
+        template = MyTemplate(f"template_{i}")
+        registry.register(template)
+
+# Multiple threads can safely access the registry
+threads = [threading.Thread(target=register_templates) for _ in range(5)]
+for thread in threads:
+    thread.start()
+for thread in threads:
+    thread.join()
+```
+
+## Best Practices
+
+1. **Use Global Functions**: For simple operations, prefer the global convenience functions
+2. **Register Early**: Register all templates during application startup
+3. **Thread Safety**: The registry is thread-safe, but individual templates may not be
+4. **Testing Isolation**: Always reset the singleton in test setup/teardown
+
+## Example: Complete Template Management
+
+```python
+from mdio.schemas.v1.templates.template_registry import TemplateRegistry
+from mdio.schemas.v1.templates.template_registry import list_templates
+from mdio.schemas.v1.templates.seismic_3d_poststack import Seismic3DPostStackTemplate
+from mdio.schemas.v1.templates.seismic_3d_prestack_time import Seismic3DPostStackTimeTemplate
+from mdio.schemas.v1.templates.seismic_3d_prestack import Seismic3DPreStackTemplate
+from mdio.schemas.v1.units import AllUnits
+from mdio.schemas.v1.units import LengthUnitEnum
+from mdio.schemas.v1.units import LengthUnitModel
+
+def setup_templates():
+    """Register MDIO templates at runtime.
+
+    Custom templates can be created in external projects and added without
+    modifying the MDIO library code.
+    """
+    registry = TemplateRegistry.get_instance()
+    # Use a strongly-typed template
+    template_name = registry.register(Seismic3DPostStackTimeTemplate())
+    print(f"Registered template named {template_name}")
+    # Use a parametrized template
+    template_name = registry.register(Seismic3DPostStackTemplate("Depth"))
+    print(f"Registered template named {template_name}")
+    template_name = registry.register(Seismic3DPreStackTemplate())
+    print(f"Registered template named {template_name}")
+
+    print(f"Registered templates: {list_templates()}")
+
+# Application startup
+setup_templates()
+
+# Later in the application
+template = TemplateRegistry.get_instance().get("PostStack3DDepth")
+dataset = template.create_dataset(
+    name="Seismic 3D m/m/ft",
+    sizes=[256, 512, 384],
+    coord_units=[
+        AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)),
+        AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)),
+        AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.FOOT)),
+    ],
+)
+```
+
+## Error Handling
+
+The registry provides clear error messages:
+
+```python
+# Template not registered
+try:
+    template = get_template("nonexistent")
+except KeyError as e:
+    print(f"Error: {e}")  # "Template 'nonexistent' is not registered."
+
+# Duplicate registration: both templates normalize to the same name
+try:
+    register_template(template1)
+    register_template(template2)
+except ValueError as e:
+    print(f"Error: {e}")  # "Template 'duplicate' is already registered."
+``` diff --git a/noxfile.py b/noxfile.py index d78994a0..281ec525 100644 --- a/noxfile.py +++ b/noxfile.py @@ -257,6 +257,7 @@ def docs_build(session: Session) -> None: session_install_uv_package( session, [ + "autodoc-pydantic", "sphinx", "sphinx-click", "sphinx-copybutton", @@ -282,6 +283,7 @@ def docs(session: Session) -> None: session_install_uv_package( session, [ + "autodoc-pydantic", "sphinx", "sphinx-autobuild", "sphinx-click", diff --git a/pyproject.toml b/pyproject.toml index 82788c81..4bbf4c18 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "multidimio" -version = "0.9.3" +version = "1.0.0-alpha.1" description = "Cloud-native, scalable, and user-friendly multi dimensional energy data!" authors = [{ name = "Altay Sansal", email = "altay.sansal@tgs.com" }] requires-python = ">=3.11,<3.14" @@ -23,26 +23,30 @@ classifiers = [ ] dependencies = [ - "click (>=8.1.7,<9.0.0)", + "click (>=8.2.1,<9.0.0)", "click-params (>=0.5.0,<0.6.0)", - "dask (>=2024.12.0)", - "fsspec (>=2024.10.0)", - "psutil (>=6.1.0,<7.0.0)", - "rich (>=13.9.4,<14.0.0)", - "segy (>=0.4.0,<0.5.0)", - "tqdm (>=4.67.0,<5.0.0)", - "zarr (>=3.1.0,<4.0.0)", + "dask (>=2025.7.0)", + "fsspec (>=2025.7.0)", + "pint>=0.24.4,<0.25", + "psutil (>=7.0.0,<8.0.0)", + "pydantic (>=2.11.7,<3.0.0)", + "pydantic-settings (>=2.10.1,<3.0.0)", + "rich (>=14.1.0,<15.0.0)", + "segy (>=0.4.2,<0.5.0)", + "tqdm (>=4.67.1,<5.0.0)", + "xarray>=2025.7.1", + "zarr (>=3.1.1,<4.0.0)", ] [project.optional-dependencies] cloud = [ - "s3fs == 2024.12.0", - "gcsfs (>=2024.10.0)", - "adlfs (>=2024.7.0)", + "s3fs (>=2025.7.0)", + "gcsfs (>=2025.7.0)", + "adlfs (>=2024.12.0)", ] distributed = [ - "distributed (>=2024.12.0)", - "bokeh (>=3.4.2,<4.0.0)", + "distributed (>=2025.7.0)", + "bokeh (>=3.7.3,<4.0.0)", ] lossy = ["zfpy (>=1.0.1,<2.0.0)"] @@ -56,17 +60,17 @@ mdio = "mdio.__main__:main" [dependency-groups] dev = [ - "ruff (>=0.11.8)", - "coverage[toml] (>=7.6.7,<8)", - "mypy (>=1.13.0,<2)", - "pre-commit (>=4.0.1,<5)", + "ruff (>=0.12.1)", + "coverage[toml] (>=7.9.1,<8)", + "mypy (>=1.16.1,<2)", + "pre-commit (>=4.2.0,<5)", "pre-commit-hooks (>=5.0.0,<6)", - "pytest (>=8.3.3,<9)", + "pytest (>=8.4.1,<9)", "pytest-dependency (>=0.6.0,<0.7)", - "safety (>=3.2.3,<4)", - "typeguard (>=4.4.1,<5)", +# "safety (>=3.5.2,<4)", # too tight pydantic and psutil dependency + "typeguard (>=4.4.4,<5)", "xdoctest[colors] (>=1.2.0,<2)", - "Pygments (>=2.18.0,<3)", + "Pygments (>=2.19.2,<3)", ] docs = [ @@ -112,7 +116,7 @@ select = [ "RET", # return "SIM", # simplify "TID", # tidy-imports - "TCH", # type-checking + "TC", # type-checking "ARG", # unused-arguments "PTH", # use-pathlib "TD", # todos @@ -130,7 +134,7 @@ ignore = [ ] [tool.ruff.lint.per-file-ignores] -"tests/*" = ["S101"] +"tests/*" = ["S101", "PLR2004"] "tests/integration/test_segy_import_export_masked.py" = ["E501"] "docs/tutorials/*.ipynb" = ["S101"] @@ -180,11 +184,16 @@ pretty = true show_column_numbers = true show_error_codes = true show_error_context = true -disallow_untyped_defs = true # for strict mypy: (this is the tricky one) -plugins = ["numpy.typing.mypy_plugin"] +disallow_untyped_defs = true # for strict mypy: (this is the tricky one) +plugins = ["pydantic.mypy", "numpy.typing.mypy_plugin"] + +[tool.pydantic-mypy] +init_forbid_extra = true +init_typed = true +warn_required_dynamic_aliases = true [tool.bumpversion] -current_version = "0.9.3" +current_version = "1.0.0-alpha.1" allow_dirty = true commit = false tag = false diff --git 
a/src/mdio/api/convenience.py b/src/mdio/api/convenience.py index a0536214..4d8b5df6 100644 --- a/src/mdio/api/convenience.py +++ b/src/mdio/api/convenience.py @@ -79,7 +79,11 @@ def copy_mdio( # noqa: PLR0913 writer.live_mask[:] = reader.live_mask[:] - iterator = ChunkIterator(reader._traces, chunk_samples=False) + shape = reader._traces.shape + chunks = reader._traces.chunks + chunks = chunks[:-1] + (shape[-1],) # don't chunk samples + + iterator = ChunkIterator(shape=shape, chunks=chunks) progress = tqdm(iterator, unit="block") progress.set_description(desc=f"Copying data for '{access_pattern=}'") for slice_ in progress: @@ -177,7 +181,10 @@ def create_rechunk_plan( n_dimension = len(data_array.shape) dummy_array = zarr.empty(shape=data_array.shape, chunks=(MAX_BUFFER,) * n_dimension) - iterator = ChunkIterator(dummy_array) + + shape = dummy_array.shape + chunks = dummy_array.chunks + iterator = ChunkIterator(shape=shape, chunks=chunks) return metadata_arrs, data_arrs, live_mask, iterator diff --git a/src/mdio/constants.py b/src/mdio/constants.py index bff76531..5b8384e0 100644 --- a/src/mdio/constants.py +++ b/src/mdio/constants.py @@ -1,36 +1,59 @@ """Constant values used across MDIO.""" -import numpy as np +from numpy import finfo as np_finfo +from numpy import iinfo as np_iinfo +from numpy import nan as np_nan -FLOAT16_MAX = np.finfo("float16").max -FLOAT16_MIN = np.finfo("float16").min +from mdio.schemas.dtype import ScalarType -FLOAT32_MAX = np.finfo("float32").max -FLOAT32_MIN = np.finfo("float32").min +FLOAT16_MAX = np_finfo("float16").max +FLOAT16_MIN = np_finfo("float16").min -FLOAT64_MIN = np.finfo("float64").min -FLOAT64_MAX = np.finfo("float64").max +FLOAT32_MAX = np_finfo("float32").max +FLOAT32_MIN = np_finfo("float32").min -INT8_MIN = np.iinfo("int8").min -INT8_MAX = np.iinfo("int8").max +FLOAT64_MIN = np_finfo("float64").min +FLOAT64_MAX = np_finfo("float64").max -INT16_MIN = np.iinfo("int16").min -INT16_MAX = np.iinfo("int16").max +INT8_MIN = np_iinfo("int8").min +INT8_MAX = np_iinfo("int8").max -INT32_MIN = np.iinfo("int32").min -INT32_MAX = np.iinfo("int32").max +INT16_MIN = np_iinfo("int16").min +INT16_MAX = np_iinfo("int16").max -INT64_MIN = np.iinfo("int64").min -INT64_MAX = np.iinfo("int64").max +INT32_MIN = np_iinfo("int32").min +INT32_MAX = np_iinfo("int32").max + +INT64_MIN = np_iinfo("int64").min +INT64_MAX = np_iinfo("int64").max UINT8_MIN = 0 -UINT8_MAX = np.iinfo("uint8").max +UINT8_MAX = np_iinfo("uint8").max UINT16_MIN = 0 -UINT16_MAX = np.iinfo("uint16").max +UINT16_MAX = np_iinfo("uint16").max UINT32_MIN = 0 -UINT32_MAX = np.iinfo("uint32").max +UINT32_MAX = np_iinfo("uint32").max UINT64_MIN = 0 -UINT64_MAX = np.iinfo("uint64").max +UINT64_MAX = np_iinfo("uint64").max + +# Zarr fill values for different scalar types +fill_value_map = { + ScalarType.BOOL: None, + ScalarType.FLOAT16: np_nan, + ScalarType.FLOAT32: np_nan, + ScalarType.FLOAT64: np_nan, + ScalarType.UINT8: UINT8_MAX, + ScalarType.UINT16: UINT16_MAX, + ScalarType.UINT32: UINT32_MAX, + ScalarType.UINT64: UINT64_MAX, + ScalarType.INT8: INT8_MAX, + ScalarType.INT16: INT16_MAX, + ScalarType.INT32: INT32_MAX, + ScalarType.INT64: INT64_MAX, + ScalarType.COMPLEX64: complex(np_nan, np_nan), + ScalarType.COMPLEX128: complex(np_nan, np_nan), + ScalarType.COMPLEX256: complex(np_nan, np_nan), +} diff --git a/src/mdio/converters/numpy.py b/src/mdio/converters/numpy.py index 832b9656..de52e314 100644 --- a/src/mdio/converters/numpy.py +++ b/src/mdio/converters/numpy.py @@ -7,7 +7,6 @@ import numpy as np 
 from mdio.api.accessor import MDIOWriter
-from mdio.converters.segy import get_compressor
 from mdio.core.dimension import Dimension
 from mdio.core.factory import MDIOCreateConfig
 from mdio.core.factory import MDIOVariableConfig
@@ -137,6 +136,11 @@ def numpy_to_mdio(  # noqa: PLR0913
     suffix = [str(idx) for idx, value in enumerate(suffix) if value is not None]
     suffix = "".join(suffix)

+    # TODO(Dmitriy Repin): Implement the NumPy converter in MDIO v1
+    # https://github.com/TGSAI/mdio-python/issues/596
+    def get_compressor(lossless: bool, tolerance: float) -> list[str]:
+        pass
+
     compressors = get_compressor(lossless, compression_tolerance)
     mdio_var = MDIOVariableConfig(
         name=f"chunked_{suffix}",
diff --git a/src/mdio/converters/segy.py b/src/mdio/converters/segy.py
index 15946e84..96f3ad1a 100644
--- a/src/mdio/converters/segy.py
+++ b/src/mdio/converters/segy.py
@@ -1,42 +1,39 @@
-"""Conversion from SEG-Y to MDIO."""
+"""Conversion from SEG-Y to MDIO v1 format."""

 from __future__ import annotations

 import logging
 import os
 from typing import TYPE_CHECKING
-from typing import Any

 import numpy as np
-import zarr
-from numcodecs import Blosc
 from segy import SegyFile
 from segy.config import SegySettings
-from segy.schema import HeaderField
+from segy.standards.codes import MeasurementSystem as segy_MeasurementSystem
+from segy.standards.fields.trace import Rev0 as TraceHeaderFieldsRev0

+from mdio.constants import UINT32_MAX
 from mdio.converters.exceptions import EnvironmentFormatError
 from mdio.converters.exceptions import GridTraceCountError
 from mdio.converters.exceptions import GridTraceSparsityError
-from mdio.core import Grid
-from mdio.core.factory import MDIOCreateConfig
-from mdio.core.factory import MDIOVariableConfig
-from mdio.core.factory import create_empty
-from mdio.core.utils_write import write_attribute
+from mdio.converters.type_converter import to_structured_type
+from mdio.core.grid import Grid
+from mdio.schemas.v1.dataset_serializer import to_xarray_dataset
+from mdio.schemas.v1.units import AllUnits
+from mdio.schemas.v1.units import LengthUnitEnum
+from mdio.schemas.v1.units import LengthUnitModel
 from mdio.segy import blocked_io
-from mdio.segy.compat import mdio_segy_spec
 from mdio.segy.utilities import get_grid_plan

 if TYPE_CHECKING:
-    from collections.abc import Sequence
-    from pathlib import Path
-
-try:
-    import zfpy  # Base library
-    from numcodecs import ZFPY  # Codec
-except ImportError:
-    ZFPY = None
-    zfpy = None
+    from segy.arrays import HeaderArray as SegyHeaderArray
+    from segy.schema import SegySpec
+    from xarray import Dataset as xr_Dataset

+    from mdio.core.dimension import Dimension
+    from mdio.core.storage_location import StorageLocation
+    from mdio.schemas.v1.dataset import Dataset
+    from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate

 logger = logging.getLogger(__name__)
@@ -115,354 +112,262 @@ def grid_density_qc(grid: Grid, num_traces: int) -> None:
         raise GridTraceSparsityError(grid.shape, num_traces, msg)

-def get_compressor(lossless: bool, compression_tolerance: float = -1) -> Blosc | ZFPY | None:
-    """Get the appropriate compressor for the seismic traces."""
-    if lossless:
-        compressor = Blosc("zstd")
-    else:
-        if zfpy is None or ZFPY is None:
-            msg = (
-                "Lossy compression requires the 'zfpy' library. It is not installed in your "
-                "environment. To proceed, please install 'zfpy' or install mdio `lossy` extra."
- ) - raise ImportError(msg) - - compressor = ZFPY(mode=zfpy.mode_fixed_accuracy, tolerance=compression_tolerance) - return compressor - - -def segy_to_mdio( # noqa: PLR0913, PLR0915 - segy_path: str | Path, - mdio_path_or_buffer: str | Path, - index_bytes: Sequence[int], - index_names: Sequence[str] | None = None, - index_types: Sequence[str] | None = None, - chunksize: tuple[int, ...] | None = None, - lossless: bool = True, - compression_tolerance: float = 0.01, - storage_options_input: dict[str, Any] | None = None, - storage_options_output: dict[str, Any] | None = None, - overwrite: bool = False, - grid_overrides: dict | None = None, -) -> None: - """Convert SEG-Y file to MDIO format. - - MDIO allows ingesting flattened seismic surveys in SEG-Y format into a multidimensional tensor - that represents the correct geometry of the seismic dataset. - - The SEG-Y file must be on disk, MDIO currently does not support reading SEG-Y directly from - the cloud object store. - - The output MDIO file can be local or on the cloud. For local files, a UNIX or Windows path is - sufficient. However, for cloud stores, an appropriate protocol must be provided. See examples - for more details. +def _scan_for_headers( + segy_file: SegyFile, template: AbstractDatasetTemplate +) -> tuple[list[Dimension], SegyHeaderArray]: + """Extract trace dimensions and index headers from the SEG-Y file. - The SEG-Y headers for indexing must also be specified. The index byte locations (starts from 1) - are the minimum amount of information needed to index the file. However, we suggest giving - names to the index dimensions, and if needed providing the header lengths if they are not - standard. By default, all header entries are assumed to be 4-byte long. + This is an expensive operation. + It scans the SEG-Y file in chunks by using ProcessPoolExecutor + """ + # TODO(Dmitriy): implement grid overrides + # https://github.com/TGSAI/mdio-python/issues/585 + # The 'grid_chunksize' is used only for grid_overrides + # While we do not support grid override, we can set it to None + grid_chunksize = None + segy_dimensions, chunksize, segy_headers = get_grid_plan( + segy_file=segy_file, + return_headers=True, + template=template, + chunksize=grid_chunksize, + grid_overrides=None, + ) + return segy_dimensions, segy_headers - The chunk size depends on the data type, however, it can be chosen to accommodate any - workflow's access patterns. See examples below for some common use cases. - By default, the data is ingested with LOSSLESS compression. This saves disk space in the range - of 20% to 40%. MDIO also allows data to be compressed using the ZFP compressor's fixed rate - lossy compression. If lossless parameter is set to False and MDIO was installed using the lossy - extra; then the data will be compressed to approximately 30% of its original size and will be - perceptually lossless. The compression ratio can be adjusted using the option compression_ratio - (integer). Higher values will compress more, but will introduce artifacts. +def _build_and_check_grid( + segy_dimensions: list[Dimension], segy_file: SegyFile, segy_headers: SegyHeaderArray +) -> Grid: + """Build and check the grid from the SEG-Y headers and dimensions. Args: - segy_path: Path to the input SEG-Y file - mdio_path_or_buffer: Output path for the MDIO file, either local or cloud-based (e.g., - with `s3://`, `gcs://`, or `abfs://` protocols). - index_bytes: Tuple of the byte location for the index attributes - index_names: List of names for the index dimensions. 
If not provided, defaults to `dim_0`, - `dim_1`, ..., with the last dimension named `sample`. - index_types: Tuple of the data-types for the index attributes. Must be in {"int16, int32, - float16, float32, ibm32"}. Default is 4-byte integers for each index key. - chunksize: Tuple specifying the chunk sizes for each dimension of the array. It must match - the number of dimensions in the input array. - lossless: If True, uses lossless Blosc compression with zstandard. If False, uses ZFP lossy - compression (requires `zfpy` library). - compression_tolerance: Tolerance for ZFP compression in lossy mode. Ignored if - `lossless=True`. Default is 0.01, providing ~70% size reduction. - storage_options_input: Dictionary of storage options for the SEGY input output file (e.g., - cloud credentials). Defaults to None. - storage_options_output: Dictionary of storage options for the MDIO output output file - (e.g., cloud credentials). Defaults to None. - overwrite: If True, overwrites existing MDIO file at the specified path. - grid_overrides: Option to add grid overrides. See examples. + segy_dimensions: List of of all SEG-Y dimensions to build grid from. + segy_file: Instance of SegyFile to check for trace count. + segy_headers: Headers read in from SEG-Y file for building the trace map. + + Returns: + A grid instance populated with the dimensions and trace index map. Raises: - GridTraceCountError: Raised if grid won't hold all traces in the SEG-Y file. - ValueError: If length of chunk sizes don't match number of dimensions. - NotImplementedError: If can't determine chunking automatically for 4D+. - - Examples: - If we are working locally and ingesting a 3D post-stack seismic file, we can use the - following example. This will ingest with default chunks of 128 x 128 x 128. - - >>> from mdio import segy_to_mdio - >>> - >>> - >>> segy_to_mdio( - ... segy_path="prefix1/file.segy", - ... mdio_path_or_buffer="prefix2/file.mdio", - ... index_bytes=(189, 193), - ... index_names=("inline", "crossline") - ... ) - - If we are on Amazon Web Services, we can do it like below. The protocol before the URL can - be `s3` for AWS, `gcs` for Google Cloud, and `abfs` for Microsoft Azure. In this example we - also change the chunk size as a demonstration. - - >>> segy_to_mdio( - ... segy_path="prefix/file.segy", - ... mdio_path_or_buffer="s3://bucket/file.mdio", - ... index_bytes=(189, 193), - ... index_names=("inline", "crossline"), - ... chunksize=(64, 64, 512), - ... ) - - Another example of loading a 4D seismic such as 3D seismic pre-stack gathers is below. This - will allow us to extract offset planes efficiently or run things in a local neighborhood - very efficiently. - - >>> segy_to_mdio( - ... segy_path="prefix/file.segy", - ... mdio_path_or_buffer="s3://bucket/file.mdio", - ... index_bytes=(189, 193, 37), - ... index_names=("inline", "crossline", "offset"), - ... chunksize=(16, 16, 16, 512), - ... ) - - We can override the dataset grid by the `grid_overrides` parameter. This allows us to - ingest files that don't conform to the true geometry of the seismic acquisition. - - For example if we are ingesting 3D seismic shots that don't have a cable number and channel - numbers are sequential (i.e. each cable doesn't start with channel number 1; we can tell - MDIO to ingest this with the correct geometry by calculating cable numbers and wrapped - channel numbers. Note the missing byte location and word length for the "cable" index. - - >>> segy_to_mdio( - ... segy_path="prefix/shot_file.segy", - ... 
mdio_path_or_buffer="s3://bucket/shot_file.mdio", - ... index_bytes=(17, None, 13), - ... index_lengths=(4, None, 4), - ... index_names=("shot", "cable", "channel"), - ... chunksize=(8, 2, 128, 1024), - ... grid_overrides={ - ... "ChannelWrap": True, "ChannelsPerCable": 800, - ... "CalculateCable": True - ... }, - ... ) - - If we do have cable numbers in the headers, but channels are still sequential (aka. - unwrapped), we can still ingest it like this. - - >>> segy_to_mdio( - ... segy_path="prefix/shot_file.segy", - ... mdio_path_or_buffer="s3://bucket/shot_file.mdio", - ... index_bytes=(17, 137, 13), - ... index_lengths=(4, 2, 4), - ... index_names=("shot_point", "cable", "channel"), - ... chunksize=(8, 2, 128, 1024), - ... grid_overrides={"ChannelWrap": True, "ChannelsPerCable": 800}, - ... ) - - For shot gathers with channel numbers and wrapped channels, no grid overrides necessary. - - In cases where the user does not know if the input has unwrapped channels but desires to - store with wrapped channel index use: - >>> grid_overrides = { - ... "AutoChannelWrap": True, - ... "AutoChannelTraceQC": 1000000 - ... } - - For ingestion of pre-stack streamer data where the user needs to access/index - *common-channel gathers* (single gun) then the following strategy can be used to densely - ingest while indexing on gun number: - - >>> segy_to_mdio( - ... segy_path="prefix/shot_file.segy", - ... mdio_path_or_buffer="s3://bucket/shot_file.mdio", - ... index_bytes=(133, 171, 17, 137, 13), - ... index_lengths=(2, 2, 4, 2, 4), - ... index_names=("shot_line", "gun", "shot_point", "cable", "channel"), - ... chunksize=(1, 1, 8, 1, 128, 1024), - ... grid_overrides={ - ... "AutoShotWrap": True, - ... "AutoChannelWrap": True, - ... "AutoChannelTraceQC": 1000000 - ... }, - ... ) - - For AutoShotWrap and AutoChannelWrap to work, the user must provide "shot_line", "gun", - "shot_point", "cable", "channel". For improved common-channel performance consider - modifying the chunksize to be (1, 1, 32, 1, 32, 2048) for good common-shot and - common-channel performance or (1, 1, 128, 1, 1, 2048) for common-channel performance. - - For cases with no well-defined trace header for indexing a NonBinned grid override is - provided.This creates the index and attributes an incrementing integer to the trace for - the index based on first in first out. For example a CDP and Offset keyed file might have a - header for offset as real world offset which would result in a very sparse populated index. - Instead, the following override will create a new index from 1 to N, where N is the number - of offsets within a CDP ensemble. The index to be auto generated is called "trace". Note - the required "chunksize" parameter in the grid override. This is due to the non-binned - ensemble chunksize is irrelevant to the index dimension chunksizes and has to be specified - in the grid override itself. Note the lack of offset, only indexing CDP, providing CDP - header type, and chunksize for only CDP and Sample dimension. The chunksize for non-binned - dimension is in the grid overrides as described above. The below configuration will yield - 1MB chunks: - - >>> segy_to_mdio( - ... segy_path="prefix/cdp_offset_file.segy", - ... mdio_path_or_buffer="s3://bucket/cdp_offset_file.mdio", - ... index_bytes=(21,), - ... index_types=("int32",), - ... index_names=("cdp",), - ... chunksize=(4, 1024), - ... grid_overrides={"NonBinned": True, "chunksize": 64}, - ... 
) - - A more complicated case where you may have a 5D dataset that is not binned in Offset and - Azimuth directions can be ingested like below. However, the Offset and Azimuth dimensions - will be combined to "trace" dimension. The below configuration will yield 1MB chunks. - - >>> segy_to_mdio( - ... segy_path="prefix/cdp_offset_file.segy", - ... mdio_path_or_buffer="s3://bucket/cdp_offset_file.mdio", - ... index_bytes=(189, 193), - ... index_types=("int32", "int32"), - ... index_names=("inline", "crossline"), - ... chunksize=(4, 4, 1024), - ... grid_overrides={"NonBinned": True, "chunksize": 64}, - ... ) - - For dataset with expected duplicate traces we have the following parameterization. This - will use the same logic as NonBinned with a fixed chunksize of 1. The other keys are still - important. The below example allows multiple traces per receiver (i.e. reshoot). - - >>> segy_to_mdio( - ... segy_path="prefix/cdp_offset_file.segy", - ... mdio_path_or_buffer="s3://bucket/cdp_offset_file.mdio", - ... index_bytes=(9, 213, 13), - ... index_types=("int32", "int16", "int32"), - ... index_names=("shot", "cable", "chan"), - ... chunksize=(8, 2, 256, 512), - ... grid_overrides={"HasDuplicates": True}, - ... ) + GridTraceCountError: If number of traces in SEG-Y file does not match the parsed grid """ - index_names = index_names or [f"dim_{i}" for i in range(len(index_bytes))] - index_types = index_types or ["int32"] * len(index_bytes) - - if chunksize is not None and len(chunksize) != len(index_bytes) + 1: - message = ( - f"Length of chunks={len(chunksize)} must be equal to array " - f"dimensions={len(index_bytes) + 1}" - ) - raise ValueError(message) - - # Handle storage options and check permissions etc - storage_options_input = storage_options_input or {} - storage_options_output = storage_options_output or {} - - # Open SEG-Y with MDIO's SegySpec. Endianness will be inferred. - mdio_spec = mdio_segy_spec() - segy_settings = SegySettings(storage_options=storage_options_input) - segy = SegyFile(url=segy_path, spec=mdio_spec, settings=segy_settings) - - text_header = segy.text_header - binary_header = segy.binary_header - num_traces = segy.num_traces - - # Index the dataset using a spec that interprets the user provided index headers. 
-    index_fields = []
-    for name, byte, format_ in zip(index_names, index_bytes, index_types, strict=True):
-        index_fields.append(HeaderField(name=name, byte=byte, format=format_))
-    mdio_spec_grid = mdio_spec.customize(trace_header_fields=index_fields)
-    segy_grid = SegyFile(url=segy_path, spec=mdio_spec_grid, settings=segy_settings)
-
-    dimensions, chunksize, index_headers = get_grid_plan(
-        segy_file=segy_grid,
-        return_headers=True,
-        chunksize=chunksize,
-        grid_overrides=grid_overrides,
-    )
-    grid = Grid(dims=dimensions)
-    grid_density_qc(grid, num_traces)
-    grid.build_map(index_headers)
-
+    grid = Grid(dims=segy_dimensions)
+    grid_density_qc(grid, segy_file.num_traces)
+    grid.build_map(segy_headers)
     # Check grid validity by comparing trace numbers
-    if np.sum(grid.live_mask) != num_traces:
+    if np.sum(grid.live_mask) != segy_file.num_traces:
         for dim_name in grid.dim_names:
             dim_min, dim_max = grid.get_min(dim_name), grid.get_max(dim_name)
             logger.warning("%s min: %s max: %s", dim_name, dim_min, dim_max)
         logger.warning("Ingestion grid shape: %s.", grid.shape)
-        raise GridTraceCountError(np.sum(grid.live_mask), num_traces)
-
-    if chunksize is None:
-        dim_count = len(index_names) + 1
-        if dim_count == 2:  # noqa: PLR2004
-            chunksize = (512,) * 2
-
-        elif dim_count == 3:  # noqa: PLR2004
-            chunksize = (64,) * 3
-
-        else:
-            msg = (
-                f"Default chunking for {dim_count}-D seismic data is not implemented yet. "
-                "Please explicity define chunk sizes."
-            )
-            raise NotImplementedError(msg)
-
-        suffix = [str(x) for x in range(dim_count)]
-        suffix = "".join(suffix)
-    else:
-        suffix = [dim_chunks if dim_chunks > 0 else None for dim_chunks in chunksize]
-        suffix = [str(idx) for idx, value in enumerate(suffix) if value is not None]
-        suffix = "".join(suffix)
-
-    compressors = get_compressor(lossless, compression_tolerance)
-    header_dtype = segy.spec.trace.header.dtype.newbyteorder("=")
-    var_conf = MDIOVariableConfig(
-        name=f"chunked_{suffix}",
-        dtype="float32",
-        chunks=chunksize,
-        compressors=compressors,
-        header_dtype=header_dtype,
+        raise GridTraceCountError(np.sum(grid.live_mask), segy_file.num_traces)
+    return grid
+
+
+def _get_coordinates(
+    segy_dimensions: list[Dimension],
+    segy_headers: SegyHeaderArray,
+    mdio_template: AbstractDatasetTemplate,
+) -> tuple[list[Dimension], dict[str, SegyHeaderArray]]:
+    """Get the data dim and non-dim coordinates from the SEG-Y headers and MDIO template.
+
+    Select a subset of the segy_dimensions that corresponds to the MDIO dimensions.
+    The dimensions are ordered as in the MDIO template.
+    The last dimension is always the vertical domain dimension.
+
+    Args:
+        segy_dimensions: List of all SEG-Y dimensions.
+        segy_headers: Headers read in from SEG-Y file.
+        mdio_template: The MDIO template to use for the conversion.
+
+    Raises:
+        ValueError: If a dimension or coordinate name from the MDIO template is not found in
+            the SEG-Y headers.
+
+    Returns:
+        A tuple containing:
+        - A list of dimension coordinates (1-D arrays).
+        - A dict of non-dimension coordinates (str: N-D arrays).
+    """
+    dimensions_coords = []
+    dim_names = [dim.name for dim in segy_dimensions]
+    for dim_name in mdio_template.dimension_names:
+        try:
+            dim_index = dim_names.index(dim_name)
+        except ValueError:
+            err = f"Dimension '{dim_name}' was not found in SEG-Y dimensions."
+            raise ValueError(err) from exc +        dimensions_coords.append(segy_dimensions[dim_index]) + +    non_dim_coords: dict[str, SegyHeaderArray] = {} +    available_headers = segy_headers.dtype.names +    for coord_name in mdio_template.coordinate_names: +        if coord_name not in available_headers: +            err = f"Coordinate '{coord_name}' not found in SEG-Y dimensions." +            raise ValueError(err) +        non_dim_coords[coord_name] = segy_headers[coord_name] + +    return dimensions_coords, non_dim_coords + + +def populate_dim_coordinates( +    dataset: xr_Dataset, grid: Grid, drop_vars_delayed: list[str] +) -> tuple[xr_Dataset, list[str]]: +    """Populate the xarray dataset with dimension coordinate variables.""" +    for dim in grid.dims: +        dataset[dim.name].values[:] = dim.coords +        drop_vars_delayed.append(dim.name) +    return dataset, drop_vars_delayed + + +def populate_non_dim_coordinates( +    dataset: xr_Dataset, +    grid: Grid, +    coordinates: dict[str, SegyHeaderArray], +    drop_vars_delayed: list[str], +) -> tuple[xr_Dataset, list[str]]: +    """Populate the xarray dataset with coordinate variables.""" +    not_null = grid.map[:] != UINT32_MAX +    for c_name, c_values in coordinates.items(): +        dataset[c_name].values[not_null] = c_values +        drop_vars_delayed.append(c_name) +    return dataset, drop_vars_delayed + + +def _get_horizontal_coordinate_unit(segy_headers: list[Dimension]) -> AllUnits | None: +    """Get the horizontal coordinate unit from the SEG-Y headers.""" +    name = TraceHeaderFieldsRev0.COORDINATE_UNIT.name.upper() +    unit_hdr = next((c for c in segy_headers if c.name.upper() == name), None) +    if unit_hdr is None or len(unit_hdr.coords) == 0: +        # If the coordinate unit header is not found or empty, return None +        # This is a common case for SEG-Y files, where the coordinate unit is not specified +        return None + +    if segy_MeasurementSystem(unit_hdr.coords[0]) == segy_MeasurementSystem.METERS: +        # If the coordinate unit is in meters, return "m" +        return AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)) +    if segy_MeasurementSystem(unit_hdr.coords[0]) == segy_MeasurementSystem.FEET: +        # If the coordinate unit is in feet, return "ft" +        return AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.FOOT)) +    err = f"Unsupported coordinate unit value: {unit_hdr.coords[0]} in SEG-Y file." +    raise ValueError(err) + + +def _populate_coordinates( +    dataset: xr_Dataset, +    grid: Grid, +    coords: dict[str, SegyHeaderArray], +) -> tuple[xr_Dataset, list[str]]: +    """Populate dim and non-dim coordinates in the xarray dataset and write to Zarr. + +    This will write the xr Dataset with coords and dimensions, but empty traces and headers. + +    Args: +        dataset: The xarray dataset to populate. +        grid: The grid object containing the grid map. +        coords: The non-dim coordinates to populate.
+ +    Returns: +        Xarray dataset with filled coordinates and updated variables to drop after writing. +    """ +    drop_vars_delayed = [] +    # Populate the dimension coordinate variables (1-D arrays) +    dataset, drop_vars_delayed = populate_dim_coordinates( +        dataset, grid, drop_vars_delayed=drop_vars_delayed     ) -    config = MDIOCreateConfig(path=mdio_path_or_buffer, grid=grid, variables=[var_conf]) -    root_group = create_empty( -        config, -        overwrite=overwrite, -        storage_options=storage_options_output, -        consolidate_meta=False, +    # Populate the non-dimension coordinate variables (N-dim arrays) +    dataset, drop_vars_delayed = populate_non_dim_coordinates( +        dataset, grid, coordinates=coords, drop_vars_delayed=drop_vars_delayed     ) -    data_group = root_group["data"] -    meta_group = root_group["metadata"] -    data_array = data_group[f"chunked_{suffix}"] -    header_array = meta_group[f"chunked_{suffix}_trace_headers"] - -    # Write actual live mask and metadata to empty MDIO -    meta_group["live_mask"][:] = grid.live_mask[:] -    nonzero_count = np.count_nonzero(grid.live_mask) -    write_attribute(name="trace_count", zarr_group=root_group, attribute=nonzero_count) -    write_attribute(name="text_header", zarr_group=meta_group, attribute=text_header.split("\n")) -    write_attribute(name="binary_header", zarr_group=meta_group, attribute=binary_header.to_dict()) - -    # Write traces -    stats = blocked_io.to_zarr( -        segy_file=segy, + +    return dataset, drop_vars_delayed + + +def segy_to_mdio( +    segy_spec: SegySpec, +    mdio_template: AbstractDatasetTemplate, +    input_location: StorageLocation, +    output_location: StorageLocation, +    overwrite: bool = False, +) -> None: +    """Convert a SEG-Y file to an MDIO v1 file. + +    Ingest a SEG-Y file according to the segy_spec. The spec can come from the registry or +    be a custom one. + +    Args: +        segy_spec: The SEG-Y specification to use for the conversion. +        mdio_template: The MDIO template to use for the conversion. +        input_location: The storage location of the input SEG-Y file. +        output_location: The storage location for the output MDIO v1 file. +        overwrite: Whether to overwrite the output file if it already exists. Defaults to False. + +    Raises: +        FileExistsError: If the output location already exists and overwrite is False. +    """ +    if not overwrite and output_location.exists(): +        err = f"Output location '{output_location.uri}' exists. Set `overwrite=True` if intended."
+        raise FileExistsError(err) + +    segy_settings = SegySettings(storage_options=input_location.options) +    segy_file = SegyFile(url=input_location.uri, spec=segy_spec, settings=segy_settings) + +    # Scan the SEG-Y file for headers +    segy_dimensions, segy_headers = _scan_for_headers(segy_file, mdio_template) + +    grid = _build_and_check_grid(segy_dimensions, segy_file, segy_headers) + +    dimensions, non_dim_coords = _get_coordinates(segy_dimensions, segy_headers, mdio_template) +    shape = [len(dim.coords) for dim in dimensions] +    headers = to_structured_type(segy_headers.dtype) + +    horizontal_unit = _get_horizontal_coordinate_unit(segy_dimensions) +    mdio_ds: Dataset = mdio_template.build_dataset( +        name=mdio_template.name, sizes=shape, horizontal_coord_unit=horizontal_unit, headers=headers +    ) + +    xr_dataset: xr_Dataset = to_xarray_dataset(mdio_ds=mdio_ds) + +    xr_dataset, drop_vars_delayed = _populate_coordinates( +        dataset=xr_dataset,         grid=grid, -        data_array=data_array, -        header_array=header_array, +        coords=non_dim_coords,     ) -    # Write actual stats -    for key, value in stats.items(): -        write_attribute(name=key, zarr_group=root_group, attribute=value) +    xr_dataset.trace_mask.data[:] = grid.live_mask + +    # TODO(Dmitriy Repin): Write out text and binary headers. +    # https://github.com/TGSAI/mdio-python/issues/595 + +    # IMPORTANT: Do not drop the "trace_mask" here, as it will be used later in +    # blocked_io.to_zarr() -> _workers.trace_worker() + +    # Write the xarray dataset to Zarr as follows: +    # Populated arrays: +    #   - 1D dimensional coordinates +    #   - ND non-dimensional coordinates +    #   - ND trace_mask +    # Empty arrays (will be populated later in chunks): +    #   - ND+1 traces +    #   - ND headers (no _FillValue set due to the bug https://github.com/TGSAI/mdio-python/issues/582) +    # This will create the Zarr store with the correct structure. +    # TODO(Dmitriy Repin): do chunked write for non-dimensional coordinates and trace_mask +    # https://github.com/TGSAI/mdio-python/issues/587 +    xr_dataset.to_zarr( +        store=output_location.uri, mode="w", write_empty_chunks=False, zarr_format=2, compute=True +    ) -    zarr.consolidate_metadata(root_group.store) +    # Now we can drop the already-written coordinate variables to simplify the chunked +    # write of the data variable +    xr_dataset = xr_dataset.drop_vars(drop_vars_delayed) + +    # Write the headers and traces in chunks using grid_map to indicate dead traces +    data_variable_name = mdio_template.trace_variable_name +    # This is a memory-expensive and time-consuming read-write operation +    # performed in chunks to save memory +    blocked_io.to_zarr( +        segy_file=segy_file, +        output_location=output_location, +        grid_map=grid.map, +        dataset=xr_dataset, +        data_variable_name=data_variable_name, +    ) diff --git a/src/mdio/converters/type_converter.py b/src/mdio/converters/type_converter.py new file mode 100644 index 00000000..20ce63d2 --- /dev/null +++ b/src/mdio/converters/type_converter.py @@ -0,0 +1,85 @@ +"""A module for converting numpy dtypes to MDIO scalar and structured types.""" + +from numpy import dtype as np_dtype + +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredField +from mdio.schemas.dtype import StructuredType + + +def to_scalar_type(data_type: np_dtype) -> ScalarType: +    """Convert numpy dtype to MDIO ScalarType.
+ +    Out of the 24 built-in numpy scalar type objects +    (see https://numpy.org/doc/stable/reference/arrays.dtypes.html) +    this function supports only a limited subset: +        ScalarType.INT8 <-> int8 +        ScalarType.INT16 <-> int16 +        ScalarType.INT32 <-> int32 +        ScalarType.INT64 <-> int64 +        ScalarType.UINT8 <-> uint8 +        ScalarType.UINT16 <-> uint16 +        ScalarType.UINT32 <-> uint32 +        ScalarType.UINT64 <-> uint64 +        ScalarType.FLOAT32 <-> float32 +        ScalarType.FLOAT64 <-> float64 +        ScalarType.COMPLEX64 <-> complex64 +        ScalarType.COMPLEX128 <-> complex128 +        ScalarType.BOOL <-> bool + +    Args: +        data_type: numpy dtype to convert + +    Returns: +        ScalarType: corresponding MDIO scalar type + +    Raises: +        ValueError: if dtype is not supported +    """ +    try: +        return ScalarType(data_type.name) +    except ValueError as exc: +        err = f"Unsupported numpy dtype '{data_type.name}' for conversion to ScalarType." +        raise ValueError(err) from exc + + +def to_structured_type(data_type: np_dtype) -> StructuredType: +    """Convert numpy dtype to MDIO StructuredType. + +    This function supports only a limited subset of structured types. +    In particular: +    It does not support nested structured types. +    It supports fields of only 13 out of 24 built-in numpy scalar types +    (see `to_scalar_type` for details). + +    Args: +        data_type: numpy dtype to convert + +    Returns: +        StructuredType: corresponding MDIO structured type + +    Raises: +        ValueError: if dtype is not structured or has no fields + +    """ +    if data_type is None or len(data_type.names or []) == 0: +        err = "None or empty dtype provided, cannot convert to StructuredType." +        raise ValueError(err) + +    fields = [] +    for field_name in data_type.names: +        field_dtype = data_type.fields[field_name][0] +        scalar_type = to_scalar_type(field_dtype) +        structured_field = StructuredField(name=field_name, format=scalar_type) +        fields.append(structured_field) +    return StructuredType(fields=fields) + + +def to_numpy_dtype(data_type: ScalarType | StructuredType) -> np_dtype: +    """Get the numpy dtype for a variable.""" +    if isinstance(data_type, ScalarType): +        return np_dtype(data_type.value) +    if isinstance(data_type, StructuredType): +        return np_dtype([(f.name, f.format.value) for f in data_type.fields]) +    msg = f"Expected ScalarType or StructuredType, got '{type(data_type).__name__}'" +    raise ValueError(msg) diff --git a/src/mdio/core/indexing.py b/src/mdio/core/indexing.py index 936178f8..1e10cadf 100644 --- a/src/mdio/core/indexing.py +++ b/src/mdio/core/indexing.py @@ -4,78 +4,83 @@ from math import ceil import numpy as np -from zarr import Array class ChunkIterator: -    """Iterator for traversing a Zarr array in chunks. +    """Chunk iterator for multi-dimensional arrays. -    This iterator yields tuples of slices corresponding to the chunk boundaries of a Zarr array. -    It supports chunking all dimensions or taking the full extent of the last dimension. +    This iterator takes an array shape and chunk sizes; on each iteration, it returns +    slices that align with chunk boundaries: a dictionary of slices if dimension names +    are provided, or a tuple of slices otherwise. The dimension names become the dictionary keys. Args: -        array: The Zarr array to iterate, providing shape and chunk sizes. -        chunk_samples: If True, chunks all dimensions. If False, takes the full extent of the -            last dimension. Defaults to True. - - -    Example: -        >>> import zarr -        >>> arr = zarr.array(np.zeros((10, 20)), chunks=(3, 4)) -        >>> it = ChunkIterator(arr) -        >>> for slices in it: -        ...
print(slices) -        (slice(0, 3, None), slice(0, 4, None)) -        (slice(0, 3, None), slice(4, 8, None)) -        ... -        >>> it = ChunkIterator(arr, chunk_samples=False) -        >>> for slices in it: -        ...     print(slices) -        (slice(0, 3, None), slice(0, 20, None)) -        (slice(3, 6, None), slice(0, 20, None)) -        ... +        shape: The shape of the array. +        chunks: The chunk sizes for each dimension. +        dim_names: The names of the array dimensions, to be used with DataArray.isel(). +            If dim_names is not provided, a tuple of slices will be returned. + +    Attributes:  # noqa: DOC602 +        arr_shape: Shape of the array. +        len_chunks: Length of chunks in each dimension. +        dim_chunks: Number of chunks in each dimension. +        num_chunks: Total number of chunks. + +    Examples: +        >>> chunks = (3, 4, 5) +        >>> shape = (5, 11, 19) +        >>> dims = ["inline", "crossline", "depth"] +        >>> chunk_iter = ChunkIterator(shape=shape, chunks=chunks, dim_names=dims) +        >>> next(chunk_iter) +        {'inline': slice(0, 3, None), 'crossline': slice(0, 4, None), 'depth': slice(0, 5, None)} + +        >>> chunk_iter = ChunkIterator(shape=shape, chunks=chunks, dim_names=None) +        >>> next(chunk_iter) +        (slice(0, 3, None), slice(0, 4, None), slice(0, 5, None))     """ -    def __init__(self, array: Array, chunk_samples: bool = True): -        self.arr_shape = array.shape -        self.len_chunks = array.chunks - -        # If chunk_samples is False, set the last dimension's chunk size to its full extent -        if not chunk_samples: -            self.len_chunks = self.len_chunks[:-1] + (self.arr_shape[-1],) - -        # Calculate the number of chunks per dimension -        self.dim_chunks = [ -            ceil(len_dim / chunk) -            for len_dim, chunk in zip(self.arr_shape, self.len_chunks, strict=True) -        ] +    def __init__( +        self, shape: tuple[int, ...], chunks: tuple[int, ...], dim_names: tuple[str, ...] | None = None +    ): +        self.arr_shape = tuple(shape)  # Copy into an immutable tuple +        self.len_chunks = tuple(chunks)  # Copy into an immutable tuple +        self.dims = dim_names + +        # Compute number of chunks per dimension, and total number of chunks +        self.dim_chunks = tuple( +            ceil(len_dim / chunk) +            for len_dim, chunk in zip(self.arr_shape, self.len_chunks, strict=True) +        )         self.num_chunks = np.prod(self.dim_chunks) -        # Set up chunk index combinations using ranges for each dimension +        # Internal state for the iterator. This generates the C-ordered +        # permutation of chunk indices.         dim_ranges = [range(dim_len) for dim_len in self.dim_chunks]         self._ranges = itertools.product(*dim_ranges)         self._idx = 0     def __iter__(self) -> "ChunkIterator": -        """Return the iterator object itself.""" +        """Iteration context."""         return self     def __len__(self) -> int: -        """Return the total number of chunks.""" +        """Get total number of chunks."""         return self.num_chunks -    def __next__(self) -> tuple[slice, ...]: -        """Yield the next set of chunk slices. - -        Returns: -            A tuple of slice objects for each dimension. - -        Raises: -            StopIteration: When all chunks have been iterated over. -        """ -        if self._idx < self.num_chunks: +    def __next__(self) -> dict[str, slice]: +        """Iteration logic.""" +        if self._idx < self.num_chunks: +            # We build slices here.
It is dimension-agnostic.             current_start = next(self._ranges) +            # TODO (Dmitriy Repin): Enhance ChunkIterator to make the last slice, if needed, smaller +            # https://github.com/TGSAI/mdio-python/issues/586             start_indices = tuple(                 dim * chunk for dim, chunk in zip(current_start, self.len_chunks, strict=True)             ) @@ -88,7 +93,17 @@ def __next__(self) -> tuple[slice, ...]:                 slice(start, stop)                 for start, stop in zip(start_indices, stop_indices, strict=True)             ) +            if self.dims:  # noqa: SIM108 +                # Example +                # {"inline": slice(3, 6, None), "crossline": slice(0, 4, None), "depth": slice(0, 5, None)} +                result = dict(zip(self.dims, slices, strict=False)) +            else: +                # Example +                # (slice(3, 6, None), slice(0, 4, None), slice(0, 5, None)) +                result = slices +             self._idx += 1 -            return slices + +            return result         raise StopIteration diff --git a/src/mdio/core/storage_location.py b/src/mdio/core/storage_location.py new file mode 100644 index 00000000..3a5adb2a --- /dev/null +++ b/src/mdio/core/storage_location.py @@ -0,0 +1,87 @@ +"""StorageLocation class for managing local and cloud storage locations.""" + +from pathlib import Path +from typing import Any + +import fsspec + + +# TODO(Dmitriy Repin): Reuse fsspec functions for some methods we implemented here +# https://github.com/TGSAI/mdio-python/issues/597 +class StorageLocation: +    """A class to represent a local or cloud storage location for SEG-Y or MDIO files. + +    This class abstracts the storage location, allowing for both local file paths and +    cloud storage URIs (e.g., S3, GCS). It uses fsspec to check existence and manage options. +    Note: we do not want to make it a dataclass because we want the uri and the options to +    be read-only immutable properties. + +    uri: The URI of the storage location (e.g., '/path/to/file', 'file:///path/to/file', +        's3://bucket/path', 'gs://bucket/path'). +    options: Optional dictionary of options for the cloud, such as credentials.
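+
+    Example (an illustrative sketch; the paths and bucket below are hypothetical):
+        >>> local = StorageLocation("file:///tmp/data.mdio")
+        >>> local.uri  # normalized to an absolute local path
+        '/tmp/data.mdio'
+        >>> remote = StorageLocation("s3://bucket/data.segy", options={"anon": True})
+        >>> remote.exists()  # existence check via fsspec; needs the bucket to be reachable
+        False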
+ + """ + + def __init__(self, uri: str = "", options: dict[str, Any] = None): + self._uri = uri + self._options = options or {} + self._fs = None + + if uri.startswith(("s3://", "gs://")): + return + + if uri.startswith("file://"): + self._uri = self._uri.removeprefix("file://") + # For local paths, ensure they are absolute and resolved + self._uri = str(Path(self._uri).resolve()) + return + + @property + def uri(self) -> str: + """Get the URI (read-only).""" + return self._uri + + @property + def options(self) -> dict[str, Any]: + """Get the options (read-only).""" + # Return a copy to prevent external modification + return self._options.copy() + + @property + def _filesystem(self) -> fsspec.AbstractFileSystem: + """Get the fsspec filesystem instance for this storage location.""" + if self._fs is None: + self._fs = fsspec.filesystem(self._protocol, **self._options) + return self._fs + + @property + def _path(self) -> str: + """Extract the path portion from the URI.""" + if "://" in self._uri: + return self._uri.split("://", 1)[1] + return self._uri # For local paths without file:// prefix + + @property + def _protocol(self) -> str: + """Extract the protocol/scheme from the URI.""" + if "://" in self._uri: + return self._uri.split("://", 1)[0] + return "file" # Default to file protocol + + def exists(self) -> bool: + """Check if the storage location exists using fsspec.""" + try: + return self._filesystem.exists(self._path) + except Exception as e: + # Log the error and return False for safety + # In a production environment, you might want to use proper logging + print(f"Error checking existence of {self._uri}: {e}") + return False + + def __str__(self) -> str: + """String representation of the storage location.""" + return self._uri + + def __repr__(self) -> str: + """Developer representation of the storage location.""" + return f"StorageLocation(uri='{self._uri}', options={self._options})" diff --git a/src/mdio/schemas/__init__.py b/src/mdio/schemas/__init__.py new file mode 100644 index 00000000..8d9d8b86 --- /dev/null +++ b/src/mdio/schemas/__init__.py @@ -0,0 +1,17 @@ +"""MDIO schemas for different data types.""" + +from mdio.schemas.compressors import ZFP +from mdio.schemas.compressors import Blosc +from mdio.schemas.dimension import NamedDimension +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredField +from mdio.schemas.dtype import StructuredType + +__all__ = [ + "Blosc", + "ZFP", + "NamedDimension", + "ScalarType", + "StructuredField", + "StructuredType", +] diff --git a/src/mdio/schemas/base.py b/src/mdio/schemas/base.py new file mode 100644 index 00000000..5684fa10 --- /dev/null +++ b/src/mdio/schemas/base.py @@ -0,0 +1,40 @@ +"""Base models to subclass from.""" + +from pydantic import ConfigDict +from pydantic import Field +from pydantic.json_schema import GenerateJsonSchema + +from mdio.schemas.compressors import ZFP +from mdio.schemas.compressors import Blosc +from mdio.schemas.core import CamelCaseStrictModel +from mdio.schemas.dimension import NamedDimension +from mdio.schemas.dtype import DataTypeModel + +JSON_SCHEMA_DIALECT = GenerateJsonSchema.schema_dialect + + +class BaseDataset(CamelCaseStrictModel): + """A base class for MDIO datasets. + + We add schema dialect to extend the config of `StrictCamelBaseModel`. + We use the default Pydantic schema generator `GenerateJsonSchema` to + define the JSON schema dialect accurately. 
+ """ + + model_config = ConfigDict(json_schema_extra={"$schema": JSON_SCHEMA_DIALECT}) + + +class BaseArray(DataTypeModel, CamelCaseStrictModel): + """A base array schema.""" + + dimensions: list[NamedDimension] | list[str] = Field( + ..., description="List of Dimension collection or reference to dimension names." + ) + compressor: Blosc | ZFP | None = Field(default=None, description="Compression settings.") + + +class NamedArray(BaseArray): + """An array with a name.""" + + name: str = Field(..., description="Name of the array.") + long_name: str | None = Field(default=None, description="Fully descriptive name.") diff --git a/src/mdio/schemas/builder.py b/src/mdio/schemas/builder.py new file mode 100644 index 00000000..40908ff0 --- /dev/null +++ b/src/mdio/schemas/builder.py @@ -0,0 +1,154 @@ +"""Schema builders.""" + +from __future__ import annotations + +from typing import Any + +from mdio.schemas import NamedDimension +from mdio.schemas.v1.dataset import Dataset +from mdio.schemas.v1.dataset import DatasetMetadata +from mdio.schemas.v1.variable import Variable +from mdio.schemas.v1.variable import VariableMetadata + + +class VariableBuilder: + """Dataset builder.""" + + def __init__(self) -> None: + self.name = None + self.long_name = None + self.dtype = None + self.chunks = None + self.dims = None + self.coords = None + self.compressor = None + self.meta_dict = None + + def set_name(self, name: str) -> VariableBuilder: + """Set variable name.""" + self.name = name + return self + + def set_long_name(self, long_name: str) -> VariableBuilder: + """Add long, descriptive name to the variable.""" + self.long_name = long_name + return self + + def set_compressor(self, compressor: dict[str, Any]) -> VariableBuilder: + """Add long, descriptive name to the variable.""" + self.compressor = compressor + return self + + def add_dimension(self, *dimensions: str | dict[str, int]) -> VariableBuilder: + """Add a dimension to the dataset.""" + if self.dims is None: + self.dims = [] + + if isinstance(dimensions[0], str): + dims = list(dimensions) + elif isinstance(dimensions[0], dict): + dims = [ + NamedDimension(name=name, size=size) + for dim in dimensions + for name, size in dim.items() + ] + else: + raise NotImplementedError + + self.dims.extend(dims) + return self + + def add_coordinate(self, *names: str) -> VariableBuilder: + """Add a coordinate to the variable.""" + if self.coords is None: + self.coords = [] + + self.coords.extend(names) + return self + + def set_format(self, format_: str | dict[str, str]) -> VariableBuilder: + """Set variable format.""" + if isinstance(format_, dict): + fields = [{"name": n, "format": f} for n, f in format_.items()] + format_ = {"fields": fields} + + self.dtype = format_ + return self + + def set_chunks(self, chunks: list[int]) -> VariableBuilder: + """Set variable chunks.""" + if self.meta_dict is None: + self.meta_dict = {} + + self.meta_dict["chunkGrid"] = {"configuration": {"chunkShape": chunks}} + return self + + def set_units(self, units: dict[str, str]) -> VariableBuilder: + """Set variable units.""" + if self.meta_dict is None: + self.meta_dict = {} + + self.meta_dict["unitsV1"] = units + return self + + def add_attribute(self, key: str, value: Any) -> VariableBuilder: # noqa: ANN401 + """Add a user attribute to the variable metadata.""" + if self.meta_dict is None: + self.meta_dict = {} + + self.meta_dict["attributes"] = {key: value} + return self + + def build(self) -> Variable: + """Build the dataset model.""" + if self.chunks is not None and 
len(self.chunks) != len(self.dims): +            msg = "Variable chunks must have the same number of dimensions as the variable." +            raise ValueError(msg) + +        var_kwargs = {} + +        if self.meta_dict is not None: +            var_kwargs["metadata"] = VariableMetadata.model_validate(self.meta_dict) + +        return Variable( +            name=self.name, +            long_name=self.long_name, +            data_type=self.dtype, +            dimensions=self.dims, +            coordinates=self.coords, +            compressor=self.compressor, +            **var_kwargs, +        ) + + +class DatasetBuilder: +    """Dataset builder.""" + +    def __init__(self) -> None: +        self.variables = [] +        self.name = None +        self.metadata = None + +    def set_name(self, name: str) -> DatasetBuilder: +        """Set dataset name.""" +        self.name = name +        return self + +    def add_variable(self, variable: Variable) -> DatasetBuilder: +        """Add a variable to the dataset.""" +        self.variables.append(variable) +        return self + +    def add_variables(self, variables: list[Variable]) -> DatasetBuilder: +        """Add multiple variables to the dataset.""" +        for variable in variables: +            self.add_variable(variable) +        return self + +    def set_metadata(self, metadata: DatasetMetadata) -> DatasetBuilder: +        """Set the dataset metadata.""" +        self.metadata = metadata +        return self + +    def build(self) -> Dataset: +        """Build the dataset model.""" +        return Dataset(variables=self.variables, metadata=self.metadata) diff --git a/src/mdio/schemas/chunk_grid.py b/src/mdio/schemas/chunk_grid.py new file mode 100644 index 00000000..478f11cb --- /dev/null +++ b/src/mdio/schemas/chunk_grid.py @@ -0,0 +1,44 @@ +"""This module contains data models for Zarr's chunk grid.""" + +from __future__ import annotations + +from pydantic import Field + +from mdio.schemas.core import CamelCaseStrictModel + + +class RegularChunkShape(CamelCaseStrictModel): +    """Represents regular chunk sizes along each dimension.""" + +    chunk_shape: list[int] = Field( +        ..., description="Lengths of the chunk along each dimension of the array." +    ) + + +class RectilinearChunkShape(CamelCaseStrictModel): +    """Represents irregular chunk sizes along each dimension.""" + +    chunk_shape: list[list[int]] = Field( +        ..., +        description="Lengths of the chunks along each dimension of the array, one list per dimension.", +    ) + + +class RegularChunkGrid(CamelCaseStrictModel): +    """Represents a rectangular and regularly spaced chunk grid.""" + +    name: str = Field(default="regular", description="The name of the chunk grid.") + +    configuration: RegularChunkShape = Field( +        ..., description="Configuration of the regular chunk grid." +    ) + + +class RectilinearChunkGrid(CamelCaseStrictModel): +    """Represents a rectangular and irregularly spaced chunk grid.""" + +    name: str = Field(default="rectilinear", description="The name of the chunk grid.") + +    configuration: RectilinearChunkShape = Field( +        ..., description="Configuration of the irregular chunk grid." +    ) diff --git a/src/mdio/schemas/compressors.py b/src/mdio/schemas/compressors.py new file mode 100644 index 00000000..2eb87f7a --- /dev/null +++ b/src/mdio/schemas/compressors.py @@ -0,0 +1,135 @@ +"""This module contains a Pydantic model to parameterize compressors. + +Important Objects: +    - Blosc: A Pydantic model that represents a Blosc compression setup. +    - ZFP: Class that represents the ZFP compression model.
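+
+Illustrative usage (a sketch; the parameter values are arbitrary examples):
+
+    Blosc(algorithm="zstd", level=5)            # lossless Blosc settings
+    ZFP(mode="fixed_accuracy", tolerance=1e-4)  # lossy; a tolerance is required
+    ZFP(mode="reversible")                      # lossless ZFP; other fields stay None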
+""" + +from __future__ import annotations + +from enum import IntEnum +from enum import StrEnum + +from pydantic import Field +from pydantic import model_validator + +from mdio.schemas.core import CamelCaseStrictModel + + +class BloscAlgorithm(StrEnum): + """Enum for Blosc algorithm options.""" + + BLOSCLZ = "blosclz" + LZ4 = "lz4" + LZ4HC = "lz4hc" + ZLIB = "zlib" + ZSTD = "zstd" + + +class BloscShuffle(IntEnum): + """Enum for Blosc shuffle options.""" + + NOSHUFFLE = 0 + SHUFFLE = 1 + BITSHUFFLE = 2 + AUTOSHUFFLE = -1 + + +class Blosc(CamelCaseStrictModel): + """Data Model for Blosc options.""" + + name: str = Field(default="blosc", description="Name of the compressor.") + algorithm: BloscAlgorithm = Field( + default=BloscAlgorithm.LZ4, + description="The Blosc compression algorithm to be used.", + ) + level: int = Field(default=5, ge=0, le=9, description="The compression level.") + shuffle: BloscShuffle = Field( + default=BloscShuffle.SHUFFLE, + description="The shuffle strategy to be applied before compression.", + ) + blocksize: int = Field( + default=0, + description="The size of the block to be used for compression.", + ) + + +zfp_mode_map = { + "fixed_rate": 2, + "fixed_precision": 3, + "fixed_accuracy": 4, + "reversible": 5, +} + + +class ZFPMode(StrEnum): + """Enum for ZFP algorithm modes.""" + + FIXED_RATE = "fixed_rate" + FIXED_PRECISION = "fixed_precision" + FIXED_ACCURACY = "fixed_accuracy" + REVERSIBLE = "reversible" + + @property + def int_code(self) -> int: + """Return the integer code of ZFP mode.""" + return zfp_mode_map[self.value] + + +class ZFP(CamelCaseStrictModel): + """Data Model for ZFP options.""" + + name: str = Field(default="zfp", description="Name of the compressor.") + mode: ZFPMode = Field() + + tolerance: float | None = Field( + default=None, + description="Fixed accuracy in terms of absolute error tolerance.", + ) + + rate: float | None = Field( + default=None, + description="Fixed rate in terms of number of compressed bits per value.", + ) + + precision: int | None = Field( + default=None, + description="Fixed precision in terms of number of uncompressed bits per value.", + ) + + write_header: bool = Field( + default=True, + description="Encode array shape, scalar type, and compression parameters.", + ) + + @model_validator(mode="after") + def check_requirements(self) -> ZFP: + """Check if ZFP parameters make sense.""" + mode = self.mode + + # Check if reversible mode is provided without other parameters. 
+        if mode == ZFPMode.REVERSIBLE and any( +            getattr(self, key) is not None for key in ["tolerance", "rate", "precision"] +        ): +            msg = "Other fields must be None in REVERSIBLE mode" +            raise ValueError(msg) + +        if mode == ZFPMode.FIXED_ACCURACY and self.tolerance is None: +            msg = "Tolerance required for FIXED_ACCURACY mode" +            raise ValueError(msg) + +        if mode == ZFPMode.FIXED_RATE and self.rate is None: +            msg = "Rate required for FIXED_RATE mode" +            raise ValueError(msg) + +        if mode == ZFPMode.FIXED_PRECISION and self.precision is None: +            msg = "Precision required for FIXED_PRECISION mode" +            raise ValueError(msg) + +        return self + + +class CompressorModel(CamelCaseStrictModel): +    """Model representing compressor configuration.""" + +    compressor: Blosc | ZFP | None = Field(default=None, description="Compression settings.") diff --git a/src/mdio/schemas/core.py b/src/mdio/schemas/core.py new file mode 100644 index 00000000..34a09066 --- /dev/null +++ b/src/mdio/schemas/core.py @@ -0,0 +1,49 @@ +"""This module implements the core components of the MDIO schemas.""" + +from __future__ import annotations + +from typing import Any +from typing import get_type_hints + +from pydantic import BaseModel +from pydantic import ConfigDict +from pydantic.alias_generators import to_camel + + +def model_fields(model: type[BaseModel]) -> dict[str, tuple[Any, Any]]: +    """Extract Pydantic BaseModel fields. + +    Args: +        model: The model class for which the fields will be extracted. + +    Returns: +        A dictionary containing the fields of the model along with +        their corresponding type annotations and `FieldInfo` definitions. + +    Example: +        >>> class MyModel(BaseModel): +        ...     name: str +        ...     age: int = 0 +        ... +        >>> model_fields(MyModel) +        {'name': (str, FieldInfo(annotation=str, required=True)), 'age': (int, FieldInfo(annotation=int, required=False, default=0))} +    """ +    annotations = get_type_hints(model) + +    fields = {} +    for field_name, field in model.model_fields.items(): +        fields[field_name] = (annotations[field_name], field) + +    return fields + + +class StrictModel(BaseModel): +    """A model with forbidden extras.""" + +    model_config = ConfigDict(extra="forbid", populate_by_name=True) + + +class CamelCaseStrictModel(StrictModel): +    """A model with forbidden extras and camel case aliases.""" + +    model_config = ConfigDict(alias_generator=to_camel) diff --git a/src/mdio/schemas/dimension.py b/src/mdio/schemas/dimension.py new file mode 100644 index 00000000..62185a39 --- /dev/null +++ b/src/mdio/schemas/dimension.py @@ -0,0 +1,12 @@ +"""Dimension schema.""" + +from pydantic import Field + +from mdio.schemas.core import CamelCaseStrictModel + + +class NamedDimension(CamelCaseStrictModel): +    """Represents a single dimension with a name and size.""" + +    name: str = Field(..., description="Unique identifier for the dimension.") +    size: int = Field(..., gt=0, description="Total size of the dimension.") diff --git a/src/mdio/schemas/dtype.py b/src/mdio/schemas/dtype.py new file mode 100644 index 00000000..af7f6582 --- /dev/null +++ b/src/mdio/schemas/dtype.py @@ -0,0 +1,53 @@ +"""Schemas for scalar types. + +We take booleans, unsigned and signed integers, floats, and +complex numbers from numpy data types and allow those.
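+
+Illustrative usage (a sketch of the models defined below):
+
+    ScalarType("float32")  # coerces to ScalarType.FLOAT32
+    StructuredType(fields=[StructuredField(name="inline", format=ScalarType.INT32)])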
+""" + +from __future__ import annotations + +from enum import StrEnum + +from pydantic import Field + +from mdio.schemas.core import CamelCaseStrictModel + + +class ScalarType(StrEnum): + """Scalar array data type.""" + + BOOL = "bool" + INT8 = "int8" + INT16 = "int16" + INT32 = "int32" + INT64 = "int64" + UINT8 = "uint8" + UINT16 = "uint16" + UINT32 = "uint32" + UINT64 = "uint64" + FLOAT16 = "float16" + FLOAT32 = "float32" + FLOAT64 = "float64" + FLOAT128 = "float128" + COMPLEX64 = "complex64" + COMPLEX128 = "complex128" + COMPLEX256 = "complex256" + + +class StructuredField(CamelCaseStrictModel): + """Structured array field with name, format.""" + + format: ScalarType = Field(...) + name: str = Field(...) + + +class StructuredType(CamelCaseStrictModel): + """Structured array type with packed fields.""" + + fields: list[StructuredField] = Field() + + +class DataTypeModel(CamelCaseStrictModel): + """Structured array type with fields and total item size.""" + + data_type: ScalarType | StructuredType = Field(..., description="Type of the array.") diff --git a/src/mdio/schemas/metadata.py b/src/mdio/schemas/metadata.py new file mode 100644 index 00000000..dc2d6676 --- /dev/null +++ b/src/mdio/schemas/metadata.py @@ -0,0 +1,31 @@ +"""Metadata schemas and conventions.""" + +from typing import Any + +from pydantic import Field + +from mdio.schemas.chunk_grid import RectilinearChunkGrid +from mdio.schemas.chunk_grid import RegularChunkGrid +from mdio.schemas.core import CamelCaseStrictModel + + +class ChunkGridMetadata(CamelCaseStrictModel): + """Definition of chunk grid.""" + + chunk_grid: RegularChunkGrid | RectilinearChunkGrid | None = Field( + default=None, + description="Chunk grid specification for the array.", + ) + + +class VersionedMetadataConvention(CamelCaseStrictModel): + """Data model for versioned metadata convention.""" + + +class UserAttributes(CamelCaseStrictModel): + """User defined attributes as key/value pairs.""" + + attributes: dict[str, Any] | None = Field( + default=None, + description="User defined attributes as key/value pairs.", + ) diff --git a/src/mdio/schemas/units.py b/src/mdio/schemas/units.py new file mode 100644 index 00000000..6844b433 --- /dev/null +++ b/src/mdio/schemas/units.py @@ -0,0 +1,51 @@ +"""Common units for resource assessment data.""" + +from __future__ import annotations + +from enum import Enum +from enum import unique + +from pydantic import Field +from pydantic import create_model + +from mdio.schemas.core import CamelCaseStrictModel + + +@unique +class UnitEnum(str, Enum): + """An Enum representing units as strings, from pint.""" + + +def create_unit_model( + unit_enum: type[UnitEnum], + model_name: str, + quantity: str, + module: str, +) -> type[CamelCaseStrictModel]: + """Dynamically creates a pydantic model from a unit Enum. + + Args: + unit_enum: UnitEnum representing the units for a specific quantity. + model_name: The name of the model to be created. + quantity: String representing the quantity for which the unit model is created. + module: Name of the module in which the model is to be created. + This should be the `__name__` attribute of the module. + + Returns: + A Pydantic Model representing the unit model derived from the BaseModel. 
+ +    Example: +        unit_enum = UnitEnum +        model_name = "LengthUnitModel" +        quantity = "length" +        create_unit_model(unit_enum, model_name, quantity, __name__) +    """ +    fields = {quantity: (unit_enum, Field(..., description=f"Unit of {quantity}."))} + +    return create_model( +        model_name, +        **fields, +        __base__=CamelCaseStrictModel, +        __doc__=f"Model representing units of {quantity}.", +        __module__=module, +    ) diff --git a/src/mdio/schemas/v0/__init__.py b/src/mdio/schemas/v0/__init__.py new file mode 100644 index 00000000..9b953304 --- /dev/null +++ b/src/mdio/schemas/v0/__init__.py @@ -0,0 +1,5 @@ +"""Schema specific to MDIO v0.""" + +from mdio.schemas.v0.dataset import DatasetModelV0 + +__all__ = ["DatasetModelV0"] diff --git a/src/mdio/schemas/v0/dataset.py b/src/mdio/schemas/v0/dataset.py new file mode 100644 index 00000000..50103736 --- /dev/null +++ b/src/mdio/schemas/v0/dataset.py @@ -0,0 +1,82 @@ +"""Dataset model for MDIO V0.""" + +from __future__ import annotations + +from pydantic import AwareDatetime +from pydantic import Field + +from mdio.schemas.base import BaseArray +from mdio.schemas.base import BaseDataset +from mdio.schemas.core import CamelCaseStrictModel +from mdio.schemas.core import StrictModel + + +class DimensionModelV0(CamelCaseStrictModel): +    """Represents dimension schema for MDIO v0.""" + +    name: str = Field(..., description="Name of the dimension.") +    coords: list[int] = Field(..., description="Coordinate labels (ticks).") + + +class DatasetMetadataModelV0(StrictModel): +    """Represents dataset attributes schema for MDIO v0.""" + +    api_version: str = Field( +        ..., +        description="MDIO version.", +    ) + +    created: AwareDatetime = Field( +        ..., +        description="Creation time with TZ info.", +    ) + +    dimension: list[DimensionModelV0] = Field( +        ..., +        description="Dimensions.", +    ) + +    mean: float | None = Field( +        default=None, +        description="Mean value of the samples.", +    ) + +    # Statistical information +    std: float | None = Field(default=None, description="Standard deviation of the samples.") + +    rms: float | None = Field(default=None, description="Root mean squared value of the samples.") + +    min: float | None = Field( +        default=None, +        description="Minimum value of the samples.", +    ) + +    max: float | None = Field( +        default=None, +        description="Maximum value of the samples.", +    ) + +    trace_count: int | None = Field(default=None, description="Number of traces in the SEG-Y file.") + + +class VariableModelV0(BaseArray): +    """Represents an MDIO v0 variable schema.""" + + +class DatasetModelV0(BaseDataset): +    """Represents an MDIO v0 dataset schema.""" + +    seismic: list[VariableModelV0] = Field( +        ..., +        description="Variable containing seismic.", +    ) + +    headers: list[VariableModelV0] = Field( +        ..., +        description="Variable containing headers.", +    ) + +    metadata: DatasetMetadataModelV0 = Field( +        ..., +        description="Dataset metadata.", +    ) diff --git a/src/mdio/schemas/v1/__init__.py b/src/mdio/schemas/v1/__init__.py new file mode 100644 index 00000000..fb98d55e --- /dev/null +++ b/src/mdio/schemas/v1/__init__.py @@ -0,0 +1,5 @@ +"""Schema specific to MDIO v1.""" + +from mdio.schemas.v1.dataset import Dataset + +__all__ = ["Dataset"] diff --git a/src/mdio/schemas/v1/dataset.py b/src/mdio/schemas/v1/dataset.py new file mode 100644 index 00000000..f101093b --- /dev/null +++ b/src/mdio/schemas/v1/dataset.py @@ -0,0 +1,49 @@ +"""Dataset model for MDIO V1.""" + +from pydantic import AwareDatetime +from pydantic import Field +from pydantic import create_model + +from mdio.schemas.base
import BaseDataset +from mdio.schemas.core import CamelCaseStrictModel +from mdio.schemas.core import model_fields +from mdio.schemas.metadata import UserAttributes +from mdio.schemas.v1.variable import Variable + + +class DatasetInfo(CamelCaseStrictModel): +    """Contains information about a dataset.""" + +    name: str = Field(..., description="Name or identifier for the dataset.") + +    api_version: str = Field( +        ..., +        description="The version of the MDIO API that the dataset complies with.", +    ) + +    created_on: AwareDatetime = Field( +        ..., +        description=( +            "The timestamp indicating when the dataset was first created, " +            "including timezone information. Expressed in ISO 8601 format." +        ), +    ) + + +DatasetMetadata = create_model( +    "DatasetMetadata", +    **model_fields(DatasetInfo), +    **model_fields(UserAttributes), +    __base__=CamelCaseStrictModel, +) +DatasetMetadata.__doc__ = "The metadata about the dataset." + + +class Dataset(BaseDataset): +    """Represents an MDIO v1 dataset. + +    A dataset consists of variables and metadata. +    """ + +    variables: list[Variable] = Field(..., description="Variables in MDIO dataset.") +    metadata: DatasetMetadata = Field(..., description="Dataset metadata.") diff --git a/src/mdio/schemas/v1/dataset_builder.py b/src/mdio/schemas/v1/dataset_builder.py new file mode 100644 index 00000000..698b1874 --- /dev/null +++ b/src/mdio/schemas/v1/dataset_builder.py @@ -0,0 +1,335 @@ +"""Builder pattern implementation for MDIO v1 schema models.""" + +from datetime import UTC +from datetime import datetime +from enum import Enum +from enum import auto +from importlib import metadata +from typing import Any +from typing import TypeAlias + +from pydantic import BaseModel + +from mdio.schemas.compressors import ZFP +from mdio.schemas.compressors import Blosc +from mdio.schemas.dimension import NamedDimension +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredType +from mdio.schemas.metadata import ChunkGridMetadata +from mdio.schemas.metadata import UserAttributes +from mdio.schemas.v1.dataset import Dataset +from mdio.schemas.v1.dataset import DatasetInfo +from mdio.schemas.v1.stats import StatisticsMetadata +from mdio.schemas.v1.units import AllUnits +from mdio.schemas.v1.variable import Coordinate +from mdio.schemas.v1.variable import Variable + +AnyMetadataList: TypeAlias = list[ +    AllUnits | UserAttributes | ChunkGridMetadata | StatisticsMetadata | DatasetInfo +] +CoordinateMetadataList: TypeAlias = list[AllUnits | UserAttributes] +VariableMetadataList: TypeAlias = list[ +    AllUnits | UserAttributes | ChunkGridMetadata | StatisticsMetadata +] +DatasetMetadataList: TypeAlias = list[DatasetInfo | UserAttributes] + + +class _BuilderState(Enum): +    """States for the template builder.""" + +    INITIAL = auto() +    HAS_DIMENSIONS = auto() +    HAS_COORDINATES = auto() +    HAS_VARIABLES = auto() + + +def _get_named_dimension( +    dimensions: list[NamedDimension], name: str, size: int | None = None +) -> NamedDimension | None: +    """Get a dimension by name and optional size from the list[NamedDimension].""" +    if dimensions is None: +        return None +    if not isinstance(name, str): +        msg = f"Expected str, got {type(name).__name__}" +        raise TypeError(msg) + +    nd = next((dim for dim in dimensions if dim.name == name), None) +    if nd is None: +        return None +    if size is not None and nd.size != size: +        msg = f"Dimension {name!r} found but size {nd.size} does not match expected size {size}" +        raise ValueError(msg) +    return nd + + +def _to_dictionary(val: BaseModel |
dict[str, Any] | AnyMetadataList) -> dict[str, Any] | None: +    """Convert a dictionary, list or pydantic BaseModel to a dictionary.""" +    if val is None: +        return None +    if isinstance(val, BaseModel): +        return val.model_dump(mode="json", by_alias=True) +    if isinstance(val, dict): +        return val +    if isinstance(val, list): +        metadata_dict = {} +        for md in val: +            if md is None: +                continue +            metadata_dict.update(_to_dictionary(md)) +        return metadata_dict +    msg = f"Expected BaseModel, dict or list, got {type(val).__name__}" +    raise TypeError(msg) + + +class MDIODatasetBuilder: +    """Builder for creating MDIO datasets with enforced build order. + +    This builder implements the builder pattern to create MDIO datasets with a v1 schema. +    It enforces a specific build order to ensure valid dataset construction: +    1. Must add dimensions first via add_dimension() +    2. Can optionally add coordinates via add_coordinate() +    3. Must add variables via add_variable() +    4. Must call build() to create the dataset. +    """ + +    def __init__(self, name: str, attributes: UserAttributes | None = None): +        try: +            api_version = metadata.version("multidimio") +        except metadata.PackageNotFoundError: +            api_version = "unknown" + +        self._info = DatasetInfo(name=name, api_version=api_version, created_on=datetime.now(UTC)) +        self._attributes = attributes +        self._dimensions: list[NamedDimension] = [] +        self._coordinates: list[Coordinate] = [] +        self._variables: list[Variable] = [] +        self._state = _BuilderState.INITIAL +        self._unnamed_variable_counter = 0 + +    def add_dimension(self, name: str, size: int) -> "MDIODatasetBuilder": +        """Add a dimension. + +        This function must be called at least once before adding coordinates or variables. + +        Args: +            name: Name of the dimension +            size: Size of the dimension + +        Raises: +            ValueError: If 'name' is not a non-empty string. +                If the dimension is already defined. + +        Returns: +            self: Returns self for method chaining. +        """ +        if not name: +            msg = "'name' must be a non-empty string" +            raise ValueError(msg) + +        # Validate that the dimension is not already defined +        old_var = next((e for e in self._dimensions if e.name == name), None) +        if old_var is not None: +            msg = "Adding dimension with the same name twice is not allowed" +            raise ValueError(msg) + +        dim = NamedDimension(name=name, size=size) +        self._dimensions.append(dim) +        self._state = _BuilderState.HAS_DIMENSIONS +        return self + +    def add_coordinate(  # noqa: PLR0913 +        self, +        name: str, +        *, +        long_name: str | None = None, +        dimensions: list[str], +        data_type: ScalarType, +        compressor: Blosc | ZFP | None = None, +        metadata_info: CoordinateMetadataList | None = None, +    ) -> "MDIODatasetBuilder": +        """Add a coordinate after adding at least one dimension. + +        This function must be called after all required dimensions are added via add_dimension(). +        This call will create a coordinate variable. + +        Args: +            name: Name of the coordinate +            long_name: Optional long name for the coordinate +            dimensions: List of dimension names that the coordinate is associated with +            data_type: Data type for the coordinate +            compressor: Compressor used for the coordinate (defaults to None) +            metadata_info: Optional metadata information for the coordinate + +        Raises: +            ValueError: If no dimensions have been added yet. +                If 'name' is not a non-empty string. +                If 'dimensions' is not a non-empty list. +                If the coordinate is already defined. +                If any referenced dimension is not already defined.
+ +        Returns: +            self: Returns self for method chaining. +        """ +        if self._state == _BuilderState.INITIAL: +            msg = "Must add at least one dimension before adding coordinates" +            raise ValueError(msg) +        if not name: +            msg = "'name' must be a non-empty string" +            raise ValueError(msg) +        if dimensions is None or not dimensions: +            msg = "'dimensions' must be a non-empty list" +            raise ValueError(msg) +        old_var = next((e for e in self._coordinates if e.name == name), None) +        # Validate that the coordinate is not already defined +        if old_var is not None: +            msg = "Adding coordinate with the same name twice is not allowed" +            raise ValueError(msg) + +        # Validate that all referenced dimensions are already defined +        named_dimensions = [] +        for dim_name in dimensions: +            nd = _get_named_dimension(self._dimensions, dim_name) +            if nd is None: +                msg = f"Pre-existing dimension named {dim_name!r} is not found" +                raise ValueError(msg) +            named_dimensions.append(nd) + +        meta_dict = _to_dictionary(metadata_info) +        coord = Coordinate( +            name=name, +            longName=long_name, +            dimensions=named_dimensions, +            compressor=compressor, +            dataType=data_type, +            metadata=meta_dict, +        ) +        self._coordinates.append(coord) + +        # Add a coordinate variable to the dataset +        self.add_variable( +            name=coord.name, +            long_name=coord.long_name, +            dimensions=dimensions,  # dimension names (list[str]) +            data_type=coord.data_type, +            compressor=compressor, +            coordinates=[name],  # Use the coordinate name as a reference +            metadata_info=coord.metadata, +        ) + +        self._state = _BuilderState.HAS_COORDINATES +        return self + +    def add_variable(  # noqa: PLR0913 +        self, +        name: str, +        *, +        long_name: str | None = None, +        dimensions: list[str], +        data_type: ScalarType | StructuredType, +        compressor: Blosc | ZFP | None = None, +        coordinates: list[str] | None = None, +        metadata_info: VariableMetadataList | None = None, +    ) -> "MDIODatasetBuilder": +        """Add a variable after adding at least one dimension and, optionally, coordinate. + +        This function must be called after all required dimensions are added via add_dimension(). +        This function must be called after all required coordinates are added via add_coordinate(). + +        If this function is called with a single dimension name that matches the variable name, +        it will create a dimension variable. Dimension variables are special variables that +        represent sampling along a dimension. + +        Args: +            name: Name of the variable +            long_name: Optional long name for the variable +            dimensions: List of dimension names that the variable is associated with +            data_type: Data type for the variable +            compressor: Compressor used for the variable (defaults to None) +            coordinates: List of coordinate names that the variable is associated with +                (defaults to None, meaning no coordinates) +            metadata_info: Optional metadata information for the variable + +        Raises: +            ValueError: If no dimensions have been added yet. +                If 'name' is not a non-empty string. +                If 'dimensions' is not a non-empty list. +                If the variable is already defined. +                If any referenced dimension is not already defined. +                If any referenced coordinate is not already defined. + +        Returns: +            self: Returns self for method chaining.
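+
+        Example (an illustrative sketch; the names and sizes are hypothetical):
+            >>> builder = (
+            ...     MDIODatasetBuilder(name="example")
+            ...     .add_dimension("inline", 256)
+            ...     .add_dimension("sample", 1024)
+            ...     .add_variable(
+            ...         name="amplitude",
+            ...         dimensions=["inline", "sample"],
+            ...         data_type=ScalarType.FLOAT32,
+            ...     )
+            ... )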
+ """ + if self._state == _BuilderState.INITIAL: + msg = "Must add at least one dimension before adding variables" + raise ValueError(msg) + if not name: + msg = "'name' must be a non-empty string" + raise ValueError(msg) + if dimensions is None or not dimensions: + msg = "'dimensions' must be a non-empty list" + raise ValueError(msg) + + # Validate that the variable is not already defined + old_var = next((e for e in self._variables if e.name == name), None) + if old_var is not None: + msg = "Adding variable with the same name twice is not allowed" + raise ValueError(msg) + + # Validate that all referenced dimensions are already defined + named_dimensions = [] + for dim_name in dimensions: + nd = _get_named_dimension(self._dimensions, dim_name) + if nd is None: + msg = f"Pre-existing dimension named {dim_name!r} is not found" + raise ValueError(msg) + named_dimensions.append(nd) + + coordinate_objs: list[Coordinate] = [] + # Validate that all referenced coordinates are already defined + if coordinates is not None: + for coord in coordinates: + c: Coordinate = next((c for c in self._coordinates if c.name == coord), None) + if c is not None: + coordinate_objs.append(c) + else: + msg = f"Pre-existing coordinate named {coord!r} is not found" + raise ValueError(msg) + + # If this is a dimension coordinate variable, embed the Coordinate into it + if coordinates is not None and len(coordinates) == 1 and coordinates[0] == name: + coordinates = coordinate_objs + + meta_dict = _to_dictionary(metadata_info) + var = Variable( + name=name, + long_name=long_name, + dimensions=named_dimensions, + data_type=data_type, + compressor=compressor, + coordinates=coordinates, + metadata=meta_dict, + ) + self._variables.append(var) + + self._state = _BuilderState.HAS_VARIABLES + return self + + def build(self) -> Dataset: + """Build the final dataset. + + This function must be called after at least one dimension is added via add_dimension(). + It will create a Dataset object with all added dimensions, coordinates, and variables. + + Raises: + ValueError: If no dimensions have been added yet. + + Returns: + Dataset: The built dataset with all added dimensions, coordinates, and variables. + """ + if self._state == _BuilderState.INITIAL: + msg = "Must add at least one dimension before building" + raise ValueError(msg) + + var_meta_dict = _to_dictionary([self._info, self._attributes]) + return Dataset(variables=self._variables, metadata=var_meta_dict) diff --git a/src/mdio/schemas/v1/dataset_serializer.py b/src/mdio/schemas/v1/dataset_serializer.py new file mode 100644 index 00000000..af77e0c2 --- /dev/null +++ b/src/mdio/schemas/v1/dataset_serializer.py @@ -0,0 +1,260 @@ +"""Convert MDIO v1 schema Dataset to Xarray DataSet and write it in Zarr.""" + +import numpy as np +from numcodecs import Blosc as nc_Blosc +from xarray import DataArray as xr_DataArray +from xarray import Dataset as xr_Dataset +from zarr import zeros as zarr_zeros +from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding + +from mdio.converters.type_converter import to_numpy_dtype + +try: + # zfpy is an optional dependency for ZFP compression + # It is not installed by default, so we check for its presence and import it only if available. 
+    from zfpy import ZFPY as zfpy_ZFPY  # noqa: N811 +except ImportError: +    zfpy_ZFPY = None  # noqa: N816 + +from mdio.constants import fill_value_map +from mdio.schemas.compressors import ZFP as mdio_ZFP  # noqa: N811 +from mdio.schemas.compressors import Blosc as mdio_Blosc +from mdio.schemas.dimension import NamedDimension +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredType +from mdio.schemas.v1.dataset import Dataset +from mdio.schemas.v1.dataset_builder import _to_dictionary +from mdio.schemas.v1.variable import Coordinate +from mdio.schemas.v1.variable import Variable + + +def _get_all_named_dimensions(dataset: Dataset) -> dict[str, NamedDimension]: +    """Get all NamedDimensions from the dataset variables. + +    This function returns a dictionary of NamedDimensions, but if some dimensions +    are not resolvable, they will not be included in the result. + +    Args: +        dataset: The MDIO Dataset to extract NamedDimensions from. + +    Note: +        The Dataset Builder ensures that all dimensions are resolvable by always embedding +        dimensions as NamedDimension and never as str. +        If the dataset is created in a different way, some dimensions may be specified as +        dimension names (str) instead of NamedDimension. In this case, we will try to resolve +        them to NamedDimension, but if the dimension is not found, it will be skipped. +        It is the responsibility of the Dataset creator to ensure that all dimensions are +        resolvable at the Dataset level. + +    Returns: +        A dictionary mapping dimension names to NamedDimension instances. +    """ +    all_named_dims: dict[str, NamedDimension] = {} +    for v in dataset.variables: +        if v.dimensions is not None: +            for d in v.dimensions: +                if isinstance(d, NamedDimension): +                    all_named_dims[d.name] = d +    return all_named_dims + + +def _get_dimension_names(var: Variable) -> list[str]: +    """Get the names of dimensions for a variable. + +    Note: +        We expect that Datasets produced by DatasetBuilder have all dimensions +        embedded as NamedDimension, but we also support dimension name strings for +        compatibility with Datasets produced in a different way. +    """ +    dim_names: list[str] = [] +    if var.dimensions is not None: +        for d in var.dimensions: +            if isinstance(d, NamedDimension): +                dim_names.append(d.name) +            elif isinstance(d, str): +                dim_names.append(d) +    return dim_names + + +def _get_coord_names(var: Variable) -> list[str]: +    """Get the names of coordinates for a variable.""" +    coord_names: list[str] = [] +    if var.coordinates is not None: +        for c in var.coordinates: +            if isinstance(c, Coordinate): +                coord_names.append(c.name) +            elif isinstance(c, str): +                coord_names.append(c) +    return coord_names + + +def _get_zarr_shape(var: Variable, all_named_dims: dict[str, NamedDimension]) -> tuple[int, ...]: +    """Get the shape of a variable for Zarr storage. + +    Note: +        We expect that Datasets produced by DatasetBuilder have all dimensions +        embedded as NamedDimension, but we also support dimension name strings for +        compatibility with Datasets produced in a different way. +    """ +    shape: list[int] = [] +    for dim in var.dimensions: +        if isinstance(dim, NamedDimension): +            shape.append(dim.size) +        elif isinstance(dim, str): +            named_dim = all_named_dims.get(dim) +            if named_dim is None: +                err = f"Dimension named '{dim}' can't be resolved to a NamedDimension."
+                raise ValueError(err) +            shape.append(named_dim.size) +    return tuple(shape) + + +def _get_zarr_chunks(var: Variable, all_named_dims: dict[str, NamedDimension]) -> tuple[int, ...]: +    """Get the chunk shape for a variable, defaulting to its shape if no chunk grid is defined.""" +    if var.metadata is not None and var.metadata.chunk_grid is not None: +        return tuple(var.metadata.chunk_grid.configuration.chunk_shape) +    # Default to full shape if no chunk grid is defined +    return _get_zarr_shape(var, all_named_dims=all_named_dims) + + +def _convert_compressor( +    compressor: mdio_Blosc | mdio_ZFP | None, +) -> nc_Blosc | zfpy_ZFPY | None: +    """Convert a compressor to a numcodecs compatible format.""" +    if compressor is None: +        return None + +    if isinstance(compressor, mdio_Blosc): +        return nc_Blosc( +            cname=compressor.algorithm.value, +            clevel=compressor.level, +            shuffle=compressor.shuffle.value, +            blocksize=compressor.blocksize if compressor.blocksize > 0 else 0, +        ) + +    if isinstance(compressor, mdio_ZFP): +        if zfpy_ZFPY is None: +            msg = "zfpy and numcodecs are required to use ZFP compression" +            raise ImportError(msg) +        return zfpy_ZFPY( +            mode=compressor.mode.value, +            tolerance=compressor.tolerance, +            rate=compressor.rate, +            precision=compressor.precision, +        ) + +    msg = f"Unsupported compressor model: {type(compressor)}" +    raise TypeError(msg) + + +def _get_fill_value(data_type: ScalarType | StructuredType | str) -> Any: +    """Get the fill value for a given data type. + +    The Zarr fill_value is a scalar value providing the default value to use for +    uninitialized portions of the array, or null if no fill_value is to be used +    https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html +    """ +    if isinstance(data_type, ScalarType): +        return fill_value_map.get(data_type) +    if isinstance(data_type, StructuredType): +        d_type = to_numpy_dtype(data_type) +        return np.zeros((), dtype=d_type) +    if isinstance(data_type, str): +        return "" +    # If we do not have a fill value for this type, use None +    return None + + +def to_xarray_dataset(mdio_ds: Dataset) -> xr_Dataset:  # noqa: PLR0912 +    """Build an XArray dataset with correct dimensions and dtypes. + +    This function constructs the underlying data structure for an XArray dataset, +    handling dimension mapping, data types, and metadata organization. + +    Args: +        mdio_ds: The source MDIO dataset to construct from. + +    Notes: +        - We can't use Dask (e.g., dask_array.zeros) because of the problems with +          structured type support. We will use zarr.zeros instead. + +    Returns: +        The constructed dataset with proper MDIO structure and metadata. +    """ +    # See the xarray tutorial for more details on how to create datasets: +    # https://tutorial.xarray.dev/fundamentals/01.1_creating_data_structures.html + +    all_named_dims = _get_all_named_dimensions(mdio_ds) + +    # First pass: Build all variables +    data_arrays: dict[str, xr_DataArray] = {} +    for v in mdio_ds.variables: +        shape = _get_zarr_shape(v, all_named_dims=all_named_dims) +        dtype = to_numpy_dtype(v.data_type) +        chunks = _get_zarr_chunks(v, all_named_dims=all_named_dims) + +        # Use zarr.zeros to create an empty array with the specified shape and dtype +        # NOTE: zarr_format=2 is essential; to_zarr() will fail if zarr_format=3 is used +        data = zarr_zeros(shape=shape, dtype=dtype, zarr_format=2) +        # Create a DataArray for the variable.
+        dim_names = _get_dimension_names(v)
+        data_array = xr_DataArray(data, dims=dim_names)
+
+        # Add array attributes
+        if v.metadata is not None:
+            meta_dict = _to_dictionary(v.metadata)
+            # Exclude chunk_grid; chunking is carried via the encoding below
+            meta_dict.pop("chunkGrid", None)
+            # Remove empty attributes
+            meta_dict = {k: v for k, v in meta_dict.items() if v is not None}
+            # Add metadata to the data array attributes
+            data_array.attrs.update(meta_dict)
+        if v.long_name:
+            data_array.attrs["long_name"] = v.long_name
+
+        # Create a custom chunk key encoding with "/" as separator
+        chunk_key_encoding = V2ChunkKeyEncoding(separator="/").to_dict()
+        encoding = {
+            "chunks": chunks,
+            "chunk_key_encoding": chunk_key_encoding,
+            "compressor": _convert_compressor(v.compressor),
+        }
+        # NumPy structured data types have a fields attribute, while scalar types do not.
+        if not hasattr(v.data_type, "fields"):
+            # TODO(Dmitriy Repin): workaround for the bug
+            # https://github.com/TGSAI/mdio-python/issues/582
+            # For structured data types we will not use the _FillValue
+            # NOTE: See Zarr documentation on use of fill_value and _FillValue in Zarr v2 vs v3
+            encoding["_FillValue"] = _get_fill_value(v.data_type)
+
+        data_array.encoding = encoding
+
+        # Let's store the data array for the second pass
+        data_arrays[v.name] = data_array
+
+    # Second pass: Add non-dimension coordinates to the data arrays
+    for v in mdio_ds.variables:
+        da = data_arrays[v.name]
+        non_dim_coords_names = set(_get_coord_names(v)) - set(_get_dimension_names(v)) - {v.name}
+        # Create and populate a dictionary {coord_name: DataArray for the coordinate}
+        non_dim_coords_dict: dict[str, xr_DataArray] = {}
+        for name in non_dim_coords_names:
+            non_dim_coords_dict[name] = data_arrays[name]
+        if non_dim_coords_dict:
+            # NOTE: here is a gotcha: assign_coords() does not update in-place,
+            # but returns an updated instance!
+            data_arrays[v.name] = da.assign_coords(non_dim_coords_dict)
+
+    # Now let's create a dataset with all data arrays
+    xr_ds = xr_Dataset(data_arrays)
+    # Attach dataset metadata
+    if mdio_ds.metadata is not None:
+        xr_ds.attrs["apiVersion"] = mdio_ds.metadata.api_version
+        xr_ds.attrs["createdOn"] = str(mdio_ds.metadata.created_on)
+        xr_ds.attrs["name"] = mdio_ds.metadata.name
+        if mdio_ds.metadata.attributes:
+            xr_ds.attrs["attributes"] = mdio_ds.metadata.attributes
+
+    return xr_ds
diff --git a/src/mdio/schemas/v1/stats.py b/src/mdio/schemas/v1/stats.py
new file mode 100644
index 00000000..29d50add
--- /dev/null
+++ b/src/mdio/schemas/v1/stats.py
@@ -0,0 +1,68 @@
+"""Statistics schema for MDIO v1 arrays.
+
+This module provides two Histogram classes (CenteredBinHistogram and
+EdgeDefinedHistogram), a summary statistics class, and a summary statistics
+metadata class.
+
+SummaryStatistics: a class that represents the minimum summary statistics
+of an array, consisting of count, sum, sum of squares, min, max, and a histogram.
+
+StatisticsMetadata: represents metadata for statistics, with a field
+for v1 of the stats.
+
+CenteredBinHistogram takes the center points of each bin in a histogram,
+while EdgeDefinedHistogram takes the left edges and widths of each bin.
+Both classes extend the base class BaseHistogram, which represents
+a histogram with the count of each bin.
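+
+Example (a sketch with illustrative values; construction by field name follows
+the same pattern used in mdio.segy._workers below):
+
+    >>> hist = CenteredBinHistogram(bin_centers=[-1.0, 0.0, 1.0], counts=[5, 10, 5])
+    >>> stats = SummaryStatistics(
+    ...     count=20, sum=0.0, sum_squares=10.0, min=-1.0, max=1.0, histogram=hist
+    ... )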
+""" + +from __future__ import annotations + +from typing import TypeAlias + +from pydantic import Field + +from mdio.schemas.core import CamelCaseStrictModel +from mdio.schemas.metadata import VersionedMetadataConvention + + +class BaseHistogram(CamelCaseStrictModel): + """Represents a histogram with bin counts.""" + + counts: list[int] = Field(..., description="Count of each each bin.") + + +class CenteredBinHistogram(BaseHistogram): + """Class representing a center bin histogram.""" + + bin_centers: list[float | int] = Field(..., description="List of bin centers.") + + +class EdgeDefinedHistogram(BaseHistogram): + """A class representing an edge-defined histogram.""" + + bin_edges: list[float | int] = Field(..., description="The left edges of the histogram bins.") + bin_widths: list[float | int] = Field(..., description="The widths of the histogram bins.") + + +Histogram: TypeAlias = CenteredBinHistogram | EdgeDefinedHistogram + + +class SummaryStatistics(CamelCaseStrictModel): + """Data model for some statistics in MDIO v1 arrays.""" + + count: int = Field(..., description="The number of data points.") + sum: float = Field(..., description="The total of all data values.") + sum_squares: float = Field(..., description="The total of all data values squared.") + min: float = Field(..., description="The smallest value in the variable.") + max: float = Field(..., description="The largest value in the variable.") + histogram: Histogram = Field(..., description="Binned frequency distribution.") + + +class StatisticsMetadata(VersionedMetadataConvention): + """Data Model representing metadata for statistics.""" + + stats_v1: SummaryStatistics | list[SummaryStatistics] | None = Field( + default=None, + description="Minimal summary statistics.", + ) diff --git a/src/mdio/schemas/v1/templates/abstract_dataset_template.py b/src/mdio/schemas/v1/templates/abstract_dataset_template.py new file mode 100644 index 00000000..ef3cfcba --- /dev/null +++ b/src/mdio/schemas/v1/templates/abstract_dataset_template.py @@ -0,0 +1,242 @@ +"""Template method pattern implementation for MDIO v1 dataset template.""" + +import copy +from abc import ABC +from abc import abstractmethod + +from mdio.schemas import compressors +from mdio.schemas.chunk_grid import RegularChunkGrid +from mdio.schemas.chunk_grid import RegularChunkShape +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredType +from mdio.schemas.metadata import ChunkGridMetadata +from mdio.schemas.metadata import UserAttributes +from mdio.schemas.v1.dataset import Dataset +from mdio.schemas.v1.dataset_builder import MDIODatasetBuilder +from mdio.schemas.v1.units import AllUnits + + +class AbstractDatasetTemplate(ABC): + """Abstract base class that defines the template method for Dataset building factory. + + The template method defines the skeleton of the data processing algorithm, + while allowing subclasses to override specific steps. + """ + + def __init__(self, domain: str = "") -> None: + # Template attributes to be overridden by subclasses + # Domain of the seismic data, e.g. "time" or "depth" + self._trace_domain = domain.lower() + # Names of all coordinate dimensions in the dataset + # e.g. ["cdp"] for 2D post-stack depth + # e.g. ["inline", "crossline"] for 3D post-stack + # e.g. ["inline", "crossline"] for 3D pre-stack CDP gathers + # Note: For pre-stack Shot gathers, the coordinates are defined differently + # and are not directly tied to _coord_dim_names. 
+        self._coord_dim_names = []
+        # *ORDERED* list of names of all dimensions in the dataset
+        # e.g. ["cdp", "depth"] for 2D post-stack depth
+        # e.g. ["inline", "crossline", "depth"] for 3D post-stack depth
+        # e.g. ["inline", "crossline", "offset", "depth"] for 3D pre-stack depth CDP gathers
+        # e.g. ["shot_point", "cable", "channel", "time"] for 3D pre-stack time Shot gathers
+        self._dim_names = []
+        # Names of all coordinates in the dataset
+        # e.g. ["cdp_x", "cdp_y"] for 2D post-stack depth
+        # e.g. ["cdp_x", "cdp_y"] for 3D post-stack depth
+        # e.g. ["cdp_x", "cdp_y"] for 3D pre-stack CDP depth
+        # e.g. ["gun", "shot-x", "shot-y", "receiver-x", "receiver-y"] for 3D pre-stack
+        # time Shot gathers
+        self._coord_names = []
+        # Chunk shape for the variable in the dataset
+        # e.g. [1024, 1024] for 2D post-stack depth
+        # e.g. [128, 128, 128] for 3D post-stack depth
+        # e.g. [1, 1, 512, 4096] for 3D pre-stack CDP depth
+        # e.g. [1, 1, 512, 4096] for 3D pre-stack time Shot gathers
+        self._var_chunk_shape = []
+
+        # Variables instantiated when build_dataset() is called
+        self._builder: MDIODatasetBuilder | None = None
+        # Sizes of the dimensions in the dataset, to be set when build_dataset() is called
+        self._dim_sizes = []
+        # Horizontal units for the coordinates (e.g., "m", "ft"), to be set when
+        # build_dataset() is called
+        self._horizontal_coord_unit = None
+
+    def build_dataset(
+        self,
+        name: str,
+        sizes: list[int],
+        horizontal_coord_unit: AllUnits,
+        headers: StructuredType | None = None,
+    ) -> Dataset:
+        """Template method that builds the dataset.
+
+        Args:
+            name: The name of the dataset.
+            sizes: The sizes of the dimensions.
+            horizontal_coord_unit: The units for the horizontal coordinates.
+            headers: Optional structured headers for the dataset.
+
+        Returns:
+            Dataset: The constructed dataset.
+        """
+        self._dim_sizes = sizes
+        self._horizontal_coord_unit = horizontal_coord_unit
+
+        self._builder = MDIODatasetBuilder(name=name, attributes=self._load_dataset_attributes())
+        self._add_dimensions()
+        self._add_coordinates()
+        self._add_variables()
+        self._add_trace_mask()
+        if headers:
+            self._add_trace_headers(headers)
+        return self._builder.build()
+
+    @property
+    def name(self) -> str:
+        """Returns the name of the template."""
+        return self._name
+
+    @property
+    def trace_variable_name(self) -> str:
+        """Returns the name of the trace variable."""
+        return self._trace_variable_name
+
+    @property
+    def trace_domain(self) -> str:
+        """Returns the name of the trace domain."""
+        return self._trace_domain
+
+    @property
+    def dimension_names(self) -> list[str]:
+        """Returns the names of the dimensions."""
+        return copy.deepcopy(self._dim_names)
+
+    @property
+    def coordinate_names(self) -> list[str]:
+        """Returns the names of the coordinates."""
+        return copy.deepcopy(self._coord_names)
+
+    @property
+    @abstractmethod
+    def _name(self) -> str:
+        """Abstract method to get the name of the template.
+
+        Must be implemented by subclasses.
+
+        Returns:
+            str: The name of the template
+        """
+
+    @property
+    def _trace_variable_name(self) -> str:
+        """Get the name of the data variable.
+
+        A virtual method that can be overwritten by subclasses to return a
+        custom data variable name.
+
+        Returns:
+            str: The name of the data variable
+        """
+        return "amplitude"
+
+    @abstractmethod
+    def _load_dataset_attributes(self) -> UserAttributes:
+        """Abstract method to load dataset attributes.
+
+        Must be implemented by subclasses.
+
+        Returns:
+            UserAttributes: The dataset attributes
+        """
+
+    def _add_dimensions(self) -> None:
+        """Add custom dimensions.
+
+        A virtual method that can be overwritten by subclasses to add custom dimensions.
+        Uses the class field 'builder' to add dimensions to the dataset.
+        """
+        for dim_name, dim_size in zip(self._dim_names, self._dim_sizes, strict=True):
+            self._builder.add_dimension(dim_name, dim_size)
+
+    def _add_coordinates(self) -> None:
+        """Add custom coordinates.
+
+        A virtual method that can be overwritten by subclasses to add custom coordinates.
+        Uses the class field 'builder' to add coordinates to the dataset.
+        """
+        # Add dimension coordinates
+        for name in self._dim_names:
+            self._builder.add_coordinate(
+                name,
+                dimensions=[name],
+                data_type=ScalarType.INT32,
+                metadata_info=None,
+            )
+
+        # Add non-dimension coordinates
+        # TODO(Dmitriy Repin): do chunked write for non-dimensional coordinates and trace_mask
+        # https://github.com/TGSAI/mdio-python/issues/587
+        # The chunk size used for trace mask will be different from the _var_chunk_shape
+        hor_coord_units = [self._horizontal_coord_unit] * len(self._coord_dim_names)
+        for name in self._coord_names:
+            self._builder.add_coordinate(
+                name,
+                dimensions=self._coord_dim_names,
+                data_type=ScalarType.FLOAT64,
+                metadata_info=hor_coord_units,
+            )
+
+    def _add_trace_mask(self) -> None:
+        """Add the trace mask variable."""
+        # TODO(Dmitriy Repin): do chunked write for non-dimensional coordinates and trace_mask
+        # https://github.com/TGSAI/mdio-python/issues/587
+        # The chunk size used for trace mask will be different from the _var_chunk_shape
+        self._builder.add_variable(
+            name="trace_mask",
+            dimensions=self._dim_names[:-1],  # All dimensions except vertical (the last one)
+            data_type=ScalarType.BOOL,
+            compressor=compressors.Blosc(algorithm=compressors.BloscAlgorithm.ZSTD),
+            coordinates=self._coord_names,
+            metadata_info=None,
+        )
+
+    def _add_trace_headers(self, headers: StructuredType) -> None:
+        """Add the trace headers variable."""
+        self._builder.add_variable(
+            name="headers",
+            dimensions=self._dim_names[:-1],  # All dimensions except vertical (the last one)
+            data_type=headers,
+            compressor=compressors.Blosc(algorithm=compressors.BloscAlgorithm.ZSTD),
+            coordinates=self._coord_names,
+            metadata_info=[
+                ChunkGridMetadata(
+                    chunk_grid=RegularChunkGrid(
+                        configuration=RegularChunkShape(chunk_shape=self._var_chunk_shape[:-1])
+                    )
+                )
+            ],
+        )
+
+    def _add_variables(self) -> None:
+        """Add custom variables.
+
+        A virtual method that can be overwritten by subclasses to add custom variables.
+        Uses the class field 'builder' to add variables to the dataset.
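+
+        Example (a hypothetical override in a subclass, reusing the same builder
+        call as the default implementation below):
+
+            def _add_variables(self) -> None:
+                super()._add_variables()
+                self._builder.add_variable(
+                    name="velocity",
+                    dimensions=self._dim_names,
+                    data_type=ScalarType.FLOAT32,
+                    compressor=compressors.Blosc(algorithm=compressors.BloscAlgorithm.ZSTD),
+                    coordinates=self._coord_names,
+                    metadata_info=None,
+                )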
+ """ + self._builder.add_variable( + name=self._trace_variable_name, + dimensions=self._dim_names, + data_type=ScalarType.FLOAT32, + compressor=compressors.Blosc(algorithm=compressors.BloscAlgorithm.ZSTD), + coordinates=self._coord_names, + metadata_info=[ + ChunkGridMetadata( + chunk_grid=RegularChunkGrid( + configuration=RegularChunkShape(chunk_shape=self._var_chunk_shape) + ) + ) + ], + ) diff --git a/src/mdio/schemas/v1/templates/seismic_2d_poststack.py b/src/mdio/schemas/v1/templates/seismic_2d_poststack.py new file mode 100644 index 00000000..297d598b --- /dev/null +++ b/src/mdio/schemas/v1/templates/seismic_2d_poststack.py @@ -0,0 +1,29 @@ +"""Seismic2DPostStackTemplate MDIO v1 dataset templates.""" + +from mdio.schemas.metadata import UserAttributes +from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate + + +class Seismic2DPostStackTemplate(AbstractDatasetTemplate): + """Seismic post-stack 2D time or depth Dataset template.""" + + def __init__(self, domain: str): + super().__init__(domain=domain) + + self._coord_dim_names = ["cdp"] + self._dim_names = [*self._coord_dim_names, self._trace_domain] + self._coord_names = ["cdp_x", "cdp_y"] + self._var_chunk_shape = [1024, 1024] + + @property + def _name(self) -> str: + return f"PostStack2D{self._trace_domain.capitalize()}" + + def _load_dataset_attributes(self) -> UserAttributes: + return UserAttributes( + attributes={ + "surveyDimensionality": "2D", + "ensembleType": "line", + "processingStage": "post-stack", + } + ) diff --git a/src/mdio/schemas/v1/templates/seismic_3d_poststack.py b/src/mdio/schemas/v1/templates/seismic_3d_poststack.py new file mode 100644 index 00000000..78021f72 --- /dev/null +++ b/src/mdio/schemas/v1/templates/seismic_3d_poststack.py @@ -0,0 +1,29 @@ +"""Seismic3DPostStackTemplate MDIO v1 dataset templates.""" + +from mdio.schemas.metadata import UserAttributes +from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate + + +class Seismic3DPostStackTemplate(AbstractDatasetTemplate): + """Seismic post-stack 3D time or depth Dataset template.""" + + def __init__(self, domain: str): + super().__init__(domain=domain) + # Template attributes to be overridden by subclasses + self._coord_dim_names = ["inline", "crossline"] + self._dim_names = [*self._coord_dim_names, self._trace_domain] + self._coord_names = ["cdp_x", "cdp_y"] + self._var_chunk_shape = [128, 128, 128] + + @property + def _name(self) -> str: + return f"PostStack3D{self._trace_domain.capitalize()}" + + def _load_dataset_attributes(self) -> UserAttributes: + return UserAttributes( + attributes={ + "surveyDimensionality": "3D", + "ensembleType": "line", + "processingStage": "post-stack", + } + ) diff --git a/src/mdio/schemas/v1/templates/seismic_3d_prestack_cdp.py b/src/mdio/schemas/v1/templates/seismic_3d_prestack_cdp.py new file mode 100644 index 00000000..2ae22b55 --- /dev/null +++ b/src/mdio/schemas/v1/templates/seismic_3d_prestack_cdp.py @@ -0,0 +1,29 @@ +"""Seismic3DPreStackCDPTemplate MDIO v1 dataset templates.""" + +from mdio.schemas.metadata import UserAttributes +from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate + + +class Seismic3DPreStackCDPTemplate(AbstractDatasetTemplate): + """Seismic CDP pre-stack 3D time or depth Dataset template.""" + + def __init__(self, domain: str): + super().__init__(domain=domain) + + self._coord_dim_names = ["inline", "crossline", "offset"] + self._dim_names = [*self._coord_dim_names, self._trace_domain] + 
self._coord_names = ["cdp_x", "cdp_y"] + self._var_chunk_shape = [1, 1, 512, 4096] + + @property + def _name(self) -> str: + return f"PreStackCdpGathers3D{self._trace_domain.capitalize()}" + + def _load_dataset_attributes(self) -> UserAttributes: + return UserAttributes( + attributes={ + "surveyDimensionality": "3D", + "ensembleType": "cdp", + "processingStage": "pre-stack", + } + ) diff --git a/src/mdio/schemas/v1/templates/seismic_3d_prestack_shot.py b/src/mdio/schemas/v1/templates/seismic_3d_prestack_shot.py new file mode 100644 index 00000000..ac644fbd --- /dev/null +++ b/src/mdio/schemas/v1/templates/seismic_3d_prestack_shot.py @@ -0,0 +1,77 @@ +"""Seismic3DPreStackShotTemplate MDIO v1 dataset templates.""" + +from mdio.schemas.dtype import ScalarType +from mdio.schemas.metadata import UserAttributes +from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate +from mdio.schemas.v1.units import AllUnits + + +class Seismic3DPreStackShotTemplate(AbstractDatasetTemplate): + """Seismic Shot pre-stack 3D time or depth Dataset template.""" + + def __init__(self, domain: str): + super().__init__(domain=domain) + + self._coord_dim_names = [ + "shot_point", + "cable", + "channel", + ] # Custom coordinate definition for shot gathers + self._dim_names = [*self._coord_dim_names, self._trace_domain] + self._coord_names = ["gun", "shot-x", "shot-y", "receiver-x", "receiver-y"] + self._var_chunk_shape = [1, 1, 512, 4096] + + @property + def _name(self) -> str: + return f"PreStackShotGathers3D{self._trace_domain.capitalize()}" + + def _load_dataset_attributes(self) -> UserAttributes: + return UserAttributes( + attributes={ + "surveyDimensionality": "3D", + "ensembleType": "shot", + "processingStage": "pre-stack", + } + ) + + def _add_coordinates(self) -> None: + # Add dimension coordinates + for name in self._dim_names: + self._builder.add_coordinate( + name, + dimensions=[name], + data_type=ScalarType.INT32, + metadata_info=None, + ) + + # Add non-dimension coordinates + self._builder.add_coordinate( + "gun", + dimensions=["shot_point"], + data_type=ScalarType.UINT8, + metadata_info=[AllUnits(units_v1=None)], + ) + self._builder.add_coordinate( + "shot-x", + dimensions=["shot_point"], + data_type=ScalarType.FLOAT64, + metadata_info=[self._horizontal_coord_unit], + ) + self._builder.add_coordinate( + "shot-y", + dimensions=["shot_point"], + data_type=ScalarType.FLOAT64, + metadata_info=[self._horizontal_coord_unit], + ) + self._builder.add_coordinate( + "receiver-x", + dimensions=["shot_point", "cable", "channel"], + data_type=ScalarType.FLOAT64, + metadata_info=[self._horizontal_coord_unit], + ) + self._builder.add_coordinate( + "receiver-y", + dimensions=["shot_point", "cable", "channel"], + data_type=ScalarType.FLOAT64, + metadata_info=[self._horizontal_coord_unit], + ) diff --git a/src/mdio/schemas/v1/templates/template_registry.py b/src/mdio/schemas/v1/templates/template_registry.py new file mode 100644 index 00000000..b1c7550a --- /dev/null +++ b/src/mdio/schemas/v1/templates/template_registry.py @@ -0,0 +1,212 @@ +"""Template registry for MDIO v1 dataset templates.""" + +import threading +from typing import Optional + +from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate +from mdio.schemas.v1.templates.seismic_2d_poststack import Seismic2DPostStackTemplate +from mdio.schemas.v1.templates.seismic_3d_poststack import Seismic3DPostStackTemplate +from mdio.schemas.v1.templates.seismic_3d_prestack_cdp import Seismic3DPreStackCDPTemplate 
+from mdio.schemas.v1.templates.seismic_3d_prestack_shot import Seismic3DPreStackShotTemplate + + +class TemplateRegistry: + """A thread-safe singleton registry for dataset templates.""" + + _instance: Optional["TemplateRegistry"] = None + _lock = threading.RLock() + _initialized = False + + def __new__(cls) -> "TemplateRegistry": + """Create or return the singleton instance. + + Uses double-checked locking pattern to ensure thread safety. + + Returns: + The singleton instance of TemplateRegistry. + """ + if cls._instance is None: + with cls._lock: + # Double-checked locking pattern + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self) -> None: + if not self._initialized: + with self._lock: + if not self._initialized: + self._templates: dict[str, AbstractDatasetTemplate] = {} + self._registry_lock = threading.RLock() + self._register_default_templates() + TemplateRegistry._initialized = True + + def register(self, instance: AbstractDatasetTemplate) -> str: + """Register a template instance by its name. + + Args: + instance: An instance of template to register. + + Returns: + The name of the registered template. + + Raises: + ValueError: If the template name is already registered. + """ + with self._registry_lock: + name = instance.name + if name in self._templates: + err = f"Template '{name}' is already registered." + raise ValueError(err) + self._templates[name] = instance + return name + + def _register_default_templates(self) -> None: + """Register default templates if needed. + + This method can be overridden by subclasses to register default templates. + """ + self.register(Seismic2DPostStackTemplate("time")) + self.register(Seismic2DPostStackTemplate("depth")) + + self.register(Seismic3DPostStackTemplate("time")) + self.register(Seismic3DPostStackTemplate("depth")) + + self.register(Seismic3DPreStackCDPTemplate("time")) + self.register(Seismic3DPreStackCDPTemplate("depth")) + + self.register(Seismic3DPreStackShotTemplate("time")) + self.register(Seismic3DPreStackShotTemplate("depth")) + + def get(self, template_name: str) -> AbstractDatasetTemplate: + """Get a template from the registry by its name. + + Args: + template_name: The name of the template to retrieve. + + Returns: + The template instance if found. + + Raises: + KeyError: If the template is not registered. + """ + with self._registry_lock: + name = template_name + if name not in self._templates: + err = f"Template '{name}' is not registered." + raise KeyError(err) + return self._templates[name] + + def unregister(self, template_name: str) -> None: + """Unregister a template from the registry. + + Args: + template_name: The name of the template to unregister. + + Raises: + KeyError: If the template is not registered. + """ + with self._registry_lock: + name = template_name + if name not in self._templates: + err_msg = f"Template '{name}' is not registered." + raise KeyError(err_msg) + del self._templates[name] + + def is_registered(self, template_name: str) -> bool: + """Check if a template is registered in the registry. + + Args: + template_name: The name of the template to check. + + Returns: + True if the template is registered, False otherwise. + """ + with self._registry_lock: + name = template_name + return name in self._templates + + def list_all_templates(self) -> list[str]: + """Get all registered template names. + + Returns: + A list of all registered template names. 
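+
+        Example (names as registered by the default templates; illustrative
+        output, not exhaustive):
+
+            >>> TemplateRegistry().list_all_templates()  # doctest: +SKIP
+            ['PostStack2DTime', 'PostStack2DDepth', 'PostStack3DTime', ...]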
+ """ + with self._registry_lock: + return list(self._templates.keys()) + + def clear(self) -> None: + """Clear all registered templates (useful for testing).""" + with self._registry_lock: + self._templates.clear() + + @classmethod + def get_instance(cls) -> "TemplateRegistry": + """Get the singleton instance (alternative to constructor). + + Returns: + The singleton instance of TemplateRegistry. + """ + return cls() + + @classmethod + def _reset_instance(cls) -> None: + """Reset the singleton instance (useful for testing).""" + with cls._lock: + cls._instance = None + cls._initialized = False + + +# Global convenience functions +def get_template_registry() -> TemplateRegistry: + """Get the global template registry instance. + + Returns: + The singleton instance of TemplateRegistry. + """ + return TemplateRegistry.get_instance() + + +def register_template(template: AbstractDatasetTemplate) -> str: + """Register a template in the global registry. + + Args: + template: An instance of AbstractDatasetTemplate to register. + + Returns: + The name of the registered template. + """ + return get_template_registry().register(template) + + +def get_template(name: str) -> AbstractDatasetTemplate: + """Get a template from the global registry. + + Args: + name: The name of the template to retrieve. + + Returns: + The template instance if found. + """ + return get_template_registry().get(name) + + +def is_template_registered(name: str) -> bool: + """Check if a template is registered in the global registry. + + Args: + name: The name of the template to check. + + Returns: + True if the template is registered, False otherwise. + """ + return get_template_registry().is_registered(name) + + +def list_templates() -> list[str]: + """List all registered template names. + + Returns: + A list of all registered template names. + """ + return get_template_registry().list_all_templates() diff --git a/src/mdio/schemas/v1/units.py b/src/mdio/schemas/v1/units.py new file mode 100644 index 00000000..1913ff2e --- /dev/null +++ b/src/mdio/schemas/v1/units.py @@ -0,0 +1,119 @@ +"""Unit schemas specific to MDIO v1.""" + +from __future__ import annotations + +from typing import TypeAlias + +from pint import UnitRegistry +from pydantic import Field + +from mdio.schemas.metadata import VersionedMetadataConvention +from mdio.schemas.units import UnitEnum +from mdio.schemas.units import create_unit_model + +ureg = UnitRegistry() +ureg.default_format = "~C" # compact, abbreviated (symbol). 
+ + +class LengthUnitEnum(UnitEnum): + """Enum class representing metric units of length.""" + + MILLIMETER = ureg.millimeter + CENTIMETER = ureg.centimeter + METER = ureg.meter + KILOMETER = ureg.kilometer + + INCH = ureg.inch + FOOT = ureg.foot + YARD = ureg.yard + MILE = ureg.mile + + +LengthUnitModel = create_unit_model(LengthUnitEnum, "LengthUnitModel", "length", __name__) + + +class TimeUnitEnum(UnitEnum): + """Enum class representing units of time.""" + + NANOSECOND = ureg.nanosecond + MICROSECOND = ureg.microsecond + MILLISECOND = ureg.millisecond + SECOND = ureg.second + MINUTE = ureg.minute + HOUR = ureg.hour + DAY = ureg.day + + +TimeUnitModel = create_unit_model(TimeUnitEnum, "TimeUnitModel", "time", __name__) + + +class DensityUnitEnum(UnitEnum): + """Enum class representing units of density.""" + + GRAMS_PER_CC = ureg.gram / ureg.centimeter**3 + KILOGRAMS_PER_M3 = ureg.kilogram / ureg.meter**3 + POUNDS_PER_GAL = ureg.pounds / ureg.gallon + + +DensityUnitModel = create_unit_model(DensityUnitEnum, "DensityUnitModel", "density", __name__) + + +class SpeedUnitEnum(UnitEnum): + """Enum class representing units of speed.""" + + METER_PER_SECOND = ureg.meter / ureg.second + FEET_PER_SECOND = ureg.feet / ureg.second + + +SpeedUnitModel = create_unit_model(SpeedUnitEnum, "SpeedUnitModel", "speed", __name__) + + +class AngleUnitEnum(UnitEnum): + """Enum class representing units of angle.""" + + DEGREES = ureg.degree + RADIANS = ureg.radian + + +AngleUnitModel = create_unit_model(AngleUnitEnum, "AngleUnitModel", "angle", __name__) + + +class FrequencyUnitEnum(UnitEnum): + """Enum class representing units of frequency.""" + + HERTZ = ureg.hertz + + +FrequencyUnitModel = create_unit_model( + FrequencyUnitEnum, "FrequencyUnitModel", "frequency", __name__ +) + + +class VoltageUnitEnum(UnitEnum): + """Enum class representing units of voltage.""" + + MICROVOLT = ureg.microvolt + MILLIVOLT = ureg.millivolt + VOLT = ureg.volt + + +VoltageUnitModel = create_unit_model(VoltageUnitEnum, "VoltageUnitModel", "voltage", __name__) + + +# Composite model types +AllUnitModel: TypeAlias = ( + LengthUnitModel + | TimeUnitModel + | AngleUnitModel + | DensityUnitModel + | SpeedUnitModel + | FrequencyUnitModel + | VoltageUnitModel +) + + +# Versioned metadata conventions for units +class AllUnits(VersionedMetadataConvention): + """All Units.""" + + units_v1: AllUnitModel | list[AllUnitModel] | None = Field(default=None) diff --git a/src/mdio/schemas/v1/variable.py b/src/mdio/schemas/v1/variable.py new file mode 100644 index 00000000..93b59117 --- /dev/null +++ b/src/mdio/schemas/v1/variable.py @@ -0,0 +1,63 @@ +"""This module defines `LabeledArray`, `Coordinate`, and `Variable`. + +`LabeledArray` is a basic array unit which includes basic properties like +name, dimension, data type, compressor etc. + +`Coordinate` extends the `LabeledArray` class, it represents the Coordinate +array in the MDIO format. It has dimensions which are fully defined and can hold +additional metadata. + +`Variable` is another class that extends the `LabeledArray`. It represents a +variable in MDIO format. It can have coordinates and can also hold metadata. 
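+
+Example (a minimal sketch; string dimension and coordinate references like these
+are resolved at the Dataset level):
+
+    >>> from mdio.schemas.dtype import ScalarType
+    >>> coord = Coordinate(
+    ...     name="cdp_x",
+    ...     dimensions=["inline", "crossline"],
+    ...     data_type=ScalarType.FLOAT64,
+    ... )
+    >>> var = Variable(
+    ...     name="amplitude",
+    ...     dimensions=["inline", "crossline", "time"],
+    ...     data_type=ScalarType.FLOAT32,
+    ...     coordinates=["cdp_x", "cdp_y"],
+    ... )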
+""" + +from pydantic import Field +from pydantic import create_model + +from mdio.schemas.base import NamedArray +from mdio.schemas.core import CamelCaseStrictModel +from mdio.schemas.core import model_fields +from mdio.schemas.dtype import ScalarType +from mdio.schemas.metadata import ChunkGridMetadata +from mdio.schemas.metadata import UserAttributes +from mdio.schemas.v1.stats import StatisticsMetadata +from mdio.schemas.v1.units import AllUnits + +CoordinateMetadata = create_model( + "CoordinateMetadata", + **model_fields(AllUnits), + **model_fields(UserAttributes), + __base__=CamelCaseStrictModel, + __doc__="Reduced Metadata, perfect for simple Coordinates.", +) + + +class Coordinate(NamedArray): + """A simple MDIO Coordinate array with metadata. + + For large or complex Coordinates, define a Variable instead. + """ + + data_type: ScalarType = Field(..., description="Data type of Coordinate.") + metadata: CoordinateMetadata | None = Field(default=None, description="Coordinate Metadata.") + + +VariableMetadata = create_model( + "VariableMetadata", + **model_fields(ChunkGridMetadata), + **model_fields(AllUnits), + **model_fields(StatisticsMetadata), + **model_fields(UserAttributes), + __base__=CamelCaseStrictModel, + __doc__="Complete Metadata for Variables and complex or large Coordinates.", +) + + +class Variable(NamedArray): + """An MDIO Variable that has coordinates and metadata.""" + + coordinates: list[Coordinate] | list[str] | None = Field( + default=None, + description="Coordinates of the MDIO Variable dimensions.", + ) + metadata: VariableMetadata | None = Field(default=None, description="Variable Metadata.") diff --git a/src/mdio/segy/_workers.py b/src/mdio/segy/_workers.py index 2380b16f..2be53003 100644 --- a/src/mdio/segy/_workers.py +++ b/src/mdio/segy/_workers.py @@ -4,7 +4,6 @@ import os from typing import TYPE_CHECKING -from typing import Any from typing import TypedDict from typing import cast @@ -15,9 +14,14 @@ from segy.arrays import HeaderArray from segy.config import SegySettings from segy.schema import SegySpec - from zarr import Array + from xarray import Dataset as xr_Dataset + from zarr import Array as zarr_Array - from mdio.core import Grid + from mdio.core.storage_location import StorageLocation + +from mdio.constants import UINT32_MAX +from mdio.schemas.v1.stats import CenteredBinHistogram +from mdio.schemas.v1.stats import SummaryStatistics class SegyFileArguments(TypedDict): @@ -29,8 +33,7 @@ class SegyFileArguments(TypedDict): def header_scan_worker( - segy_kw: SegyFileArguments, - trace_range: tuple[int, int], + segy_kw: SegyFileArguments, trace_range: tuple[int, int], subset: list[str] | None = None ) -> HeaderArray: """Header scan worker. @@ -40,6 +43,7 @@ def header_scan_worker( Args: segy_kw: Arguments to open SegyFile instance. trace_range: Tuple consisting of the trace ranges to read. + subset: List of header names to filter and keep. Returns: HeaderArray parsed from SEG-Y library. 
@@ -55,6 +59,9 @@ def header_scan_worker(
     else:
         trace_header = segy_file.header[slice_]
 
+    if subset is not None:
+        trace_header = trace_header[subset]
+
     # Get non-void fields from dtype and copy to new array for memory efficiency
     fields = trace_header.dtype.fields
     non_void_fields = [(name, dtype) for name, (dtype, _) in fields.items()]
@@ -67,72 +74,68 @@
     return cast("HeaderArray", trace_header)
 
 
-def trace_worker(
+def trace_worker(  # noqa: PLR0913
     segy_kw: SegyFileArguments,
-    data_array: Array,
-    metadata_array: Array,
-    grid: Grid,
-    chunk_indices: tuple[slice, ...],
-) -> tuple[Any, ...] | None:
-    """Worker function for multi-process enabled blocked SEG-Y I/O.
-
-    Performance of `zarr.Array` writes is slow if data isn't aligned with chunk boundaries,
-    sacrificing sequential reads of SEG-Y files. This won't be an issue with SSDs or cloud.
-
-    It retrieves trace numbers from the grid and gathers the current chunk's SEG-Y trace indices.
-    Then, it fills a temporary array in memory and writes to the `zarr.Array` chunk. We take full
-    slices across the sample dimension since SEG-Y data isn't chunked, eliminating concern.
+    output_location: StorageLocation,
+    data_variable_name: str,
+    region: dict[str, slice],
+    grid_map: zarr_Array,
+    dataset: xr_Dataset,
+) -> SummaryStatistics | None:
+    """Write a subset of traces from a region of the dataset to the output Zarr store.
 
     Args:
         segy_kw: Arguments to open SegyFile instance.
-        data_array: Handle for zarr.Array we are writing traces to
-        metadata_array: Handle for zarr.Array we are writing trace headers
-        grid: mdio.Grid instance
-        chunk_indices: Tuple consisting of the chunk slice indices for each dimension
+        output_location: StorageLocation for the output Zarr dataset
+            (e.g. a local file path or cloud storage URI); the location
+            also includes storage options for cloud storage.
+        data_variable_name: Name of the data variable to write.
+        region: Region of the dataset to write to.
+        grid_map: Zarr array mapping live traces to their positions in the dataset.
+        dataset: Xarray dataset containing the data to write.
 
     Returns:
-        Partial statistics for chunk, or None
+        SummaryStatistics for the written traces, or None if the region has no live traces.
     """
-    # Special case where there are no traces inside chunk.
-    segy_file = SegyFile(**segy_kw)
-    live_subset = grid.live_mask[chunk_indices[:-1]]
-
-    if np.count_nonzero(live_subset) == 0:
+    if not dataset.trace_mask.any():
         return None
 
-    # Let's get trace numbers from grid map using the chunk indices.
-    seq_trace_indices = grid.map[chunk_indices[:-1]]
-
-    tmp_data = np.zeros(seq_trace_indices.shape + (grid.shape[-1],), dtype=data_array.dtype)
-    tmp_metadata = np.zeros(seq_trace_indices.shape, dtype=metadata_array.dtype)
-
-    del grid  # To save some memory
-
-    # Read headers and traces for block
-    valid_indices = seq_trace_indices[live_subset]
-
-    traces = segy_file.trace[valid_indices.tolist()]
-    headers, samples = traces["header"], traces["data"]
-
-    tmp_metadata[live_subset] = headers.view(tmp_metadata.dtype)
-    tmp_data[live_subset] = samples
-
-    # Flush metadata to zarr
-    metadata_array.set_basic_selection(selection=chunk_indices[:-1], value=tmp_metadata)
-
-    nonzero_mask = samples != 0
-    nonzero_count = nonzero_mask.sum(dtype="uint32")
+    # Open the SEG-Y file in every new process / spawned worker since the
+    # open file handles cannot be shared across processes.
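+    # (This is also why the worker receives segy_kw rather than an open SegyFile:
+    # all arguments are pickled when submitted to the ProcessPoolExecutor created
+    # with the 'spawn' context in blocked_io.to_zarr.)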
+ segy_file = SegyFile(**segy_kw) - if nonzero_count == 0: - return None + not_null = grid_map != UINT32_MAX - data_array.set_basic_selection(selection=chunk_indices, value=tmp_data) + live_trace_indexes = grid_map[not_null].tolist() + traces = segy_file.trace[live_trace_indexes] - # Calculate statistics - tmp_data = samples[nonzero_mask] - chunk_sum = tmp_data.sum(dtype="float64") - chunk_sum_squares = np.square(tmp_data, dtype="float64").sum() - min_val = tmp_data.min() - max_val = tmp_data.max() + # Get subset of the dataset that has not yet been saved + # The headers might not be present in the dataset + # TODO(Dmitriy Repin): Check, should we overwrite the 'dataset' instead to save the memory + # https://github.com/TGSAI/mdio-python/issues/584 + if "headers" in dataset.data_vars: + ds_to_write = dataset[[data_variable_name, "headers"]] + ds_to_write = ds_to_write.reset_coords() - return nonzero_count, chunk_sum, chunk_sum_squares, min_val, max_val + ds_to_write["headers"].data[not_null] = traces.header + ds_to_write["headers"].data[~not_null] = 0 + else: + ds_to_write = dataset[[data_variable_name]] + ds_to_write = ds_to_write.reset_coords() + + ds_to_write[data_variable_name].data[not_null] = traces.sample + + out_path = output_location.uri + ds_to_write.to_zarr(out_path, region=region, mode="r+", write_empty_chunks=False, zarr_format=2) + + histogram = CenteredBinHistogram(bin_centers=[], counts=[]) + return SummaryStatistics( + count=traces.sample.size, + min=traces.sample.min(), + max=traces.sample.max(), + sum=traces.sample.sum(), + # TODO(Altay): Look at how to do the sum squares statistic correctly + # https://github.com/TGSAI/mdio-python/issues/581 + sum_squares=(traces.sample**2).sum(), + histogram=histogram, + ) diff --git a/src/mdio/segy/blocked_io.py b/src/mdio/segy/blocked_io.py index 6e23be1f..e92aed2e 100644 --- a/src/mdio/segy/blocked_io.py +++ b/src/mdio/segy/blocked_io.py @@ -2,9 +2,10 @@ from __future__ import annotations +import multiprocessing as mp import os from concurrent.futures import ProcessPoolExecutor -from itertools import repeat +from concurrent.futures import as_completed from pathlib import Path from typing import TYPE_CHECKING @@ -13,8 +14,12 @@ from dask.array import map_blocks from psutil import cpu_count from tqdm.auto import tqdm +from zarr import consolidate_metadata as zarr_consolidate_metadata +from zarr import open_group as zarr_open_group from mdio.core.indexing import ChunkIterator +from mdio.schemas.v1.stats import CenteredBinHistogram +from mdio.schemas.v1.stats import SummaryStatistics from mdio.segy._workers import trace_worker from mdio.segy.creation import SegyPartRecord from mdio.segy.creation import concat_files @@ -25,89 +30,119 @@ from numpy.typing import NDArray from segy import SegyFactory from segy import SegyFile + from xarray import Dataset as xr_Dataset + from zarr import Array as zarr_Array - from mdio.core import Grid + from mdio.core.storage_location import StorageLocation default_cpus = cpu_count(logical=True) -def to_zarr(segy_file: SegyFile, grid: Grid, data_array: Array, header_array: Array) -> dict: - """Blocked I/O from SEG-Y to chunked `zarr.core.Array`. 
+def _create_stats() -> SummaryStatistics:
+    histogram = CenteredBinHistogram(bin_centers=[], counts=[])
+    return SummaryStatistics(count=0, min=0, max=0, sum=0, sum_squares=0, histogram=histogram)
+
+
+def _update_stats(final_stats: SummaryStatistics, partial_stats: SummaryStatistics) -> None:
+    final_stats.count += partial_stats.count
+    final_stats.min = min(final_stats.min, partial_stats.min)
+    final_stats.max = max(final_stats.max, partial_stats.max)
+    final_stats.sum += partial_stats.sum
+    final_stats.sum_squares += partial_stats.sum_squares
+
+
+def to_zarr(  # noqa: PLR0913, PLR0915
+    segy_file: SegyFile,
+    output_location: StorageLocation,
+    grid_map: zarr_Array,
+    dataset: xr_Dataset,
+    data_variable_name: str,
+) -> SummaryStatistics:
+    """Blocked I/O from SEG-Y to chunked `xarray.Dataset`.
 
     Args:
         segy_file: SEG-Y file instance.
-        grid: mdio.Grid instance
-        data_array: Handle for zarr.core.Array we are writing trace data
-        header_array: Handle for zarr.core.Array we are writing trace headers
+        output_location: StorageLocation for the output Zarr dataset
+            (e.g. a local file path or cloud storage URI); the location
+            also includes storage options for cloud storage.
+        grid_map: Zarr array with grid map for the traces.
+        dataset: Handle for the xarray.Dataset we are writing trace data to.
+        data_variable_name: Name of the data variable in the dataset.
 
     Returns:
-        Global statistics for the SEG-Y as a dictionary.
+        Summary statistics accumulated over all written trace blocks.
     """
-    # Initialize chunk iterator (returns next chunk slice indices each iteration)
-    chunker = ChunkIterator(data_array, chunk_samples=False)
-    num_chunks = len(chunker)
-
+    data = dataset[data_variable_name]
+
+    final_stats = _create_stats()
+
+    # Must use data.encoding.get instead of data.chunks (the arrays are created
+    # with zarr.zeros, not Dask, so .chunks is not set)
+    chunks_t_of_t = (data.encoding.get("chunks"),)
+    # Unroll tuple of tuples into a flat list
+    chunks = [c for sub_tuple in chunks_t_of_t for c in sub_tuple]
+    # We will not chunk traces (old option chunk_samples=False)
+    chunks[-1] = data.shape[-1]
+    dim_names = list(data.dims)
+    # Initialize chunk iterator
+    # Since the dimensions are provided, it will return a dict of slices
+    chunk_iter = ChunkIterator(shape=data.shape, chunks=chunks, dim_names=dim_names)
+    num_chunks = chunk_iter.num_chunks
+
+    # The following could be extracted into a function to allow executor injection
+    # (e.g. for unit testing or for debugging with non-parallelized processing)
+    # def _create_executor(num_chunks: int) -> ProcessPoolExecutor:
+
+    # For Unix async writes with s3fs/fsspec & multiprocessing, use 'spawn' instead of default
+    # 'fork' to avoid deadlocks on cloud stores. Slower but necessary. Default on Windows.
     num_cpus = int(os.getenv("MDIO__IMPORT__CPU_COUNT", default_cpus))
     num_workers = min(num_chunks, num_cpus)
-
-    # Chunksize here is for multiprocessing, not Zarr chunksize.
- pool_chunksize, extra = divmod(num_chunks, num_workers * 4) - pool_chunksize += 1 if extra else pool_chunksize + context = mp.get_context("spawn") + executor = ProcessPoolExecutor(max_workers=num_workers, mp_context=context) + # return executor segy_kw = { "url": segy_file.fs.unstrip_protocol(segy_file.url), "spec": segy_file.spec, "settings": segy_file.settings, } - tqdm_kw = {"unit": "block", "dynamic_ncols": True} - with ProcessPoolExecutor(max_workers=num_workers) as executor: - lazy_work = executor.map( - trace_worker, # fn - repeat(segy_kw), - repeat(data_array), - repeat(header_array), - repeat(grid), - chunker, - chunksize=pool_chunksize, - ) - - lazy_work = tqdm( - iterable=lazy_work, + with executor: + futures = [] + common_args = (segy_kw, output_location, data_variable_name) + for region in chunk_iter: + index_slices = tuple(region[key] for key in data.dims[:-1]) + subset_args = ( + region, + grid_map[index_slices], + dataset.isel(region), + ) + future = executor.submit(trace_worker, *common_args, *subset_args) + futures.append(future) + + iterable = tqdm( + as_completed(futures), total=num_chunks, - desc=f"Ingesting SEG-Y in {num_chunks} chunks", - **tqdm_kw, + unit="block", + desc="Ingesting traces", ) - # This executes the lazy work. - chunk_stats = list(lazy_work) - - # This comes in as n_chunk x 5 columns. - # Columns in order: count, sum, sum of squared, min, max. - # We can compute global mean, std, rms, min, max. - # Transposing because we want each statistic as a row to unpack later. - # REF: https://math.stackexchange.com/questions/1547141/aggregating-standard-deviation-to-a-summary-point # noqa: E501 - # REF: https://www.mathwords.com/r/root_mean_square.htm - chunk_stats = [stat for stat in chunk_stats if stat is not None] - - chunk_stats = zip(*chunk_stats) # noqa: B905 - glob_count, glob_sum, glob_sum_square, glob_min, glob_max = chunk_stats - - glob_count = np.sum(glob_count) # Comes in as `uint32` - glob_sum = np.sum(glob_sum) # `float64` - glob_sum_square = np.sum(glob_sum_square) # `float64` - glob_min = np.min(glob_min) # `float32` - glob_max = np.max(glob_max) # `float32` - - glob_mean = glob_sum / glob_count - glob_std = np.sqrt(glob_sum_square / glob_count - (glob_sum / glob_count) ** 2) - glob_rms = np.sqrt(glob_sum_square / glob_count) - - # We need to write these as float64 because float32 is not JSON serializable - # Trace data is originally float32, hence min/max - glob_min = glob_min.min().astype("float64") - glob_max = glob_max.max().astype("float64") - - return {"mean": glob_mean, "std": glob_std, "rms": glob_rms, "min": glob_min, "max": glob_max} + for future in iterable: + result = future.result() + if result is not None: + _update_stats(final_stats, result) + + # Xarray doesn't directly support incremental attribute updates when appending to an + # existing Zarr store. + # HACK: We will update the array attribute using zarr's API directly. 
+ # Open the Zarr store using zarr directly + zarr_group = zarr_open_group(output_location.uri, mode="a") + attr_json = final_stats.model_dump_json() + # Use the data_variable_name to get the array in the Zarr group + # and write "statistics" metadata there + zarr_group[data_variable_name].attrs.update({"statsV1": attr_json}) + # Consolidate metadata (important for Xarray to recognize changes) + zarr_consolidate_metadata(output_location.uri) + + return final_stats def segy_record_concat( diff --git a/src/mdio/segy/parsers.py b/src/mdio/segy/parsers.py index 810924fb..d46c61b3 100644 --- a/src/mdio/segy/parsers.py +++ b/src/mdio/segy/parsers.py @@ -21,8 +21,9 @@ default_cpus = cpu_count(logical=True) -def parse_index_headers( +def parse_headers( segy_file: SegyFile, + subset: list[str] | None = None, block_size: int = 10000, progress_bar: bool = True, ) -> HeaderArray: @@ -30,6 +31,7 @@ def parse_index_headers( Args: segy_file: SegyFile instance. + subset: List of header names to filter and keep. block_size: Number of traces to read for each block. progress_bar: Enable or disable progress bar. Default is True. @@ -57,7 +59,7 @@ def parse_index_headers( } tqdm_kw = {"unit": "block", "dynamic_ncols": True} with ProcessPoolExecutor(num_workers) as executor: - lazy_work = executor.map(header_scan_worker, repeat(segy_kw), trace_ranges) + lazy_work = executor.map(header_scan_worker, repeat(segy_kw), trace_ranges, repeat(subset)) if progress_bar is True: lazy_work = tqdm( diff --git a/src/mdio/segy/utilities.py b/src/mdio/segy/utilities.py index 031d8b49..f5677e96 100644 --- a/src/mdio/segy/utilities.py +++ b/src/mdio/segy/utilities.py @@ -12,20 +12,23 @@ from mdio.core import Dimension from mdio.segy.geometry import GridOverrider -from mdio.segy.parsers import parse_index_headers +from mdio.segy.parsers import parse_headers if TYPE_CHECKING: from numpy.typing import DTypeLike from segy import SegyFile from segy.arrays import HeaderArray + from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate + logger = logging.getLogger(__name__) def get_grid_plan( # noqa: C901 segy_file: SegyFile, - chunksize: list[int], + chunksize: tuple[int, ...] | None, + template: AbstractDatasetTemplate, return_headers: bool = False, grid_overrides: dict[str, Any] | None = None, ) -> tuple[list[Dimension], tuple[int, ...]] | tuple[list[Dimension], tuple[int, ...], HeaderArray]: @@ -40,6 +43,7 @@ def get_grid_plan( # noqa: C901 Args: segy_file: SegyFile instance. chunksize: Chunk sizes to be used in grid plan. + template: MDIO template where coordinate names and domain will be taken. return_headers: Option to return parsed headers with `Dimension` objects. Default is False. grid_overrides: Option to add grid overrides. See main documentation. @@ -49,37 +53,37 @@ def get_grid_plan( # noqa: C901 if grid_overrides is None: grid_overrides = {} - index_headers = parse_index_headers(segy_file=segy_file) - index_names = list(index_headers.dtype.names) - - dims = [] + # Keep only dimension and non-dimension coordinates excluding the vertical axis + horizontal_dimensions = template.dimension_names[:-1] + horizontal_coordinates = horizontal_dimensions + template.coordinate_names + headers_subset = parse_headers(segy_file=segy_file, subset=horizontal_coordinates) # Handle grid overrides. 
override_handler = GridOverrider() - index_headers, index_names, chunksize = override_handler.run( - index_headers, - index_names, + headers_subset, horizontal_coordinates, chunksize = override_handler.run( + headers_subset, + horizontal_coordinates, chunksize=chunksize, grid_overrides=grid_overrides, ) - for index_name in index_names: - dim_unique = np.unique(index_headers[index_name]) - dims.append(Dimension(coords=dim_unique, name=index_name)) + dimensions = [] + for dim_name in horizontal_dimensions: + dim_unique = np.unique(headers_subset[dim_name]) + dimensions.append(Dimension(coords=dim_unique, name=dim_name)) sample_labels = segy_file.sample_labels / 1000 # normalize if all(sample_labels.astype("int64") == sample_labels): sample_labels = sample_labels.astype("int64") - sample_dim = Dimension(coords=sample_labels, name="sample") - - dims.append(sample_dim) + vertical_dim = Dimension(coords=sample_labels, name=template.trace_domain) + dimensions.append(vertical_dim) if return_headers: - return dims, chunksize, index_headers + return dimensions, chunksize, headers_subset - return dims, chunksize + return dimensions, chunksize def find_trailing_ones_index(dim_blocks: tuple[int, ...]) -> int: diff --git a/tests/integration/test_segy_import_export.py b/tests/integration/test_segy_import_export.py index 9e3d0e1b..d9d61243 100644 --- a/tests/integration/test_segy_import_export.py +++ b/tests/integration/test_segy_import_export.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json import os from typing import TYPE_CHECKING @@ -9,13 +10,21 @@ import numpy as np import numpy.testing as npt import pytest +import xarray as xr from segy import SegyFile +from segy.standards import get_segy_standard +from tests.integration.testing_helpers import customize_segy_specs +from tests.integration.testing_helpers import get_inline_header_values +from tests.integration.testing_helpers import get_values +from tests.integration.testing_helpers import validate_variable from mdio import MDIOReader from mdio import mdio_to_segy -from mdio.converters import segy_to_mdio from mdio.converters.exceptions import GridTraceSparsityError +from mdio.converters.segy import segy_to_mdio from mdio.core import Dimension +from mdio.core.storage_location import StorageLocation +from mdio.schemas.v1.templates.template_registry import TemplateRegistry from mdio.segy.compat import mdio_segy_spec from mdio.segy.geometry import StreamerShotGeometryType @@ -247,20 +256,30 @@ def test_import_6d_segy( # noqa: PLR0913 @pytest.mark.dependency -@pytest.mark.parametrize("index_bytes", [(17, 13)]) -@pytest.mark.parametrize("index_names", [("inline", "crossline")]) -def test_3d_import( +@pytest.mark.parametrize("index_bytes", [(17, 13, 81, 85)]) +@pytest.mark.parametrize("index_names", [("inline", "crossline", "cdp_x", "cdp_y")]) +@pytest.mark.parametrize("index_types", [("int32", "int32", "int32", "int32")]) +def test_3d_import_v1( segy_input: Path, zarr_tmp: Path, index_bytes: tuple[int, ...], index_names: tuple[str, ...], + index_types: tuple[str, ...], ) -> None: """Test importing a SEG-Y file to MDIO.""" - segy_to_mdio( - segy_path=segy_input.__str__(), - mdio_path_or_buffer=zarr_tmp.__str__(), + segy_spec = get_segy_standard(1.0) + segy_spec = customize_segy_specs( + segy_spec=segy_spec, index_bytes=index_bytes, index_names=index_names, + index_types=index_types, + ) + + segy_to_mdio( + segy_spec=segy_spec, + mdio_template=TemplateRegistry().get("PostStack3DTime"), + input_location=StorageLocation(segy_input.__str__()), 
+ output_location=StorageLocation(zarr_tmp.__str__()), overwrite=True, ) @@ -269,55 +288,120 @@ def test_3d_import( class TestReader: """Test reader functionality.""" - def test_meta_read(self, zarr_tmp: Path) -> None: + def test_meta_dataset_read(self, zarr_tmp: Path) -> None: + """Metadata reading tests.""" + path = zarr_tmp.__str__() + # path = "/tmp/pytest-of-vscode/my-mdio/mdio0" + ds = xr.open_dataset(path, engine="zarr") + expected_attrs = { + "apiVersion": "1.0.0a1", + "createdOn": "2025-08-06 16:21:54.747880+00:00", + "name": "PostStack3DTime", + } + actual_attrs_json = ds.attrs + # compare one by one due to ever changing createdOn. For it, we only check existence + for key, value in expected_attrs.items(): + assert key in actual_attrs_json + if key == "createdOn": + assert actual_attrs_json[key] is not None + else: + assert actual_attrs_json[key] == value + + def test_meta_variable_read(self, zarr_tmp: Path) -> None: """Metadata reading tests.""" - mdio = MDIOReader(zarr_tmp.__str__()) - assert mdio.binary_header["samples_per_trace"] == 1501 # noqa: PLR2004 - assert mdio.binary_header["sample_interval"] == 2000 # noqa: PLR2004 + path = zarr_tmp.__str__() + # path = "/tmp/pytest-of-vscode/my-mdio/mdio0" + ds = xr.open_dataset(path, engine="zarr") + expected_attrs = { + "count": 97354860, + "sum": -8594.551666259766, + "sum_squares": 40571291.6875, + "min": -8.375323295593262, + "max": 0.0, + "histogram": {"counts": [], "bin_centers": []}, + } + actual_attrs_json = json.loads(ds["amplitude"].attrs["statsV1"]) + assert actual_attrs_json == expected_attrs def test_grid(self, zarr_tmp: Path) -> None: - """Grid reading tests.""" - mdio = MDIOReader(zarr_tmp.__str__()) - grid = mdio.grid + """Test validating MDIO variables.""" + # Load Xarray dataset from the MDIO file + path = zarr_tmp.__str__() + # path = "/tmp/pytest-of-vscode/my-mdio/mdio0" + ds = xr.open_dataset(path, engine="zarr") + + # Note: in order to create the dataset we used the Time template, so the + # sample dimension is called "time" + + # Validate the dimension coordinate variables + validate_variable(ds, "inline", (345,), ["inline"], np.int32, range(1, 346), get_values) + validate_variable( + ds, "crossline", (188,), ["crossline"], np.int32, range(1, 189), get_values + ) + validate_variable(ds, "time", (1501,), ["time"], np.int32, range(0, 3002, 2), get_values) - assert grid.select_dim("inline") == Dimension(range(1, 346), "inline") - assert grid.select_dim("crossline") == Dimension(range(1, 189), "crossline") - assert grid.select_dim("sample") == Dimension(range(0, 3002, 2), "sample") + # Validate the non-dimensional coordinate variables + validate_variable(ds, "cdp_x", (345, 188), ["inline", "crossline"], np.float64, None, None) + validate_variable(ds, "cdp_y", (345, 188), ["inline", "crossline"], np.float64, None, None) - def test_get_data(self, zarr_tmp: Path) -> None: - """Data retrieval tests.""" - mdio = MDIOReader(zarr_tmp.__str__()) + # Validate the headers + # We have a subset of headers since we used customize_segy_specs() providing the values only + # for "inline", "crossline", "cdp_x", "cdp_y" + data_type = np.dtype( + [("inline", " None: """Read and compare every 75 inlines' mean and std. 
dev.""" - mdio = MDIOReader(zarr_tmp.__str__()) - - inlines = mdio[::75, :, :] + path = zarr_tmp.__str__() + # path = "/tmp/pytest-of-vscode/my-mdio/mdio0" + ds = xr.open_dataset(path, engine="zarr") + inlines = ds["amplitude"][::75, :, :] mean, std = inlines.mean(), inlines.std() - npt.assert_allclose([mean, std], [1.0555277e-04, 6.0027051e-01]) def test_crossline(self, zarr_tmp: Path) -> None: """Read and compare every 75 crosslines' mean and std. dev.""" - mdio = MDIOReader(zarr_tmp.__str__()) - - xlines = mdio[:, ::75, :] + path = zarr_tmp.__str__() + # path = "/tmp/pytest-of-vscode/my-mdio/mdio0" + ds = xr.open_dataset(path, engine="zarr") + xlines = ds["amplitude"][:, ::75, :] mean, std = xlines.mean(), xlines.std() npt.assert_allclose([mean, std], [-5.0329847e-05, 5.9406823e-01]) def test_zslice(self, zarr_tmp: Path) -> None: """Read and compare every 225 z-slices' mean and std. dev.""" - mdio = MDIOReader(zarr_tmp.__str__()) - - slices = mdio[:, :, ::225] + path = zarr_tmp.__str__() + # path = "/tmp/pytest-of-vscode/my-mdio/mdio0" + ds = xr.open_dataset(path, engine="zarr") + slices = ds["amplitude"][:, :, ::225] mean, std = slices.mean(), slices.std() - npt.assert_allclose([mean, std], [0.005236923, 0.61279935]) diff --git a/tests/integration/testing_helpers.py b/tests/integration/testing_helpers.py new file mode 100644 index 00000000..4df6bdad --- /dev/null +++ b/tests/integration/testing_helpers.py @@ -0,0 +1,69 @@ +"""This module provides testing helpers for integration testing.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np +from segy.schema import HeaderField +from segy.schema import SegySpec + +if TYPE_CHECKING: + from collections.abc import Callable + + import xarray as xr + + +def customize_segy_specs( + segy_spec: SegySpec, + index_bytes: tuple[int, ...] | None = None, + index_names: tuple[int, ...] | None = None, + index_types: tuple[int, ...] | None = None, +) -> SegySpec: + """Customize SEG-Y specifications with user-defined index fields.""" + if not index_bytes: + # No customization + return segy_spec + + index_names = index_names or [f"dim_{i}" for i in range(len(index_bytes))] + index_types = index_types or ["int32"] * len(index_bytes) + + if not (len(index_names) == len(index_bytes) == len(index_types)): + err = "All index fields must have the same length." + raise ValueError(err) + + # Index the dataset using a spec that interprets the user provided index headers. 
+ index_fields = [] + for name, byte, format_ in zip(index_names, index_bytes, index_types, strict=True): + index_fields.append(HeaderField(name=name, byte=byte, format=format_)) + + return segy_spec.customize(trace_header_fields=index_fields) + + +def get_values(arr: xr.DataArray) -> np.ndarray: + """Extract actual values from an Xarray DataArray.""" + return arr.values + + +def get_inline_header_values(dataset: xr.Dataset) -> np.ndarray: + """Extract the "inline" header values from an Xarray Dataset.""" + return dataset["inline"].values + + +def validate_variable( # noqa: PLR0913 + dataset: xr.Dataset, + name: str, + shape: tuple[int, ...], + dims: list[str], + data_type: np.dtype, + expected_values: range | None, + actual_value_generator: Callable, +) -> None: + """Validate the properties of a variable in an Xarray dataset.""" + arr = dataset[name] + assert shape == arr.shape + assert set(dims) == set(arr.dims) + assert data_type == arr.dtype + if expected_values is not None and actual_value_generator is not None: + actual_values = actual_value_generator(arr) + assert np.array_equal(expected_values, actual_values) diff --git a/tests/integration/v1/test_segy_to_mdio_v1.py b/tests/integration/v1/test_segy_to_mdio_v1.py new file mode 100644 index 00000000..e44af6cb --- /dev/null +++ b/tests/integration/v1/test_segy_to_mdio_v1.py @@ -0,0 +1,354 @@ +"""End-to-end tests for SEG-Y to MDIO conversion v1.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numcodecs +import numpy as np +import pytest +import xarray as xr +import zarr +from segy.standards import get_segy_standard +from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding + +from mdio.converters.segy import segy_to_mdio +from mdio.converters.type_converter import to_numpy_dtype +from mdio.core.storage_location import StorageLocation +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredField +from mdio.schemas.dtype import StructuredType +from mdio.schemas.v1.templates.template_registry import TemplateRegistry + +if TYPE_CHECKING: + from collections.abc import Callable + + +def _slice_three_values(dims: tuple[int, ...], values_from_start: bool) -> tuple[slice, ...]: + if values_from_start: + slices = tuple(slice(0, 3) for _ in dims) + else: + slices = tuple(slice(-3, None) for _ in dims) + return slices + + +def _get_actual_value(arr: xr.DataArray) -> np.ndarray: + return arr.values[_slice_three_values(arr.shape, values_from_start=True)] + + +def _validate_variable( # noqa: PLR0913 + dataset: xr.Dataset, + name: str, + shape: tuple[int, ...], + dims: list[str], + data_type: np.dtype, + expected_values: range | None, + actual_func: Callable, +) -> None: + arr = dataset[name] + assert shape == arr.shape + assert set(dims) == set(arr.dims) + assert data_type == arr.dtype + # Validate first/last values + actual_values = actual_func(arr) + assert np.array_equal(expected_values, actual_values) + + +def test_segy_to_mdio_v1__f3() -> None: + """Test the SEG-Y to MDIO conversion for the f3 equinor/segyio dataset.""" + # The f3 dataset comes from + # equinor/segyio (https://github.com/equinor/segyio) project (GNU LGPL license) + # wget https://github.com/equinor/segyio/blob/main/test-data/f3.sgy + + pref_path = "/DATA/equinor-segyio/f3.sgy" + mdio_path = f"{pref_path}_mdio_v1" + + segy_to_mdio( + segy_spec=get_segy_standard(1.0), + mdio_template=TemplateRegistry().get("PostStack3DTime"),
+ input_location=StorageLocation(pref_path), + output_location=StorageLocation(mdio_path), + overwrite=True, + ) + + # Load Xarray dataset from the MDIO file + ds = xr.open_dataset(mdio_path, engine="zarr") + + # Tests "inline" variable + expected = np.array([111, 112, 113]) + _validate_variable(ds, "inline", (23,), ["inline"], np.int32, expected, _get_actual_value) + + # Tests "crossline" variable + expected = np.array([875, 876, 877]) + _validate_variable(ds, "crossline", (18,), ["crossline"], np.int32, expected, _get_actual_value) + + # Tests "time" variable + expected = np.array([0, 4, 8]) + _validate_variable(ds, "time", (75,), ["time"], np.int64, expected, _get_actual_value) + + # Tests "cdp_x" variable + expected = np.array( + [[6201972, 6202222, 6202472], [6201965, 6202215, 6202465], [6201958, 6202208, 6202458]] + ) + _validate_variable( + ds, "cdp_x", (23, 18), ["inline", "crossline"], np.int32, expected, _get_actual_value + ) + + # Tests "cdp_y" variable + expected = np.array( + [ + [60742329, 60742336, 60742343], + [60742579, 60742586, 60742593], + [60742828, 60742835, 60742842], + ] + ) + _validate_variable( + ds, "cdp_y", (23, 18), ["inline", "crossline"], np.int32, expected, _get_actual_value + ) + + # Tests "headers" variable + data_type = np.dtype( + [ + ("trace_seq_num_line", " np.ndarray: + cdp_x_headers = arr.values["cdp_x"] + return cdp_x_headers[_slice_three_values(arr.shape, values_from_start=True)] + + _validate_variable( + ds, "headers", (23, 18), ["inline", "crossline"], data_type, expected, get_actual_headers + ) + + # Tests "trace_mask" variable + expected = np.array([[True, True, True], [True, True, True], [True, True, True]]) + _validate_variable( + ds, "trace_mask", (23, 18), ["inline", "crossline"], np.bool, expected, _get_actual_value + ) + + # Tests "amplitude" variable + expected = np.array( + [ + [[487.0, -1104.0, -1456.0], [-129.0, -1728.0, 445.0], [-1443.0, 741.0, 1458.0]], + [[2464.0, 3220.0, 1362.0], [686.0, 530.0, -282.0], [3599.0, 2486.0, 433.0]], + [[4018.0, 5159.0, 2087.0], [-81.0, -3039.0, -1850.0], [2898.0, 1060.0, -121.0]], + ] + ) + + def get_actual_amplitudes(arr: xr.DataArray) -> np.ndarray: + return arr.values[_slice_three_values(arr.shape, values_from_start=False)] + + _validate_variable( + ds, + "amplitude", + (23, 18, 75), + ["inline", "crossline", "time"], + np.float32, + expected, + get_actual_amplitudes, + ) + + +@pytest.mark.skip(reason="Bug reproducer for the issue 582") +def test_bug_reproducer_structured_xr_to_zar() -> None: + """Bug reproducer for the issue https://github.com/TGSAI/mdio-python/issues/582. 
+ + Will be removed when the bug is fixed. """ + shape = (4, 4, 2) + dim_names = ["inline", "crossline", "depth"] + chunks = (2, 2, 2) + # Pretend that we created a pydantic model from a template + structured_type = StructuredType( + fields=[ + StructuredField(name="cdp_x", format=ScalarType.INT32), + StructuredField(name="cdp_y", format=ScalarType.INT32), + StructuredField(name="elevation", format=ScalarType.FLOAT16), + StructuredField(name="some_scalar", format=ScalarType.FLOAT16), + ] + ) + + xr_dataset = xr.Dataset() + + # Add traces to the dataset, shape = (4, 4, 2) of floats + traces_zarr = zarr.zeros(shape=shape, dtype=np.float32, zarr_format=2) + traces_xr = xr.DataArray(traces_zarr, dims=dim_names) + traces_xr.encoding = { + "_FillValue": np.nan, + "chunks": chunks, + "chunk_key_encoding": V2ChunkKeyEncoding(separator="/").to_dict(), + "compressor": numcodecs.Blosc(cname="zstd", clevel=5, shuffle=1, blocksize=0), + } + xr_dataset["traces"] = traces_xr + + # Add headers to the dataset, shape = (4, 4) of structured type + data_type = to_numpy_dtype(structured_type) + + # Validate the conversion + assert data_type == np.dtype( + [("cdp_x", " trace_worker + + not_null = np.array( + [ + [True, False, False, False], + [False, True, False, False], + [False, False, True, False], + [False, False, False, True], + ] + ) + hdr = (11, 22, -33.0, 44.0) + headers = np.array([hdr, hdr, hdr, hdr], dtype=data_type) + trace = np.array( + [[100.0, 200.0], [300.0, 400.0], [500.0, 600.0], [700.0, 800.0]], dtype=np.float32 + ) + + # Here is one iteration of it: + ds_to_write = xr_dataset[["traces", "headers"]] + # We do not have any coords to reset + # ds_to_write = ds_to_write.reset_coords() + + ds_to_write["headers"].data[not_null] = headers + ds_to_write["headers"].data[~not_null] = 0 + ds_to_write["traces"].data[not_null] = trace + + region = { + "inline": slice(0, 2, None), + "crossline": slice(0, 2, None), + "depth": slice(0, 2, None), + } + + sub_dataset = ds_to_write.isel(region) + sub_dataset.to_zarr( + store="/tmp/reproducer_xr.zarr", # noqa: S108 + region=region, + mode="r+", + write_empty_chunks=False, + zarr_format=2, + ) diff --git a/tests/unit/test_compat.py b/tests/unit/test_compat.py deleted file mode 100644 index e1c950de..00000000 --- a/tests/unit/test_compat.py +++ /dev/null @@ -1,106 +0,0 @@ -"""Test MDIO compatibility with older versions.""" - -from pathlib import Path - -import numpy as np -import pytest -import zarr -from segy import SegyFile -from segy.factory import SegyFactory -from segy.standards import get_segy_standard - -from mdio import mdio_to_segy -from mdio import segy_to_mdio - -# Constants -MDIO_VERSIONS = ["0.7.4", "0.8.3"] -SEGY_REVISIONS = [0.0, 0.1, 1.0, 1.1] -INLINES = (10, 10, 11, 11) -CROSSLINES = (100, 101, 100, 101) -INDEX_BYTES = (189, 193) -API_VERSION_KEY = "api_version" -BINARY_HEADER_KEY = "binary_header" -CHUNKED_TRACE_HEADERS_KEY = "chunked_012_trace_headers" - - -def update_mdio_for_version_0_7_4(root_group: zarr.Group) -> None: - """Update MDIO metadata to mimic version 0.7.4.""" - # Update binary header revision keys - meta_group = root_group.require_group("metadata") - bin_hdr = meta_group.attrs[BINARY_HEADER_KEY] - bin_hdr["SEGYRevision"] = bin_hdr.pop("segy_revision_major") - bin_hdr["SEGYRevisionMinor"] = bin_hdr.pop("segy_revision_minor") - meta_group.attrs[BINARY_HEADER_KEY] = bin_hdr - - # Remove trace headers past field 232 (pre-0.8 schema) - orig_hdr = meta_group[CHUNKED_TRACE_HEADERS_KEY] - new_dtype =
np.dtype(orig_hdr.dtype.descr[:-1]) - meta_group.create_array( - name=CHUNKED_TRACE_HEADERS_KEY, - shape=orig_hdr.shape, - dtype=new_dtype, - chunks=orig_hdr.chunks, - chunk_key_encoding={"name": "v2", "separator": "/"}, - overwrite=True, - ) - zarr.consolidate_metadata(meta_group.store) - - -@pytest.mark.parametrize("mdio_version", MDIO_VERSIONS) -@pytest.mark.parametrize("segy_revision", SEGY_REVISIONS) -def test_revision_encode_decode(mdio_version: str, segy_revision: float, tmp_path: Path) -> None: - """Test binary header major/minor revision roundtrip. - - After introducting TGSAI/segy, we changed the header names. Now we use - aliasing and MDIO has a dummy schema. The handling is slightly different - for SEG-Y revision major/minor numbers. Testing to ensure they're - (de)serialized correctly. - """ - rev1_spec = get_segy_standard(1.0) - segy_filename = tmp_path / "segy_input.sgy" - mdio_output_filename = tmp_path / "output.mdio" - roundtrip_sgy_filename = tmp_path / "roundtrip_output.sgy" - - # Make a rev1 segy - factory = SegyFactory(rev1_spec, sample_interval=1000, samples_per_trace=5) - - # We will replace the values in revision fields with these - minor, major = np.modf(segy_revision) - major, minor = int(major), int(minor * 10) - revision_code = (major << 8) | minor - - # Make fake tiny 3D dataset - txt_buffer = factory.create_textual_header() - - header = factory.create_trace_header_template(len(INLINES)) - data = factory.create_trace_sample_template(len(INLINES)) - header["inline"] = INLINES - header["crossline"] = CROSSLINES - data[:] = np.arange(len(INLINES))[:, None] - trace_buffer = factory.create_traces(header, data) - - # Update revision during bin hdr creation - bin_hdr_buffer = factory.create_binary_header(update={"segy_revision": revision_code}) - with segy_filename.open(mode="wb") as fp: - fp.write(txt_buffer) - fp.write(bin_hdr_buffer) - fp.write(trace_buffer) - - # Convert SEG-Y to MDIO - segy_to_mdio(str(segy_filename), str(mdio_output_filename), index_bytes=INDEX_BYTES) - - # Modify MDIO for specific versions - root = zarr.open_group(mdio_output_filename, mode="r+") - root.attrs[API_VERSION_KEY] = mdio_version - if mdio_version == "0.7.4": - update_mdio_for_version_0_7_4(root) - - # Convert MDIO back to SEG-Y - mdio_to_segy(str(mdio_output_filename), str(roundtrip_sgy_filename)) - - # Assert binary headers and revisions match - orig = SegyFile(segy_filename, spec=rev1_spec) - rt = SegyFile(roundtrip_sgy_filename, spec=rev1_spec) - assert orig.binary_header["segy_revision_major"] == major - assert orig.binary_header["segy_revision_minor"] == minor - assert orig.binary_header == rt.binary_header diff --git a/tests/unit/test_dimension.py b/tests/unit/test_dimension.py index fc40da22..e5645290 100644 --- a/tests/unit/test_dimension.py +++ b/tests/unit/test_dimension.py @@ -17,7 +17,7 @@ class TestDimension: def test_len(self, my_dimension: Dimension) -> None: """Test length method.""" - assert len(my_dimension) == 4 # noqa: PLR2004 + assert len(my_dimension) == 4 @pytest.mark.parametrize(("index", "expected"), [(1, 12), (-1, 16), (2, 14)]) def test_getitem(self, my_dimension: Dimension, index: int, expected: int) -> None: diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py new file mode 100644 index 00000000..0b2a9f54 --- /dev/null +++ b/tests/unit/test_schema.py @@ -0,0 +1,53 @@ +"""Test the schema for the v1 dataset.""" + +from mdio.schemas.v1 import Dataset as V1Dataset + +TEST_SCHEMA = { + "metadata": { + "name": "test_dataset", + "api_version": 
"1.0.0", + "created_on": "2023-01-01T00:00:00Z", + }, + "variables": [ + { + "name": "actual_variable", + "data_type": "float32", + "dimensions": ["dim0", "dim1"], + "compressor": {"name": "blosc", "level": 3}, + "coordinates": ["coord"], + "metadata": { + "chunk_grid": { + "name": "regular", + "configuration": {"chunk_shape": [10, 20]}, + }, + }, + }, + { + "name": "coord", + "data_type": "float32", + "dimensions": ["dim0", "dim1"], + "metadata": { + "chunk_grid": { + "name": "regular", + "configuration": {"chunk_shape": [10, 20]}, + }, + "units_v1": {"length": "m"}, + }, + }, + { + "name": "dim0", + "data_type": "int32", + "dimensions": [{"name": "dim0", "size": 100}], + }, + { + "name": "dim1", + "data_type": "int32", + "dimensions": [{"name": "dim1", "size": 200}], + }, + ], +} + + +def test_dataset_schema_validation() -> None: + """Test that the dataset schema validates correctly.""" + V1Dataset.model_validate(TEST_SCHEMA) diff --git a/tests/unit/v1/__init__.py b/tests/unit/v1/__init__.py new file mode 100644 index 00000000..fa2ea633 --- /dev/null +++ b/tests/unit/v1/__init__.py @@ -0,0 +1 @@ +"""Unit tests for parts of the MDIO package related to the v1 schema.""" diff --git a/tests/unit/v1/converters/test_type_converter.py b/tests/unit/v1/converters/test_type_converter.py new file mode 100644 index 00000000..31d72f15 --- /dev/null +++ b/tests/unit/v1/converters/test_type_converter.py @@ -0,0 +1,95 @@ +"""Unit tests for the type converter module.""" + +import pytest +from numpy import dtype as np_dtype + +from mdio.converters.type_converter import to_numpy_dtype +from mdio.converters.type_converter import to_scalar_type +from mdio.converters.type_converter import to_structured_type +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredField +from mdio.schemas.dtype import StructuredType + + +@pytest.fixture +def supported_scalar_types_map() -> tuple[ScalarType, str]: + """Supported scalar types and their numpy equivalents.""" + return ( + (ScalarType.INT8, "int8"), + (ScalarType.INT16, "int16"), + (ScalarType.INT32, "int32"), + (ScalarType.INT64, "int64"), + (ScalarType.UINT8, "uint8"), + (ScalarType.UINT16, "uint16"), + (ScalarType.UINT32, "uint32"), + (ScalarType.UINT64, "uint64"), + (ScalarType.FLOAT32, "float32"), + (ScalarType.FLOAT64, "float64"), + (ScalarType.COMPLEX64, "complex64"), + (ScalarType.COMPLEX128, "complex128"), + (ScalarType.BOOL, "bool"), + ) + + +@pytest.fixture +def a_structured_type() -> StructuredType: + """Sample structured type. + + Returns a structured type. 
+ """ + return StructuredType( + fields=[ + StructuredField(name="x", format=ScalarType.FLOAT64), + StructuredField(name="y", format=ScalarType.FLOAT64), + StructuredField(name="z", format=ScalarType.FLOAT64), + StructuredField(name="id", format=ScalarType.INT32), + StructuredField(name="valid", format=ScalarType.BOOL), + ] + ) + + +def test_to_numpy_dtype( + supported_scalar_types_map: tuple[ScalarType, str], a_structured_type: StructuredType +) -> None: + """Comprehensive test for to_numpy_dtype function.""" + # Test 0: invalid input + err = "Expected ScalarType or StructuredType, got 'str'" + with pytest.raises(ValueError, match=err): + to_numpy_dtype("parameter of invalid type") + + # Test 1: ScalarType cases - all supported scalar types + for scalar_type, expected_numpy_type in supported_scalar_types_map: + result = to_numpy_dtype(scalar_type) + expected = np_dtype(expected_numpy_type) + assert result == expected + assert isinstance(result, np_dtype) + assert result.name == expected.name + + # Test 2: StructuredType with multiple fields + result_multi = to_numpy_dtype(a_structured_type) + expected_multi = np_dtype( + [("x", "float64"), ("y", "float64"), ("z", "float64"), ("id", "int32"), ("valid", "bool")] + ) + + assert result_multi == expected_multi + assert isinstance(result_multi, np_dtype) + assert len(result_multi.names) == 5 + assert set(result_multi.names) == {"x", "y", "z", "id", "valid"} + + +def test_to_scalar_type(supported_scalar_types_map: tuple[ScalarType, str]) -> None: + """Test for to_scalar_type function.""" + for expected_mdio_type, numpy_type in supported_scalar_types_map: + result = to_scalar_type(np_dtype(numpy_type)) + assert result == expected_mdio_type + + +def test_to_structured_type(a_structured_type: StructuredType) -> None: + """Test for to_structured_type function.""" + dtype = np_dtype( + [("x", "float64"), ("y", "float64"), ("z", "float64"), ("id", "int32"), ("valid", "bool")] + ) + assert a_structured_type == to_structured_type(dtype) + + dtype = np_dtype([("x", " None: + """Test the ChunkIterator class.""" + dims = ["inline", "crossline", "depth"] + chunks = (3, 4, 5) + + shape = (6, 12, 20) + iter1 = ChunkIterator(shape=shape, chunks=chunks, dim_names=dims) + assert iter1.arr_shape == shape + assert iter1.dims == dims + assert iter1.len_chunks == chunks + assert iter1.dim_chunks == (2, 3, 4) + assert iter1.num_chunks == 24 + + shape = (5, 11, 19) + iter2 = ChunkIterator(shape=shape, chunks=chunks, dim_names=dims) + assert iter2.dim_chunks == (2, 3, 4) + assert iter2.num_chunks == 24 + + # Its purpose is to confirm that all slices are created of the same size, + # even if the last slice should have been smaller. 
+ for _ in range(13): # element index 12 + region = next(iter1) + assert region == { + "inline": slice(3, 6, None), + "crossline": slice(0, 4, None), + "depth": slice(0, 5, None), + } + + for _ in range(13): # element index 12 + region = next(iter2) + assert region == { + "inline": slice(3, 6, None), + "crossline": slice(0, 4, None), + "depth": slice(0, 5, None), + } + + +def test_chunk_iterator_returning_tuple() -> None: + """Test the ChunkIterator class when regions are returned as tuples.""" + chunks = (3, 4, 5) + + shape = (6, 12, 20) + iter1 = ChunkIterator(shape=shape, chunks=chunks) + assert iter1.arr_shape == shape + assert iter1.dims is None + assert iter1.len_chunks == chunks + assert iter1.dim_chunks == (2, 3, 4) + assert iter1.num_chunks == 24 + + shape = (5, 11, 19) + iter2 = ChunkIterator(shape=shape, chunks=chunks) + assert iter2.dim_chunks == (2, 3, 4) + assert iter2.num_chunks == 24 + + # The loop below confirms that all slices are created with the same size, + # even when the last slice would otherwise be smaller. + for _ in range(13): # element index 12 + region = next(iter1) + assert region == (slice(3, 6, None), slice(0, 4, None), slice(0, 5, None)) + + for _ in range(13): # element index 12 + region = next(iter2) + assert region == (slice(3, 6, None), slice(0, 4, None), slice(0, 5, None)) + + +def val(shape: tuple[int, int, int], i: int, j: int, k: int) -> int: + """Calculate the linear index in a 3D array.""" + return i * (shape[1] * shape[2]) + j * shape[2] + k + + +def mock_trace_worker( + shape: tuple[int, int, int], region: dict[str, slice], dataset: xr_Dataset, grid_map: np.ndarray +) -> None: + """Mock trace worker function. + + Note: + Xarray, Zarr, and NumPy automatically truncate the slice to the valid bounds of the array + (see the test above, where the last chunk is always of the same size) + and do not raise an error.
However, accessing an element at an index + that is out of bounds raises an IndexError. + """ + # We used a 2D selection with 2D index_slices + assert grid_map.shape == (3, 4, 20) + # We used a 3D selection with isel() + assert tuple(dataset.dims[d] for d in region) == (3, 4, 5) + + dimension_names = list(dataset.dims) + + slice0 = region[dimension_names[0]] + slice1 = region[dimension_names[1]] + slice2 = region[dimension_names[2]] + for ii, i in enumerate(range(slice0.start, min(slice0.stop, shape[0]))): + for jj, j in enumerate(range(slice1.start, min(slice1.stop, shape[1]))): + for kk, k in enumerate(range(slice2.start, min(slice2.stop, shape[2]))): + # Validate that we've got the sample indexing right + assert dataset["amplitude"].values[ii, jj, kk] == val(shape, i, j, k) + # NOTE: grid_map was sliced only along the first two dims, so use the global k for depth + assert dataset["amplitude"].values[ii, jj, kk] == grid_map[ii, jj, k] + + +def test_chunk_iterator_with_dataset() -> None: + """Test the ChunkIterator with a dataset.""" + shape = (6, 12, 20) + dims = ["inline", "crossline", "depth"] + chunks = (3, 4, 5) + + data3 = np.arange(shape[0] * shape[1] * shape[2]).reshape(shape) + amplitude = xr_DataArray(data3, dims=dims, name="amplitude") + ds = xr_Dataset({"amplitude": amplitude}) + + chunk_iter = ChunkIterator(shape, chunks, dims) + for region in chunk_iter: + # If one needs both a dict and a tuple of slices, + # one can use the following line as an example to strip dim names out + index_slices = tuple(region[key] for key in dims[:-1]) + # The .isel() method takes a mapping (here: region) where each key corresponds + # to a dimension name and the value is an integer, a slice object (our case), + # or an array-like object + mock_trace_worker(shape, region, ds.isel(region), amplitude[index_slices]) diff --git a/tests/unit/v1/core/test_storage_location.py b/tests/unit/v1/core/test_storage_location.py new file mode 100644 index 00000000..4f18e93a --- /dev/null +++ b/tests/unit/v1/core/test_storage_location.py @@ -0,0 +1,132 @@ +"""Unit tests for StorageLocation class.""" + +import tempfile +from pathlib import Path +from unittest.mock import MagicMock +from unittest.mock import Mock +from unittest.mock import patch + +import pytest + +from mdio.core.storage_location import StorageLocation + + +class TestStorageLocation: + """Test cases for StorageLocation class.""" + + @patch("fsspec.filesystem") + def test_exists(self, mock_filesystem: MagicMock, capsys: pytest.CaptureFixture[str]) -> None: + """Test the exists() method of StorageLocation.""" + # Test exists() returns True when file exists. + mock_fs = Mock() + mock_fs.exists.return_value = True + mock_filesystem.return_value = mock_fs + location = StorageLocation("/test/existing/file") + result = location.exists() + assert result is True + mock_fs.exists.assert_called_once() + + # Test exists() returns False when file does not exist. + mock_fs = Mock() + mock_fs.exists.return_value = False + mock_filesystem.return_value = mock_fs + location = StorageLocation("/test/nonexistent/file") + result = location.exists() + assert result is False + mock_fs.exists.assert_called_once() + + # Test exists() handles exceptions gracefully.
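+ # For context, exists() presumably delegates to fsspec roughly as sketched + # below; this is an assumption motivating the mocks, not the asserted + # implementation: + # fs = fsspec.filesystem(protocol, **options) + # try: + # return fs.exists(path) + # except Exception: + # return False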
+ mock_fs = Mock() + mock_fs.exists.side_effect = Exception("Connection failed") + mock_filesystem.return_value = mock_fs + location = StorageLocation("s3://bucket/file") + result = location.exists() + assert result is False + captured = capsys.readouterr() + assert "Error checking existence of s3://bucket/file: Connection failed" in captured.out + + def test_representations(self) -> None: + """Test string and developer representations of StorageLocation.""" + # Test string representation of StorageLocation. + location = StorageLocation("/test/path") + assert str(location) == "/test/path" + + # Test developer representation of StorageLocation. + + uri = "s3://my-bucket/file.segy" + options = {"region": "us-west-2"} + location = StorageLocation(uri=uri, options=options) + expected = ( + "StorageLocation(uri='s3://my-bucket/file.segy', options={'region': 'us-west-2'})" + ) + assert repr(location) == expected + + def test_from_path(self) -> None: + """Test construction from local file paths.""" + # Test with string path. + path_str = "/home/user/data.segy" + location = StorageLocation(path_str) + # Should resolve to absolute path + expected_path = str(Path(path_str).resolve()) + assert location.uri == expected_path + assert location.options == {} + + # Test with a file:// URI. + location = StorageLocation(f"file://{path_str}") + # Should resolve to absolute path + expected_path = str(Path(path_str).resolve()) + assert location.uri == expected_path + assert location.options == {} + + # Test with real local file operations. + # Create a temporary file for testing + with tempfile.NamedTemporaryFile(delete=False) as temp_file: + temp_path = Path(temp_file.name) + temp_file.write(b"test content") + try: + # Test with real local file + location = StorageLocation(str(temp_path)) + # Should exist + assert location.exists() is True + # Should have correct URI + assert location.uri == str(temp_path.resolve()) + finally: + # Clean up + temp_path.unlink() + # Now should not exist + assert location.exists() is False + + def test_from_cloud(self) -> None: + """Test construction from cloud storage URIs.""" + # Test an S3 URI without options. + s3_uri = "s3://bucket/file" + location = StorageLocation(s3_uri) + assert location.uri == s3_uri + assert location.options == {} + + # Test a valid S3 URI with options.
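+ # Such options are presumably passed through to the underlying fsspec + # filesystem as storage options (illustrative only; the pass-through is + # not asserted by this test): + # fs = fsspec.filesystem("s3", anon=True) # e.g. anonymous S3 access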
+ s3_uri = "s3://my-bucket/path/to/file.segy" + options = {"region": "us-west-2", "aws_access_key_id": "key123"} + location = StorageLocation(s3_uri, options=options) + assert location.uri == s3_uri + assert location.options == options + + def test_options_immutability(self) -> None: + """Test that options property returns a defensive copy.""" + original_options = {"region": "us-east-1", "timeout": 30} + location = StorageLocation(uri="s3://bucket/file", options=original_options) + + # Get options through property + returned_options = location.options + + # Verify it's equal to original + assert returned_options == original_options + + # Modify the returned dict + returned_options["new_key"] = "new_value" + returned_options["timeout"] = 60 + + # Original should be unchanged + assert location.options == original_options + assert "new_key" not in location.options + assert location.options["timeout"] == 30 diff --git a/tests/unit/v1/helpers.py b/tests/unit/v1/helpers.py new file mode 100644 index 00000000..ca0383d0 --- /dev/null +++ b/tests/unit/v1/helpers.py @@ -0,0 +1,276 @@ +"""Helper methods used in unit tests.""" + +from mdio.schemas.chunk_grid import RegularChunkGrid +from mdio.schemas.chunk_grid import RegularChunkShape +from mdio.schemas.compressors import Blosc +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredField +from mdio.schemas.dtype import StructuredType +from mdio.schemas.metadata import ChunkGridMetadata +from mdio.schemas.metadata import UserAttributes +from mdio.schemas.v1.dataset import Dataset +from mdio.schemas.v1.dataset_builder import MDIODatasetBuilder +from mdio.schemas.v1.dataset_builder import _BuilderState +from mdio.schemas.v1.dataset_builder import _get_named_dimension +from mdio.schemas.v1.stats import CenteredBinHistogram +from mdio.schemas.v1.stats import StatisticsMetadata +from mdio.schemas.v1.stats import SummaryStatistics +from mdio.schemas.v1.units import AllUnits +from mdio.schemas.v1.units import LengthUnitEnum +from mdio.schemas.v1.units import LengthUnitModel +from mdio.schemas.v1.units import SpeedUnitEnum +from mdio.schemas.v1.units import SpeedUnitModel +from mdio.schemas.v1.variable import Coordinate +from mdio.schemas.v1.variable import Variable + + +def validate_builder( + builder: MDIODatasetBuilder, state: _BuilderState, n_dims: int, n_coords: int, n_var: int +) -> None: + """Validate the state of the builder, the number of dimensions, coordinates, and variables.""" + assert builder._state == state + assert len(builder._dimensions) == n_dims + assert len(builder._coordinates) == n_coords + assert len(builder._variables) == n_var + + +def validate_coordinate( + builder: MDIODatasetBuilder, name: str, dims: list[tuple[str, int]], dtype: ScalarType +) -> Coordinate: + """Validate existence and the structure of the created coordinate.""" + # Validate that coordinate exists + c = next((c for c in builder._coordinates if c.name == name), None) + assert c is not None + assert isinstance(c, Coordinate) + + # Validate that dimensions are stored as NamedDimensions + for d in dims: + name = d[0] + size = d[1] + assert _get_named_dimension(c.dimensions, name, size) is not None + + assert c.data_type == dtype + return c + + +def validate_variable( + container: MDIODatasetBuilder | Dataset, + name: str, + dims: list[tuple[str, int]], + coords: list[str], + dtype: ScalarType, +) -> Variable: + """Validate existence and the structure of the created variable.""" + if isinstance(container, MDIODatasetBuilder): + var_list = 
container._variables + global_coord_list = container._coordinates + elif isinstance(container, Dataset): + var_list = container.variables + global_coord_list = _get_all_coordinates(container) + else: + err_msg = f"Expected MDIODatasetBuilder or Dataset, got {type(container)}" + raise TypeError(err_msg) + + # Validate that the variable exists + v = next((e for e in var_list if e.name == name), None) + assert v is not None + assert isinstance(v, Variable) + + # Validate that dimensions are stored as NamedDimensions within the variable + assert len(v.dimensions) == len(dims) + for d in dims: + name = d[0] + size = d[1] + assert _get_named_dimension(v.dimensions, name, size) is not None + + # Validate that coordinates are either embedded or can be resolved from names to Coordinate + if coords is None: + assert v.coordinates is None + else: + assert len(v.coordinates) == len(coords) + for coord_name in coords: + assert _get_coordinate(global_coord_list, v.coordinates, coord_name) is not None + + assert v.data_type == dtype + return v + + +def _get_coordinate( + global_coord_list: list[Coordinate], + coordinates_or_references: list[Coordinate] | list[str], + name: str, +) -> Coordinate | None: + """Get a coordinate by name from the list[Coordinate] | list[str]. + + The function validates that the coordinate referenced by the name can be found + in the global coordinate list. + If the coordinate is stored as a Coordinate object, it is returned directly. + """ + if coordinates_or_references is None: + return None + + for c in coordinates_or_references: + if isinstance(c, str) and c == name: + # The coordinate is stored by name (str). + cc = None + # Find the Coordinate in the global list and return it. + if global_coord_list is not None: + cc = next((cc for cc in global_coord_list if cc.name == name), None) + if cc is None: + msg = f"Pre-existing coordinate named {name!r} is not found" + raise ValueError(msg) + return cc + if isinstance(c, Coordinate) and c.name == name: + # The coordinate is stored as an embedded Coordinate object. + # Return it. + return c + + return None + + +def _get_all_coordinates(dataset: Dataset) -> list[Coordinate]: + """Get all coordinates from the dataset.""" + all_coords: dict[str, Coordinate] = {} + for v in dataset.variables: + if v.coordinates is not None: + for c in v.coordinates: + if isinstance(c, Coordinate) and c.name not in all_coords: + all_coords[c.name] = c + return list(all_coords.values()) + + +def output_path(file_dir: str, file_name: str, debugging: bool = False) -> str: + """Generate the output path for the test file-system output. + + Note: + Use debugging=True, if you need to retain the created files for debugging + purposes. Otherwise, the files will be created in-memory and not saved to disk. + """ + if debugging: + # Use the following for debugging: + file_path = f"{file_dir}/mdio-tests/{file_name}.zarr" + else: + # Use the following for normal runs: + file_path = f"memory://path_to_zarr/mdio-tests/{file_name}.zarr" + return file_path + + +def make_seismic_poststack_3d_acceptance_dataset(dataset_name: str) -> Dataset: + """Create in-memory Seismic PostStack 3D Acceptance dataset.""" + ds = MDIODatasetBuilder( + dataset_name, + attributes=UserAttributes( + attributes={ + "textHeader": [ + "C01 .......................... ", + "C02 .......................... ", + "C03 .......................... 
", + ], + "foo": "bar", + } + ), + ) + + # Add dimensions + ds.add_dimension("inline", 256) + ds.add_dimension("crossline", 512) + ds.add_dimension("depth", 384) + ds.add_coordinate("inline", dimensions=["inline"], data_type=ScalarType.UINT32) + ds.add_coordinate("crossline", dimensions=["crossline"], data_type=ScalarType.UINT32) + ds.add_coordinate( + "depth", + dimensions=["depth"], + data_type=ScalarType.UINT32, + metadata_info=[AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER))], + ) + # Add coordinates + ds.add_coordinate( + "cdp_x", + dimensions=["inline", "crossline"], + data_type=ScalarType.FLOAT32, + metadata_info=[AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER))], + ) + ds.add_coordinate( + "cdp_y", + dimensions=["inline", "crossline"], + data_type=ScalarType.FLOAT32, + metadata_info=[AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER))], + ) + + # Add image variable + ds.add_variable( + name="image", + dimensions=["inline", "crossline", "depth"], + data_type=ScalarType.FLOAT32, + compressor=Blosc(algorithm="zstd"), + coordinates=["cdp_x", "cdp_y"], + metadata_info=[ + ChunkGridMetadata( + chunk_grid=RegularChunkGrid( + configuration=RegularChunkShape(chunk_shape=[128, 128, 128]) + ) + ), + StatisticsMetadata( + stats_v1=SummaryStatistics( + count=100, + sum=1215.1, + sumSquares=125.12, + min=5.61, + max=10.84, + histogram=CenteredBinHistogram(binCenters=[1, 2], counts=[10, 15]), + ) + ), + UserAttributes(attributes={"fizz": "buzz"}), + ], + ) + # Add velocity variable + ds.add_variable( + name="velocity", + dimensions=["inline", "crossline", "depth"], + data_type=ScalarType.FLOAT16, + coordinates=["cdp_x", "cdp_y"], + metadata_info=[ + ChunkGridMetadata( + chunk_grid=RegularChunkGrid( + configuration=RegularChunkShape(chunk_shape=[128, 128, 128]) + ) + ), + AllUnits(units_v1=SpeedUnitModel(speed=SpeedUnitEnum.METER_PER_SECOND)), + ], + ) + # Add inline-optimized image variable + ds.add_variable( + name="image_inline", + long_name="inline optimized version of 3d_stack", + dimensions=["inline", "crossline", "depth"], + data_type=ScalarType.FLOAT32, + compressor=Blosc(algorithm="zstd"), + coordinates=["cdp_x", "cdp_y"], + metadata_info=[ + ChunkGridMetadata( + chunk_grid=RegularChunkGrid( + configuration=RegularChunkShape(chunk_shape=[4, 512, 512]) + ) + ) + ], + ) + # Add headers variable with structured dtype + ds.add_variable( + name="image_headers", + dimensions=["inline", "crossline"], + coordinates=["cdp_x", "cdp_y"], + data_type=StructuredType( + fields=[ + StructuredField(name="cdp_x", format=ScalarType.INT32), + StructuredField(name="cdp_y", format=ScalarType.INT32), + StructuredField(name="elevation", format=ScalarType.FLOAT16), + StructuredField(name="some_scalar", format=ScalarType.FLOAT16), + ] + ), + metadata_info=[ + ChunkGridMetadata( + chunk_grid=RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=[128, 128])) + ) + ], + ) + return ds.build() diff --git a/tests/unit/v1/templates/conftest.py b/tests/unit/v1/templates/conftest.py new file mode 100644 index 00000000..1a3e5e09 --- /dev/null +++ b/tests/unit/v1/templates/conftest.py @@ -0,0 +1,21 @@ +"""Unit tests for the conftest module in the templates directory.""" + +# conftest.py +import pytest + +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredField +from mdio.schemas.dtype import StructuredType + + +@pytest.fixture(scope="session") +def structured_headers() -> StructuredType: + """Fixture to provide structured headers for 
testing.""" + return StructuredType( + fields=[ + StructuredField(name="cdp_x", format=ScalarType.INT32), + StructuredField(name="cdp_y", format=ScalarType.INT32), + StructuredField(name="elevation", format=ScalarType.FLOAT16), + StructuredField(name="some_scalar", format=ScalarType.FLOAT16), + ] + ) diff --git a/tests/unit/v1/templates/test_seismic_2d_poststack.py b/tests/unit/v1/templates/test_seismic_2d_poststack.py new file mode 100644 index 00000000..89b5d065 --- /dev/null +++ b/tests/unit/v1/templates/test_seismic_2d_poststack.py @@ -0,0 +1,228 @@ +"""Unit tests for Seismic2DPostStackTemplate.""" + +from tests.unit.v1.helpers import validate_variable + +from mdio.schemas.chunk_grid import RegularChunkGrid +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredType +from mdio.schemas.v1.dataset import Dataset +from mdio.schemas.v1.templates.seismic_2d_poststack import Seismic2DPostStackTemplate +from mdio.schemas.v1.units import AllUnits +from mdio.schemas.v1.units import LengthUnitEnum +from mdio.schemas.v1.units import LengthUnitModel +from mdio.schemas.v1.units import TimeUnitEnum +from mdio.schemas.v1.units import TimeUnitModel + +_UNIT_METER = AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)) +_UNIT_SECOND = AllUnits(units_v1=TimeUnitModel(time=TimeUnitEnum.SECOND)) + + +def _validate_coordinates_headers_trace_mask( + dataset: Dataset, headers: StructuredType, domain: str +) -> None: + """Validate the coordinate, headers, trace_mask variables in the dataset.""" + # Verify variables + # 2 dim coords + 2 non-dim coords + 1 data + 1 trace mask + 1 headers = 6 variables + assert len(dataset.variables) == 7 + + # Verify trace headers + validate_variable( + dataset, + name="headers", + dims=[("cdp", 2048)], + coords=["cdp_x", "cdp_y"], + dtype=headers, + ) + + validate_variable( + dataset, + name="trace_mask", + dims=[("cdp", 2048)], + coords=["cdp_x", "cdp_y"], + dtype=ScalarType.BOOL, + ) + + # Verify dimension coordinate variables + inline = validate_variable( + dataset, + name="cdp", + dims=[("cdp", 2048)], + coords=["cdp"], + dtype=ScalarType.INT32, + ) + assert inline.metadata is None + + domain = validate_variable( + dataset, + name=domain, + dims=[(domain, 4096)], + coords=[domain], + dtype=ScalarType.INT32, + ) + assert domain.metadata is None + + # Verify non-dimension coordinate variables + cdp_x = validate_variable( + dataset, + name="cdp_x", + dims=[("cdp", 2048)], + coords=["cdp_x"], + dtype=ScalarType.FLOAT64, + ) + assert cdp_x.metadata.units_v1.length == LengthUnitEnum.METER + + cdp_y = validate_variable( + dataset, + name="cdp_y", + dims=[("cdp", 2048)], + coords=["cdp_y"], + dtype=ScalarType.FLOAT64, + ) + assert cdp_y.metadata.units_v1.length == LengthUnitEnum.METER + + +class TestSeismic2DPostStackTemplate: + """Unit tests for Seismic2DPostStackTemplate.""" + + def test_configuration_depth(self) -> None: + """Test configuration of Seismic2DPostStackTemplate with depth domain.""" + t = Seismic2DPostStackTemplate("depth") + + # Template attributes + assert t._trace_domain == "depth" + assert t._coord_dim_names == ["cdp"] + assert t._dim_names == ["cdp", "depth"] + assert t._coord_names == ["cdp_x", "cdp_y"] + assert t._var_chunk_shape == [1024, 1024] + + # Variables instantiated when build_dataset() is called + assert t._builder is None + assert t._dim_sizes == [] + assert t._horizontal_coord_unit is None + + # Verify dataset attributes + attrs = t._load_dataset_attributes() + assert attrs.attributes == { + 
"surveyDimensionality": "2D", + "ensembleType": "line", + "processingStage": "post-stack", + } + + assert t.trace_variable_name == "amplitude" + + def test_configuration_time(self) -> None: + """Test configuration of Seismic2DPostStackTemplate with time domain.""" + t = Seismic2DPostStackTemplate("time") + + # Template attributes + assert t._trace_domain == "time" + assert t._coord_dim_names == ["cdp"] + assert t._dim_names == ["cdp", "time"] + assert t._coord_names == ["cdp_x", "cdp_y"] + assert t._var_chunk_shape == [1024, 1024] + + # Variables instantiated when build_dataset() is called + assert t._builder is None + assert t._dim_sizes == [] + assert t._horizontal_coord_unit is None + + # Verify dataset attributes + attrs = t._load_dataset_attributes() + assert attrs.attributes == { + "surveyDimensionality": "2D", + "ensembleType": "line", + "processingStage": "post-stack", + } + assert t.trace_variable_name == "amplitude" + + def test_domain_case_handling(self) -> None: + """Test that domain parameter handles different cases correctly.""" + # Test uppercase + t1 = Seismic2DPostStackTemplate("ELEVATION") + assert t1._trace_domain == "elevation" + assert t1.name == "PostStack2DElevation" + + # Test mixed case + t2 = Seismic2DPostStackTemplate("elevatioN") + assert t2._trace_domain == "elevation" + assert t2.name == "PostStack2DElevation" + + def test_build_dataset_depth(self, structured_headers: StructuredType) -> None: + """Test building a complete 2D depth dataset.""" + t = Seismic2DPostStackTemplate("depth") + + dataset = t.build_dataset( + "Seismic 2D Depth Line 001", + sizes=[2048, 4096], + horizontal_coord_unit=_UNIT_METER, + headers=structured_headers, + ) + + # Verify dataset metadata + assert dataset.metadata.name == "Seismic 2D Depth Line 001" + assert dataset.metadata.attributes["surveyDimensionality"] == "2D" + assert dataset.metadata.attributes["ensembleType"] == "line" + assert dataset.metadata.attributes["processingStage"] == "post-stack" + + _validate_coordinates_headers_trace_mask(dataset, structured_headers, "depth") + + # Verify seismic variable + seismic = validate_variable( + dataset, + name="amplitude", + dims=[("cdp", 2048), ("depth", 4096)], + coords=["cdp_x", "cdp_y"], + dtype=ScalarType.FLOAT32, + ) + assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid) + assert seismic.metadata.chunk_grid.configuration.chunk_shape == [1024, 1024] + assert seismic.metadata.stats_v1 is None + + def test_build_dataset_time(self, structured_headers: StructuredType) -> None: + """Test building a complete 2D time dataset.""" + t = Seismic2DPostStackTemplate("time") + + dataset = t.build_dataset( + "Seismic 2D Time Line 001", + sizes=[2048, 4096], + horizontal_coord_unit=_UNIT_METER, + headers=structured_headers, + ) + + # Verify dataset metadata + assert dataset.metadata.name == "Seismic 2D Time Line 001" + assert dataset.metadata.attributes["surveyDimensionality"] == "2D" + assert dataset.metadata.attributes["ensembleType"] == "line" + assert dataset.metadata.attributes["processingStage"] == "post-stack" + + _validate_coordinates_headers_trace_mask(dataset, structured_headers, "time") + + # Verify seismic variable + v = validate_variable( + dataset, + name="amplitude", + dims=[("cdp", 2048), ("time", 4096)], + coords=["cdp_x", "cdp_y"], + dtype=ScalarType.FLOAT32, + ) + assert isinstance(v.metadata.chunk_grid, RegularChunkGrid) + assert v.metadata.chunk_grid.configuration.chunk_shape == [1024, 1024] + assert v.metadata.stats_v1 is None + + def 
test_time_vs_depth_comparison(self) -> None: + """Test differences between time and depth templates.""" + time_template = Seismic2DPostStackTemplate("time") + depth_template = Seismic2DPostStackTemplate("depth") + + # Different trace domains + assert time_template._trace_domain == "time" + assert depth_template._trace_domain == "depth" + + # Different names + assert time_template.name == "PostStack2DTime" + assert depth_template.name == "PostStack2DDepth" + + # Same other attributes + assert time_template._coord_dim_names == depth_template._coord_dim_names + assert time_template._coord_names == depth_template._coord_names + assert time_template._var_chunk_shape == depth_template._var_chunk_shape diff --git a/tests/unit/v1/templates/test_seismic_3d_poststack.py b/tests/unit/v1/templates/test_seismic_3d_poststack.py new file mode 100644 index 00000000..8f4c1ad0 --- /dev/null +++ b/tests/unit/v1/templates/test_seismic_3d_poststack.py @@ -0,0 +1,222 @@ +"""Unit tests for Seismic3DPostStackTemplate.""" + +from tests.unit.v1.helpers import validate_variable + +from mdio.schemas.chunk_grid import RegularChunkGrid +from mdio.schemas.compressors import Blosc +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredType +from mdio.schemas.v1.dataset import Dataset +from mdio.schemas.v1.templates.seismic_3d_poststack import Seismic3DPostStackTemplate +from mdio.schemas.v1.units import AllUnits +from mdio.schemas.v1.units import LengthUnitEnum +from mdio.schemas.v1.units import LengthUnitModel +from mdio.schemas.v1.units import TimeUnitEnum +from mdio.schemas.v1.units import TimeUnitModel + +_UNIT_METER = AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)) +_UNIT_SECOND = AllUnits(units_v1=TimeUnitModel(time=TimeUnitEnum.SECOND)) + + +def _validate_coordinates_headers_trace_mask( + dataset: Dataset, headers: StructuredType, domain: str +) -> None: + """Validate the coordinate, headers, and trace_mask variables in the dataset.""" + # Verify variables + # 3 dim coords + 2 non-dim coords + 1 data + 1 trace mask + 1 headers = 8 variables + assert len(dataset.variables) == 8 + + # Verify trace headers + validate_variable( + dataset, + name="headers", + dims=[("inline", 256), ("crossline", 512)], + coords=["cdp_x", "cdp_y"], + dtype=headers, + ) + + validate_variable( + dataset, + name="trace_mask", + dims=[("inline", 256), ("crossline", 512)], + coords=["cdp_x", "cdp_y"], + dtype=ScalarType.BOOL, + ) + + # Verify dimension coordinate variables + inline = validate_variable( + dataset, + name="inline", + dims=[("inline", 256)], + coords=["inline"], + dtype=ScalarType.INT32, + ) + assert inline.metadata is None + + crossline = validate_variable( + dataset, + name="crossline", + dims=[("crossline", 512)], + coords=["crossline"], + dtype=ScalarType.INT32, + ) + assert crossline.metadata is None + + domain_var = validate_variable( + dataset, + name=domain, + dims=[(domain, 1024)], + coords=[domain], + dtype=ScalarType.INT32, + ) + assert domain_var.metadata is None + + # Verify non-dimension coordinate variables + cdp_x = validate_variable( + dataset, + name="cdp_x", + dims=[("inline", 256), ("crossline", 512)], + coords=["cdp_x"], + dtype=ScalarType.FLOAT64, + ) + assert cdp_x.metadata.units_v1.length == LengthUnitEnum.METER + + cdp_y = validate_variable( + dataset, + name="cdp_y", + dims=[("inline", 256), ("crossline", 512)], + coords=["cdp_y"], + dtype=ScalarType.FLOAT64, + ) + assert cdp_y.metadata.units_v1.length == LengthUnitEnum.METER + + +class TestSeismic3DPostStackTemplate:
+ """Unit tests for Seismic3DPostStackTemplate.""" + + def test_configuration_depth(self) -> None: + """Unit tests for Seismic3DPostStackTemplate with depth domain.""" + t = Seismic3DPostStackTemplate(domain="depth") + + # Template attributes to be overridden by subclasses + assert t._trace_domain == "depth" # Domain should be lowercased + assert t._coord_dim_names == ["inline", "crossline"] + assert t._dim_names == ["inline", "crossline", "depth"] + assert t._coord_names == ["cdp_x", "cdp_y"] + assert t._var_chunk_shape == [128, 128, 128] + + # Variables instantiated when build_dataset() is called + assert t._builder is None + assert t._dim_sizes == [] + assert t._horizontal_coord_unit is None + + # Verify dataset attributes + attrs = t._load_dataset_attributes() + assert attrs.attributes == { + "surveyDimensionality": "3D", + "ensembleType": "line", + "processingStage": "post-stack", + } + assert t.trace_variable_name == "amplitude" + + def test_configuration_time(self) -> None: + """Unit tests for Seismic3DPostStackTemplate with time domain.""" + t = Seismic3DPostStackTemplate(domain="time") + + # Template attributes to be overridden by subclasses + assert t._trace_domain == "time" # Domain should be lowercased + assert t._coord_dim_names == ["inline", "crossline"] + assert t._dim_names == ["inline", "crossline", "time"] + assert t._coord_names == ["cdp_x", "cdp_y"] + assert t._var_chunk_shape == [128, 128, 128] + + # Variables instantiated when build_dataset() is called + assert t._builder is None + assert t._dim_sizes == [] + assert t._horizontal_coord_unit is None + + assert t._load_dataset_attributes().attributes == { + "surveyDimensionality": "3D", + "ensembleType": "line", + "processingStage": "post-stack", + } + + assert t.name == "PostStack3DTime" + + def test_domain_case_handling(self) -> None: + """Test that domain parameter handles different cases correctly.""" + # Test uppercase + t1 = Seismic3DPostStackTemplate("ELEVATION") + assert t1._trace_domain == "elevation" + assert t1.name == "PostStack3DElevation" + + # Test mixed case + t2 = Seismic3DPostStackTemplate("elevatioN") + assert t2._trace_domain == "elevation" + assert t2.name == "PostStack3DElevation" + + def test_build_dataset_depth(self, structured_headers: StructuredType) -> None: + """Unit tests for Seismic3DPostStackTemplate build with depth domain.""" + t = Seismic3DPostStackTemplate(domain="depth") + + assert t.name == "PostStack3DDepth" + dataset = t.build_dataset( + "Seismic 3D", + sizes=[256, 512, 1024], + horizontal_coord_unit=_UNIT_METER, + headers=structured_headers, + ) + + assert dataset.metadata.name == "Seismic 3D" + assert dataset.metadata.attributes["surveyDimensionality"] == "3D" + assert dataset.metadata.attributes["ensembleType"] == "line" + assert dataset.metadata.attributes["processingStage"] == "post-stack" + + _validate_coordinates_headers_trace_mask(dataset, structured_headers, "depth") + + # Verify seismic variable + seismic = validate_variable( + dataset, + name="amplitude", + dims=[("inline", 256), ("crossline", 512), ("depth", 1024)], + coords=["cdp_x", "cdp_y"], + dtype=ScalarType.FLOAT32, + ) + assert isinstance(seismic.compressor, Blosc) + assert seismic.compressor.algorithm == "zstd" + assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid) + assert seismic.metadata.chunk_grid.configuration.chunk_shape == [128, 128, 128] + assert seismic.metadata.stats_v1 is None + + def test_build_dataset_time(self, structured_headers: StructuredType) -> None: + """Unit tests for 
Seismic3DPostStackTemplate build with time domain.""" + t = Seismic3DPostStackTemplate(domain="time") + + assert t.name == "PostStack3DTime" + dataset = t.build_dataset( + "Seismic 3D", + sizes=[256, 512, 1024], + horizontal_coord_unit=_UNIT_METER, + headers=structured_headers, + ) + + assert dataset.metadata.name == "Seismic 3D" + assert dataset.metadata.attributes["surveyDimensionality"] == "3D" + assert dataset.metadata.attributes["ensembleType"] == "line" + assert dataset.metadata.attributes["processingStage"] == "post-stack" + + _validate_coordinates_headers_trace_mask(dataset, structured_headers, "time") + + # Verify seismic variable + seismic = validate_variable( + dataset, + name="amplitude", + dims=[("inline", 256), ("crossline", 512), ("time", 1024)], + coords=["cdp_x", "cdp_y"], + dtype=ScalarType.FLOAT32, + ) + assert isinstance(seismic.compressor, Blosc) + assert seismic.compressor.algorithm == "zstd" + assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid) + assert seismic.metadata.chunk_grid.configuration.chunk_shape == [128, 128, 128] + assert seismic.metadata.stats_v1 is None diff --git a/tests/unit/v1/templates/test_seismic_3d_prestack_cdp.py b/tests/unit/v1/templates/test_seismic_3d_prestack_cdp.py new file mode 100644 index 00000000..07af3183 --- /dev/null +++ b/tests/unit/v1/templates/test_seismic_3d_prestack_cdp.py @@ -0,0 +1,233 @@ +"""Unit tests for Seismic3DPreStackCDPTemplate.""" + +from tests.unit.v1.helpers import validate_variable + +from mdio.schemas.chunk_grid import RegularChunkGrid +from mdio.schemas.compressors import Blosc +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredType +from mdio.schemas.v1.dataset import Dataset +from mdio.schemas.v1.templates.seismic_3d_prestack_cdp import Seismic3DPreStackCDPTemplate +from mdio.schemas.v1.units import AllUnits +from mdio.schemas.v1.units import LengthUnitEnum +from mdio.schemas.v1.units import LengthUnitModel +from mdio.schemas.v1.units import TimeUnitEnum +from mdio.schemas.v1.units import TimeUnitModel + +_UNIT_METER = AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)) +_UNIT_SECOND = AllUnits(units_v1=TimeUnitModel(time=TimeUnitEnum.SECOND)) + + +def validate_coordinates_headers_trace_mask( + dataset: Dataset, headers: StructuredType, domain: str +) -> None: + """A helper method to validate coordinates, headers, and trace mask.""" + # Verify variables + # 4 dim coords + 2 non-dim coords + 1 data + 1 trace mask + 1 headers = 9 variables + assert len(dataset.variables) == 9 + + # Verify trace headers + validate_variable( + dataset, + name="headers", + dims=[("inline", 512), ("crossline", 768), ("offset", 36)], + coords=["cdp_x", "cdp_y"], + dtype=headers, + ) + + validate_variable( + dataset, + name="trace_mask", + dims=[("inline", 512), ("crossline", 768), ("offset", 36)], + coords=["cdp_x", "cdp_y"], + dtype=ScalarType.BOOL, + ) + + # Verify dimension coordinate variables + inline = validate_variable( + dataset, + name="inline", + dims=[("inline", 512)], + coords=["inline"], + dtype=ScalarType.INT32, + ) + assert inline.metadata is None + + crossline = validate_variable( + dataset, + name="crossline", + dims=[("crossline", 768)], + coords=["crossline"], + dtype=ScalarType.INT32, + ) + assert crossline.metadata is None + + offset = validate_variable( + dataset, + name="offset", + dims=[("offset", 36)], + coords=["offset"], + dtype=ScalarType.INT32, + ) + assert offset.metadata is None + + domain = validate_variable( + dataset, + name=domain,
+ dims=[(domain, 1536)], + coords=[domain], + dtype=ScalarType.INT32, + ) + assert domain.metadata is None + + # Verify non-dimension coordinate variables + cdp_x = validate_variable( + dataset, + name="cdp_x", + dims=[("inline", 512), ("crossline", 768), ("offset", 36)], + coords=["cdp_x"], + dtype=ScalarType.FLOAT64, + ) + assert cdp_x.metadata.units_v1.length == LengthUnitEnum.METER + + cdp_y = validate_variable( + dataset, + name="cdp_y", + dims=[("inline", 512), ("crossline", 768), ("offset", 36)], + coords=["cdp_y"], + dtype=ScalarType.FLOAT64, + ) + assert cdp_y.metadata.units_v1.length == LengthUnitEnum.METER + + +class TestSeismic3DPreStackCDPTemplate: + """Unit tests for Seismic3DPreStackCDPTemplate.""" + + def test_configuration_depth(self) -> None: + """Unit tests for Seismic3DPreStackCDPTemplate.""" + t = Seismic3DPreStackCDPTemplate(domain="DEPTH") + + # Template attributes for prestack CDP + assert t._trace_domain == "depth" + assert t._coord_dim_names == ["inline", "crossline", "offset"] + assert t._dim_names == ["inline", "crossline", "offset", "depth"] + assert t._coord_names == ["cdp_x", "cdp_y"] + assert t._var_chunk_shape == [1, 1, 512, 4096] + + # Variables instantiated when build_dataset() is called + assert t._builder is None + assert t._dim_sizes == [] + assert t._horizontal_coord_unit is None + + # Verify prestack CDP attributes + attrs = t._load_dataset_attributes() + assert attrs.attributes == { + "surveyDimensionality": "3D", + "ensembleType": "cdp", + "processingStage": "pre-stack", + } + assert t.trace_variable_name == "amplitude" + + def test_configuration_time(self) -> None: + """Unit tests for Seismic3DPreStackCDPTemplate.""" + t = Seismic3DPreStackCDPTemplate(domain="TIME") + + # Template attributes for prestack CDP + assert t._trace_domain == "time" + assert t._coord_dim_names == ["inline", "crossline", "offset"] + assert t._dim_names == ["inline", "crossline", "offset", "time"] + assert t._coord_names == ["cdp_x", "cdp_y"] + assert t._var_chunk_shape == [1, 1, 512, 4096] + + # Variables instantiated when build_dataset() is called + assert t._builder is None + assert t._dim_sizes == [] + assert t._horizontal_coord_unit is None + + # Verify prestack CDP attributes + attrs = t._load_dataset_attributes() + assert attrs.attributes == { + "surveyDimensionality": "3D", + "ensembleType": "cdp", + "processingStage": "pre-stack", + } + + assert t.name == "PreStackCdpGathers3DTime" + + def test_domain_case_handling(self) -> None: + """Test that domain parameter handles different cases correctly.""" + # Test uppercase + t1 = Seismic3DPreStackCDPTemplate("ELEVATION") + assert t1._trace_domain == "elevation" + assert t1.name == "PreStackCdpGathers3DElevation" + + # Test mixed case + t2 = Seismic3DPreStackCDPTemplate("elevatioN") + assert t2._trace_domain == "elevation" + assert t2.name == "PreStackCdpGathers3DElevation" + + def test_build_dataset_depth(self, structured_headers: StructuredType) -> None: + """Unit tests for Seismic3DPreStackCDPDepthTemplate build with depth domain.""" + t = Seismic3DPreStackCDPTemplate(domain="depth") + + assert t.name == "PreStackCdpGathers3DDepth" + dataset = t.build_dataset( + "North Sea 3D Prestack Depth", + sizes=[512, 768, 36, 1536], + horizontal_coord_unit=_UNIT_METER, + headers=structured_headers, + ) + + assert dataset.metadata.name == "North Sea 3D Prestack Depth" + assert dataset.metadata.attributes["surveyDimensionality"] == "3D" + assert dataset.metadata.attributes["ensembleType"] == "cdp" + assert 
dataset.metadata.attributes["processingStage"] == "pre-stack" + + validate_coordinates_headers_trace_mask(dataset, structured_headers, "depth") + + # Verify seismic variable (prestack depth data) + seismic = validate_variable( + dataset, + name="amplitude", + dims=[("inline", 512), ("crossline", 768), ("offset", 36), ("depth", 1536)], + coords=["cdp_x", "cdp_y"], + dtype=ScalarType.FLOAT32, + ) + assert isinstance(seismic.compressor, Blosc) + assert seismic.compressor.algorithm == "zstd" + assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid) + assert seismic.metadata.chunk_grid.configuration.chunk_shape == [1, 1, 512, 4096] + assert seismic.metadata.stats_v1 is None + + def test_build_dataset_time(self, structured_headers: StructuredType) -> None: + """Unit tests for Seismic3DPreStackCDPTimeTemplate build with time domain.""" + t = Seismic3DPreStackCDPTemplate(domain="time") + + assert t.name == "PreStackCdpGathers3DTime" + dataset = t.build_dataset( + "Santos Basin 3D Prestack", + sizes=[512, 768, 36, 1536], + horizontal_coord_unit=_UNIT_METER, + headers=structured_headers, + ) + + assert dataset.metadata.name == "Santos Basin 3D Prestack" + assert dataset.metadata.attributes["surveyDimensionality"] == "3D" + assert dataset.metadata.attributes["ensembleType"] == "cdp" + assert dataset.metadata.attributes["processingStage"] == "pre-stack" + + validate_coordinates_headers_trace_mask(dataset, structured_headers, "time") + + # Verify seismic variable (prestack time data) + seismic = validate_variable( + dataset, + name="amplitude", + dims=[("inline", 512), ("crossline", 768), ("offset", 36), ("time", 1536)], + coords=["cdp_x", "cdp_y"], + dtype=ScalarType.FLOAT32, + ) + assert isinstance(seismic.compressor, Blosc) + assert seismic.compressor.algorithm == "zstd" + assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid) + assert seismic.metadata.chunk_grid.configuration.chunk_shape == [1, 1, 512, 4096] + assert seismic.metadata.stats_v1 is None diff --git a/tests/unit/v1/templates/test_seismic_3d_prestack_shot.py b/tests/unit/v1/templates/test_seismic_3d_prestack_shot.py new file mode 100644 index 00000000..5d4b51c2 --- /dev/null +++ b/tests/unit/v1/templates/test_seismic_3d_prestack_shot.py @@ -0,0 +1,259 @@ +"""Unit tests for Seismic3DPreStackShotTemplate.""" + +from tests.unit.v1.helpers import validate_variable + +from mdio.schemas.chunk_grid import RegularChunkGrid +from mdio.schemas.compressors import Blosc +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredType +from mdio.schemas.v1.dataset import Dataset +from mdio.schemas.v1.templates.seismic_3d_prestack_shot import Seismic3DPreStackShotTemplate +from mdio.schemas.v1.units import AllUnits +from mdio.schemas.v1.units import LengthUnitEnum +from mdio.schemas.v1.units import LengthUnitModel +from mdio.schemas.v1.units import TimeUnitEnum +from mdio.schemas.v1.units import TimeUnitModel + +_UNIT_METER = AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER)) +_UNIT_SECOND = AllUnits(units_v1=TimeUnitModel(time=TimeUnitEnum.SECOND)) + + +def _validate_coordinates_headers_trace_mask( + dataset: Dataset, headers: StructuredType, domain: str +) -> None: + """Validate the coordinate, headers, trace_mask variables in the dataset.""" + # Verify variables + # 4 dim coords + 5 non-dim coords + 1 data + 1 trace mask + 1 headers = 12 variables + assert len(dataset.variables) == 12 + + # Verify trace headers + validate_variable( + dataset, + name="headers", + dims=[("shot_point", 256), 
("cable", 512), ("channel", 24)], + coords=["gun", "shot-x", "shot-y", "receiver-x", "receiver-y"], + dtype=headers, + ) + + validate_variable( + dataset, + name="trace_mask", + dims=[("shot_point", 256), ("cable", 512), ("channel", 24)], + coords=["gun", "shot-x", "shot-y", "receiver-x", "receiver-y"], + dtype=ScalarType.BOOL, + ) + + # Verify dimension coordinate variables + inline = validate_variable( + dataset, + name="shot_point", + dims=[("shot_point", 256)], + coords=["shot_point"], + dtype=ScalarType.INT32, + ) + assert inline.metadata is None + + crossline = validate_variable( + dataset, + name="cable", + dims=[("cable", 512)], + coords=["cable"], + dtype=ScalarType.INT32, + ) + assert crossline.metadata is None + + crossline = validate_variable( + dataset, + name="channel", + dims=[("channel", 24)], + coords=["channel"], + dtype=ScalarType.INT32, + ) + assert crossline.metadata is None + + domain = validate_variable( + dataset, + name=domain, + dims=[(domain, 2048)], + coords=[domain], + dtype=ScalarType.INT32, + ) + assert domain.metadata is None + + # Verify non-dimension coordinate variables + validate_variable( + dataset, + name="gun", + dims=[("shot_point", 256)], + coords=["gun"], + dtype=ScalarType.UINT8, + ) + + shot_x = validate_variable( + dataset, + name="shot-x", + dims=[("shot_point", 256)], + coords=["shot-x"], + dtype=ScalarType.FLOAT64, + ) + assert shot_x.metadata.units_v1.length == LengthUnitEnum.METER + + shot_y = validate_variable( + dataset, + name="shot-y", + dims=[("shot_point", 256)], + coords=["shot-y"], + dtype=ScalarType.FLOAT64, + ) + assert shot_y.metadata.units_v1.length == LengthUnitEnum.METER + + receiver_x = validate_variable( + dataset, + name="receiver-x", + dims=[("shot_point", 256), ("cable", 512), ("channel", 24)], + coords=["receiver-x"], + dtype=ScalarType.FLOAT64, + ) + assert receiver_x.metadata.units_v1.length == LengthUnitEnum.METER + + receiver_y = validate_variable( + dataset, + name="receiver-y", + dims=[("shot_point", 256), ("cable", 512), ("channel", 24)], + coords=["receiver-y"], + dtype=ScalarType.FLOAT64, + ) + assert receiver_y.metadata.units_v1.length == LengthUnitEnum.METER + + +class TestSeismic3DPreStackShotTemplate: + """Unit tests for Seismic3DPreStackShotTemplate.""" + + def test_configuration_depth(self) -> None: + """Unit tests for Seismic3DPreStackShotTemplate in depth domain.""" + t = Seismic3DPreStackShotTemplate(domain="DEPTH") + + # Template attributes for prestack shot + assert t._trace_domain == "depth" + assert t._coord_dim_names == ["shot_point", "cable", "channel"] + assert t._dim_names == ["shot_point", "cable", "channel", "depth"] + assert t._coord_names == ["gun", "shot-x", "shot-y", "receiver-x", "receiver-y"] + assert t._var_chunk_shape == [1, 1, 512, 4096] + + # Variables instantiated when build_dataset() is called + assert t._builder is None + assert t._dim_sizes == [] + assert t._horizontal_coord_unit is None + + # Verify prestack shot attributes + attrs = t._load_dataset_attributes() + assert attrs.attributes == { + "surveyDimensionality": "3D", + "ensembleType": "shot", + "processingStage": "pre-stack", + } + assert t.trace_variable_name == "amplitude" + + def test_configuration_time(self) -> None: + """Unit tests for Seismic3DPreStackShotTemplate in time domain.""" + t = Seismic3DPreStackShotTemplate(domain="TIME") + + # Template attributes for prestack shot + assert t._trace_domain == "time" + assert t._coord_dim_names == ["shot_point", "cable", "channel"] + assert t._dim_names == ["shot_point", 
"cable", "channel", "time"] + assert t._coord_names == ["gun", "shot-x", "shot-y", "receiver-x", "receiver-y"] + assert t._var_chunk_shape == [1, 1, 512, 4096] + + # Variables instantiated when build_dataset() is called + assert t._builder is None + assert t._dim_sizes == [] + assert t._horizontal_coord_unit is None + + # Verify prestack shot attributes + attrs = t._load_dataset_attributes() + assert attrs.attributes == { + "surveyDimensionality": "3D", + "ensembleType": "shot", + "processingStage": "pre-stack", + } + + assert t.name == "PreStackShotGathers3DTime" + + def test_domain_case_handling(self) -> None: + """Test that domain parameter handles different cases correctly.""" + # Test uppercase + t1 = Seismic3DPreStackShotTemplate("ELEVATION") + assert t1._trace_domain == "elevation" + assert t1.name == "PreStackShotGathers3DElevation" + + # Test mixed case + t2 = Seismic3DPreStackShotTemplate("elevatioN") + assert t2._trace_domain == "elevation" + assert t2.name == "PreStackShotGathers3DElevation" + + def test_build_dataset_depth(self, structured_headers: StructuredType) -> None: + """Unit tests for Seismic3DPreStackShotTemplate build in depth domain.""" + t = Seismic3DPreStackShotTemplate(domain="depth") + + assert t.name == "PreStackShotGathers3DDepth" + dataset = t.build_dataset( + "Gulf of Mexico 3D Shot Depth", + sizes=[256, 512, 24, 2048], + horizontal_coord_unit=_UNIT_METER, + headers=structured_headers, + ) + + assert dataset.metadata.name == "Gulf of Mexico 3D Shot Depth" + assert dataset.metadata.attributes["surveyDimensionality"] == "3D" + assert dataset.metadata.attributes["ensembleType"] == "shot" + assert dataset.metadata.attributes["processingStage"] == "pre-stack" + + _validate_coordinates_headers_trace_mask(dataset, structured_headers, "depth") + + # Verify seismic variable (prestack shot depth data) + seismic = validate_variable( + dataset, + name="amplitude", + dims=[("shot_point", 256), ("cable", 512), ("channel", 24), ("depth", 2048)], + coords=["gun", "shot-x", "shot-y", "receiver-x", "receiver-y"], + dtype=ScalarType.FLOAT32, + ) + assert isinstance(seismic.compressor, Blosc) + assert seismic.compressor.algorithm == "zstd" + assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid) + assert seismic.metadata.chunk_grid.configuration.chunk_shape == [1, 1, 512, 4096] + assert seismic.metadata.stats_v1 is None + + def test_build_dataset_time(self, structured_headers: StructuredType) -> None: + """Unit tests for Seismic3DPreStackShotTemplate build in time domain.""" + t = Seismic3DPreStackShotTemplate(domain="time") + + assert t.name == "PreStackShotGathers3DTime" + dataset = t.build_dataset( + "North Sea 3D Shot Time", + sizes=[256, 512, 24, 2048], + horizontal_coord_unit=_UNIT_METER, + headers=structured_headers, + ) + + assert dataset.metadata.name == "North Sea 3D Shot Time" + assert dataset.metadata.attributes["surveyDimensionality"] == "3D" + assert dataset.metadata.attributes["ensembleType"] == "shot" + assert dataset.metadata.attributes["processingStage"] == "pre-stack" + + _validate_coordinates_headers_trace_mask(dataset, structured_headers, "time") + + # Verify seismic variable (prestack shot time data) + seismic = validate_variable( + dataset, + name="amplitude", + dims=[("shot_point", 256), ("cable", 512), ("channel", 24), ("time", 2048)], + coords=["gun", "shot-x", "shot-y", "receiver-x", "receiver-y"], + dtype=ScalarType.FLOAT32, + ) + assert isinstance(seismic.compressor, Blosc) + assert seismic.compressor.algorithm == "zstd" + assert 
isinstance(seismic.metadata.chunk_grid, RegularChunkGrid) + assert seismic.metadata.chunk_grid.configuration.chunk_shape == [1, 1, 512, 4096] + assert seismic.metadata.stats_v1 is None diff --git a/tests/unit/v1/templates/test_seismic_templates.py b/tests/unit/v1/templates/test_seismic_templates.py new file mode 100644 index 00000000..c0b48709 --- /dev/null +++ b/tests/unit/v1/templates/test_seismic_templates.py @@ -0,0 +1,101 @@ +"""Unit tests for concrete seismic dataset template implementations.""" + +# Import all concrete template classes +from tests.unit.v1.helpers import validate_variable +from tests.unit.v1.templates.test_seismic_2d_poststack import _UNIT_METER + +from mdio.schemas.dtype import ScalarType +from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate +from mdio.schemas.v1.templates.seismic_2d_poststack import Seismic2DPostStackTemplate +from mdio.schemas.v1.templates.seismic_3d_poststack import Seismic3DPostStackTemplate +from mdio.schemas.v1.templates.seismic_3d_prestack_cdp import Seismic3DPreStackCDPTemplate +from mdio.schemas.v1.templates.seismic_3d_prestack_shot import Seismic3DPreStackShotTemplate + + +class TestSeismicTemplates: + """Test cases for Seismic2DPostStackTemplate.""" + + def test_custom_data_variable_name(self) -> None: + """Test get_data_variable_name with custom names.""" + + # Define a template with a custom data variable name 'velocity' + class Velocity2DPostStackTemplate(Seismic2DPostStackTemplate): + def __init__(self, domain: str): + super().__init__(domain=domain) + + @property + def _trace_variable_name(self) -> str: + return "velocity" + + @property + def _name(self) -> str: + return f"Velocity2D{self._trace_domain.capitalize()}" + + t = Velocity2DPostStackTemplate("depth") + assert t.name == "Velocity2DDepth" + assert t.trace_variable_name == "velocity" + + dataset = t.build_dataset( + "Velocity 2D Depth Line 001", sizes=[2048, 4096], horizontal_coord_unit=_UNIT_METER + ) + + # Verify velocity variable + validate_variable( + dataset, + name="velocity", + dims=[("cdp", 2048), ("depth", 4096)], + coords=["cdp_x", "cdp_y"], + dtype=ScalarType.FLOAT32, + ) + + def test_get_name_time(self) -> None: + """Test get_name with domain.""" + time_template = Seismic2DPostStackTemplate("time") + dpth_template = Seismic2DPostStackTemplate("depth") + + assert time_template.name == "PostStack2DTime" + assert dpth_template.name == "PostStack2DDepth" + + time_template = Seismic3DPostStackTemplate("time") + dpth_template = Seismic3DPostStackTemplate("depth") + + assert time_template.name == "PostStack3DTime" + assert dpth_template.name == "PostStack3DDepth" + + time_template = Seismic3DPreStackCDPTemplate("time") + dpth_template = Seismic3DPreStackCDPTemplate("depth") + + assert time_template.name == "PreStackCdpGathers3DTime" + assert dpth_template.name == "PreStackCdpGathers3DDepth" + + time_template = Seismic3DPreStackShotTemplate("time") + dpth_template = Seismic3DPreStackShotTemplate("depth") + + assert time_template.name == "PreStackShotGathers3DTime" + assert dpth_template.name == "PreStackShotGathers3DDepth" + + def test_all_templates_inherit_from_abstract(self) -> None: + """Test that all concrete templates inherit from AbstractDatasetTemplate.""" + templates = [ + Seismic2DPostStackTemplate("time"), + Seismic3DPostStackTemplate("time"), + Seismic3DPreStackCDPTemplate("time"), + Seismic3DPreStackShotTemplate("time"), + Seismic2DPostStackTemplate("depth"), + Seismic3DPostStackTemplate("depth"), + 
Seismic3DPreStackCDPTemplate("depth"), + Seismic3DPreStackShotTemplate("depth"), + ] + + for template in templates: + assert isinstance(template, AbstractDatasetTemplate) + # That each template has the required properties and methods + assert hasattr(template, "name") + assert hasattr(template, "trace_variable_name") + assert hasattr(template, "trace_domain") + assert hasattr(template, "dimension_names") + assert hasattr(template, "coordinate_names") + assert hasattr(template, "build_dataset") + + names = [template.name for template in templates] + assert len(names) == len(set(names)), f"Duplicate template names found: {names}" diff --git a/tests/unit/v1/templates/test_template_registry.py b/tests/unit/v1/templates/test_template_registry.py new file mode 100644 index 00000000..e8c61fe6 --- /dev/null +++ b/tests/unit/v1/templates/test_template_registry.py @@ -0,0 +1,396 @@ +"""Tests for the singleton TemplateRegistry implementation.""" + +import threading +import time + +import pytest + +from mdio.schemas.v1.templates.abstract_dataset_template import AbstractDatasetTemplate +from mdio.schemas.v1.templates.template_registry import TemplateRegistry +from mdio.schemas.v1.templates.template_registry import get_template +from mdio.schemas.v1.templates.template_registry import get_template_registry +from mdio.schemas.v1.templates.template_registry import is_template_registered +from mdio.schemas.v1.templates.template_registry import list_templates +from mdio.schemas.v1.templates.template_registry import register_template + + +class MockDatasetTemplate(AbstractDatasetTemplate): + """Mock template for testing.""" + + def __init__(self, name: str): + super().__init__() + self.template_name = name + + @property + def _name(self) -> str: + return self.template_name + + def _load_dataset_attributes(self) -> None: + return None # Mock implementation + + def create_dataset(self) -> str: + """Create a mock dataset. + + Returns: + str: A message indicating the dataset creation. 
+ """ + return f"Mock dataset created by {self.template_name}" + + +class TestTemplateRegistrySingleton: + """Test cases for TemplateRegistry singleton pattern.""" + + def setup_method(self) -> None: + """Reset singleton before each test.""" + TemplateRegistry._reset_instance() + + def teardown_method(self) -> None: + """Clean up after each test.""" + if TemplateRegistry._instance: + TemplateRegistry._instance.clear() + TemplateRegistry._reset_instance() + + def test_singleton_same_instance(self) -> None: + """Test that multiple instantiations return the same instance.""" + registry1 = TemplateRegistry() + registry2 = TemplateRegistry() + registry3 = TemplateRegistry.get_instance() + + assert registry1 is registry2 + assert registry2 is registry3 + assert id(registry1) == id(registry2) == id(registry3) + + def test_singleton_thread_safety(self) -> None: + """Test that singleton is thread-safe during creation.""" + instances = [] + errors = [] + + def create_instance() -> None: + try: + instance = TemplateRegistry() + instances.append(instance) + time.sleep(0.001) # Small delay to increase contention + except Exception as e: + errors.append(e) + + # Create multiple threads trying to create instances + threads = [threading.Thread(target=create_instance) for _ in range(10)] + + for thread in threads: + thread.start() + + for thread in threads: + thread.join() + + # All instances should be the same + assert len(errors) == 0 + assert len(instances) == 10 + assert all(instance is instances[0] for instance in instances) + + def test_initialization_only_once(self) -> None: + """Test that internal state is initialized only once.""" + registry1 = TemplateRegistry() + template1 = MockDatasetTemplate("test_template") + registry1.register(template1) + + # Create another instance - should have same templates + registry2 = TemplateRegistry() + + assert registry1 is registry2 + assert registry2.is_registered("test_template") + assert registry2.get("test_template") is template1 + + def test_register_template(self) -> None: + """Test template registration.""" + registry = TemplateRegistry() + template = MockDatasetTemplate("test") + + name = registry.register(template) + + assert name == "test" # Should be the template name + assert registry.is_registered("test") + assert registry.get("test") is template + + def test_register_duplicate_template(self) -> None: + """Test error when registering duplicate template.""" + registry = TemplateRegistry() + template1 = MockDatasetTemplate("duplicate") + template2 = MockDatasetTemplate("duplicate") + + registry.register(template1) + + with pytest.raises(ValueError, match="Template 'duplicate' is already registered"): + registry.register(template2) + + def test_get_nonexistent_template(self) -> None: + """Test error when getting non-existent template.""" + registry = TemplateRegistry() + + with pytest.raises(KeyError, match="Template 'nonexistent' is not registered"): + registry.get("nonexistent") + + def test_unregister_template(self) -> None: + """Test template unregistration.""" + registry = TemplateRegistry() + template = MockDatasetTemplate("test_template") + + registry.register(template) + assert registry.is_registered("test_template") + + registry.unregister("test_template") + assert not registry.is_registered("test_template") + + def test_unregister_nonexistent_template(self) -> None: + """Test error when unregistering non-existent template.""" + registry = TemplateRegistry() + + with pytest.raises(KeyError, match="Template 'nonexistent' is not registered"): + 
registry.unregister("nonexistent") + + def test_list_all_templates(self) -> None: + """Test listing all registered templates.""" + registry = TemplateRegistry() + + # Default templates are always installed + templates = list_templates() + assert len(templates) == 8 + assert "PostStack2DTime" in templates + assert "PostStack3DTime" in templates + assert "PreStackCdpGathers3DTime" in templates + assert "PreStackShotGathers3DTime" in templates + + assert "PostStack2DDepth" in templates + assert "PostStack3DDepth" in templates + assert "PreStackCdpGathers3DDepth" in templates + assert "PreStackShotGathers3DDepth" in templates + + # Add some templates + template1 = MockDatasetTemplate("Template_One") + template2 = MockDatasetTemplate("Template_Two") + + registry.register(template1) + registry.register(template2) + + templates = registry.list_all_templates() + assert len(templates) == 10 + assert "Template_One" in templates + assert "Template_Two" in templates + + def test_clear_templates(self) -> None: + """Test clearing all templates.""" + registry = TemplateRegistry() + + # Default templates are always installed + templates = list_templates() + assert len(templates) == 8 + + # Add some templates + template1 = MockDatasetTemplate("Template1") + template2 = MockDatasetTemplate("Template2") + + registry.register(template1) + registry.register(template2) + + assert len(registry.list_all_templates()) == 10 + + # Clear all + registry.clear() + + assert len(registry.list_all_templates()) == 0 + assert not registry.is_registered("Template1") + assert not registry.is_registered("Template2") + # default templates are also cleared + assert not registry.is_registered("PostStack2DTime") + assert not registry.is_registered("PostStack3DTime") + assert not registry.is_registered("PreStackCdpGathers3DTime") + assert not registry.is_registered("PreStackShotGathers3DTime") + assert not registry.is_registered("PostStack2DDepth") + assert not registry.is_registered("PostStack3DDepth") + assert not registry.is_registered("PreStackCdpGathers3DDepth") + assert not registry.is_registered("PreStackShotGathers3DDepth") + + def test_reset_instance(self) -> None: + """Test resetting the singleton instance.""" + registry1 = TemplateRegistry() + template = MockDatasetTemplate("test") + registry1.register(template) + + # Reset the instance + TemplateRegistry._reset_instance() + + # New instance should be different and contain default templates only + registry2 = TemplateRegistry() + + assert registry1 is not registry2 + assert not registry2.is_registered("test") + + # default templates are registered + assert len(registry2.list_all_templates()) == 8 + assert registry2.is_registered("PostStack2DTime") + assert registry2.is_registered("PostStack3DTime") + assert registry2.is_registered("PreStackCdpGathers3DTime") + assert registry2.is_registered("PreStackShotGathers3DTime") + assert registry2.is_registered("PostStack2DDepth") + assert registry2.is_registered("PostStack3DDepth") + assert registry2.is_registered("PreStackCdpGathers3DDepth") + assert registry2.is_registered("PreStackShotGathers3DDepth") + + +class TestGlobalFunctions: + """Test cases for global convenience functions.""" + + def setup_method(self) -> None: + """Reset singleton before each test.""" + TemplateRegistry._reset_instance() + + def teardown_method(self) -> None: + """Clean up after each test.""" + if TemplateRegistry._instance: + TemplateRegistry._instance.clear() + TemplateRegistry._reset_instance() + + def test_get_template_registry(self) -> None: + 
"""Test global registry getter.""" + registry1 = get_template_registry() + registry2 = get_template_registry() + direct_registry = TemplateRegistry() + + assert registry1 is registry2 + assert registry1 is direct_registry + + def test_register_template_global(self) -> None: + """Test global template registration.""" + template = MockDatasetTemplate("global_test") + + name = register_template(template) + + assert name == "global_test" + assert is_template_registered("global_test") + assert get_template("global_test") is template + + def test_list_templates_global(self) -> None: + """Test global template listing.""" + # Default templates are always installed + templates = list_templates() + assert len(templates) == 8 + assert "PostStack2DTime" in templates + assert "PostStack3DTime" in templates + assert "PreStackCdpGathers3DTime" in templates + assert "PreStackShotGathers3DTime" in templates + + assert "PostStack2DDepth" in templates + assert "PostStack3DDepth" in templates + assert "PreStackCdpGathers3DDepth" in templates + assert "PreStackShotGathers3DDepth" in templates + + template1 = MockDatasetTemplate("template1") + template2 = MockDatasetTemplate("template2") + + register_template(template1) + register_template(template2) + + templates = list_templates() + assert len(templates) == 10 + assert "template1" in templates + assert "template2" in templates + + +class TestConcurrentAccess: + """Test concurrent access to the singleton registry.""" + + def setup_method(self) -> None: + """Reset singleton before each test.""" + TemplateRegistry._reset_instance() + + def teardown_method(self) -> None: + """Clean up after each test.""" + if TemplateRegistry._instance: + TemplateRegistry._instance.clear() + TemplateRegistry._reset_instance() + + def test_concurrent_registration(self) -> None: + """Test concurrent template registration.""" + registry = TemplateRegistry() + results = [] + errors = [] + + def register_template_worker(template_id: int) -> None: + try: + template = MockDatasetTemplate(f"template_{template_id}") + name = registry.register(template) + results.append((template_id, name)) + time.sleep(0.001) # Small delay + except Exception as e: + errors.append((template_id, e)) + + # Create multiple threads registering different templates + threads = [threading.Thread(target=register_template_worker, args=(i,)) for i in range(10)] + + for thread in threads: + thread.start() + + for thread in threads: + thread.join() + + # All registrations should succeed + assert len(errors) == 0 + assert len(results) == 10 + # Including 8 default templates + assert len(registry.list_all_templates()) == 18 + + # Check all templates are registered + for i in range(10): + assert registry.is_registered(f"template_{i}") + + def test_concurrent_access_mixed_operations(self) -> None: + """Test concurrent mixed operations (register, get, list).""" + registry = TemplateRegistry() + + # Pre-register some templates + for i in range(5): + template = MockDatasetTemplate(f"initial_{i}") + registry.register(template) + + results = [] + errors = [] + + def mixed_operations_worker(worker_id: int) -> None: + try: + operations_results = [] + + # Get existing template + if worker_id % 2 == 0: + template = registry.get("initial_0") + operations_results.append(("get", template.template_name)) + + # Register new template + if worker_id % 3 == 0: + new_template = MockDatasetTemplate(f"worker_{worker_id}") + name = registry.register(new_template) + operations_results.append(("register", name)) + + # List templates + templates 
= registry.list_all_templates() + operations_results.append(("list", len(templates))) + + results.append((worker_id, operations_results)) + + except Exception as e: + errors.append((worker_id, e)) + + # Run concurrent operations + threads = [threading.Thread(target=mixed_operations_worker, args=(i,)) for i in range(15)] + + for thread in threads: + thread.start() + + for thread in threads: + thread.join() + + # Check that operations completed without errors + assert len(errors) == 0 + assert len(results) == 15 + + # Verify final state is consistent + final_templates = registry.list_all_templates() + assert len(final_templates) >= 5 # At least the initial templates diff --git a/tests/unit/v1/test_dataset_builder_add_coordinate.py b/tests/unit/v1/test_dataset_builder_add_coordinate.py new file mode 100644 index 00000000..5ad4c70c --- /dev/null +++ b/tests/unit/v1/test_dataset_builder_add_coordinate.py @@ -0,0 +1,137 @@ +"""Tests the schema v1 dataset_builder.add_coordinate() public API.""" + +import pytest + +from mdio.schemas.compressors import Blosc +from mdio.schemas.dtype import ScalarType +from mdio.schemas.metadata import UserAttributes +from mdio.schemas.v1.dataset_builder import MDIODatasetBuilder +from mdio.schemas.v1.dataset_builder import _BuilderState +from mdio.schemas.v1.units import AllUnits +from mdio.schemas.v1.units import LengthUnitEnum +from mdio.schemas.v1.units import LengthUnitModel +from mdio.schemas.v1.variable import VariableMetadata + +from .helpers import validate_builder +from .helpers import validate_coordinate +from .helpers import validate_variable + + +def test_add_coordinate() -> None: + """Test adding coordinates. Check the state transition and validate required parameters.""" + builder = MDIODatasetBuilder("test_dataset") + assert builder._state == _BuilderState.INITIAL + + msg = "Must add at least one dimension before adding coordinates" + with pytest.raises(ValueError, match=msg): + builder.add_coordinate( + "cdp", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT32 + ) + + builder.add_dimension("inline", 100) + builder.add_dimension("crossline", 200) + + # Validate required parameters + bad_name = None + with pytest.raises(ValueError, match="'name' must be a non-empty string"): + builder.add_coordinate(bad_name, dimensions=["speed"], data_type=ScalarType.FLOAT32) + with pytest.raises(ValueError, match="'name' must be a non-empty string"): + builder.add_coordinate("", dimensions=["speed"], data_type=ScalarType.FLOAT32) + with pytest.raises(ValueError, match="'dimensions' must be a non-empty list"): + builder.add_coordinate("cdp_x", dimensions=None, data_type=ScalarType.FLOAT32) + with pytest.raises(ValueError, match="'dimensions' must be a non-empty list"): + builder.add_coordinate("cdp_x", dimensions=[], data_type=ScalarType.FLOAT32) + + # Add a variable using non-existent dimensions + msg = "Pre-existing dimension named 'xline' is not found" + with pytest.raises(ValueError, match=msg): + builder.add_coordinate( + "bad_cdp-x", dimensions=["inline", "xline"], data_type=ScalarType.FLOAT32 + ) + + # Validate state transition + builder.add_coordinate( + "cdp_x", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT32 + ) + validate_builder(builder, _BuilderState.HAS_COORDINATES, n_dims=2, n_coords=1, n_var=1) + validate_variable( + builder, + name="cdp_x", + dims=[("inline", 100), ("crossline", 200)], + coords=["cdp_x"], + dtype=ScalarType.FLOAT32, + ) + + # Adding coordinate with the same name twice + msg = "Adding coordinate with the 
same name twice is not allowed" + with pytest.raises(ValueError, match=msg): + builder.add_coordinate( + "cdp_x", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT32 + ) + + +def test_add_coordinate_with_defaults() -> None: + """Test adding coordinates with default arguments.""" + builder = MDIODatasetBuilder("test_dataset") + builder.add_dimension("inline", 100) + builder.add_dimension("crossline", 200) + + # Add coordinate using defaults + builder.add_coordinate("cdp", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT32) + validate_builder(builder, _BuilderState.HAS_COORDINATES, n_dims=2, n_coords=1, n_var=1) + validate_coordinate( + builder, name="cdp", dims=[("inline", 100), ("crossline", 200)], dtype=ScalarType.FLOAT32 + ) + v = validate_variable( + builder, + name="cdp", + dims=[("inline", 100), ("crossline", 200)], + coords=["cdp"], + dtype=ScalarType.FLOAT32, + ) + assert v.long_name is None # Default value + assert v.compressor is None # Default value + assert v.metadata is None # Default value + + +def test_coordinate_with_full_parameters() -> None: + """Test adding coordinates with all metadata.""" + builder = MDIODatasetBuilder("test_dataset") + builder.add_dimension("inline", 100) + builder.add_dimension("crossline", 200) + + # Add coordinate with all metadata + builder.add_coordinate( + "cdp", + long_name="Common Depth Point", + dimensions=["inline", "crossline"], + data_type=ScalarType.FLOAT16, + compressor=Blosc(algorithm="zstd"), + metadata_info=[ + AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.FOOT)), + UserAttributes(attributes={"MGA": 51, "UnitSystem": "Imperial"}), + ], + ) + validate_builder(builder, _BuilderState.HAS_COORDINATES, n_dims=2, n_coords=1, n_var=1) + c = validate_coordinate( + builder, name="cdp", dims=[("inline", 100), ("crossline", 200)], dtype=ScalarType.FLOAT16 + ) + assert c.long_name == "Common Depth Point" + assert isinstance(c.compressor, Blosc) + assert c.compressor.algorithm == "zstd" + assert c.metadata.attributes["MGA"] == 51 + assert c.metadata.attributes["UnitSystem"] == "Imperial" + assert c.metadata.units_v1.length == LengthUnitEnum.FOOT + v = validate_variable( + builder, + name="cdp", + dims=[("inline", 100), ("crossline", 200)], + coords=["cdp"], + dtype=ScalarType.FLOAT16, + ) + assert isinstance(v.compressor, Blosc) + assert v.compressor.algorithm == "zstd" + assert isinstance(v.metadata, VariableMetadata) + assert v.metadata.units_v1.length == LengthUnitEnum.FOOT + assert v.metadata.attributes["MGA"] == 51 + assert v.metadata.attributes["UnitSystem"] == "Imperial" diff --git a/tests/unit/v1/test_dataset_builder_add_dimension.py b/tests/unit/v1/test_dataset_builder_add_dimension.py new file mode 100644 index 00000000..112e7c9c --- /dev/null +++ b/tests/unit/v1/test_dataset_builder_add_dimension.py @@ -0,0 +1,39 @@ +"""Tests the schema v1 dataset_builder.add_dimension() public API.""" + +import pytest + +from mdio.schemas.v1.dataset_builder import MDIODatasetBuilder +from mdio.schemas.v1.dataset_builder import _BuilderState +from mdio.schemas.v1.dataset_builder import _get_named_dimension + +from .helpers import validate_builder + + +def test_add_dimension() -> None: + """Test adding dimension. 
Check the state transition and validate required parameters."""
+    builder = MDIODatasetBuilder("test_dataset")
+    assert builder._state == _BuilderState.INITIAL
+
+    # Validate required parameters
+    bad_name = None
+    with pytest.raises(ValueError, match="'name' must be a non-empty string"):
+        builder.add_dimension(bad_name, 200)
+    with pytest.raises(ValueError, match="'name' must be a non-empty string"):
+        builder.add_dimension("", 200)
+
+    # First dimension should change state to HAS_DIMENSIONS
+    builder.add_dimension("x", 100)
+    validate_builder(builder, _BuilderState.HAS_DIMENSIONS, n_dims=1, n_coords=0, n_var=0)
+    assert _get_named_dimension(builder._dimensions, "x", 100) is not None
+
+    # Adding dimension with the same name twice is not allowed
+    msg = "Adding dimension with the same name twice is not allowed"
+    with pytest.raises(ValueError, match=msg):
+        builder.add_dimension("x", 200)
diff --git a/tests/unit/v1/test_dataset_builder_add_variable.py b/tests/unit/v1/test_dataset_builder_add_variable.py
new file mode 100644
index 00000000..9bcb97ff
--- /dev/null
+++ b/tests/unit/v1/test_dataset_builder_add_variable.py
@@ -0,0 +1,246 @@
+"""Tests the schema v1 dataset_builder.add_variable() public API."""
+
+import pytest
+
+from mdio.schemas.chunk_grid import RegularChunkGrid
+from mdio.schemas.chunk_grid import RegularChunkShape
+from mdio.schemas.compressors import Blosc
+from mdio.schemas.dtype import ScalarType
+from mdio.schemas.metadata import ChunkGridMetadata
+from mdio.schemas.metadata import UserAttributes
+from mdio.schemas.v1.dataset_builder import MDIODatasetBuilder
+from mdio.schemas.v1.dataset_builder import _BuilderState
+from mdio.schemas.v1.stats import CenteredBinHistogram
+from mdio.schemas.v1.stats import StatisticsMetadata
+from mdio.schemas.v1.stats import SummaryStatistics
+from mdio.schemas.v1.units import AllUnits
+from mdio.schemas.v1.units import LengthUnitEnum
+from mdio.schemas.v1.units import LengthUnitModel
+from mdio.schemas.v1.variable import VariableMetadata
+
+from .helpers import validate_builder
+from .helpers import validate_variable
+
+
+def test_add_variable_no_coords() -> None:
+    """Test adding a variable. Check the state transition and validate required parameters."""
+    builder = MDIODatasetBuilder("test_dataset")
+    validate_builder(builder, _BuilderState.INITIAL, n_dims=0, n_coords=0, n_var=0)
+
+    # Validate: Must add at least one dimension before adding variables
+    msg = "Must add at least one dimension before adding variables"
+    with pytest.raises(ValueError, match=msg):
+        builder.add_variable("amplitude", dimensions=["speed"], data_type=ScalarType.FLOAT32)
+
+    # Add dimensions before we can add a data variable
+    builder.add_dimension("inline", 100)
+    builder.add_dimension("crossline", 200)
+    builder.add_dimension("depth", 300)
+
+    # Validate: required parameters must be present
+    bad_name = None
+    with pytest.raises(ValueError, match="'name' must be a non-empty string"):
+        builder.add_variable(bad_name, dimensions=["speed"], data_type=ScalarType.FLOAT32)
+    with pytest.raises(ValueError, match="'name' must be a non-empty string"):
+        builder.add_variable("", dimensions=["speed"], data_type=ScalarType.FLOAT32)
+    with pytest.raises(ValueError, match="'dimensions' must be a non-empty list"):
+        builder.add_variable("bad_amplitude", dimensions=None, data_type=ScalarType.FLOAT32)
+    with pytest.raises(ValueError, match="'dimensions' must be a non-empty list"):
+        builder.add_variable("bad_amplitude", dimensions=[], data_type=ScalarType.FLOAT32)
+
+    # Validate: adding a variable that uses non-existent dimensions is not allowed
+    msg = "Pre-existing dimension named 'il' is not found"
+    with pytest.raises(ValueError, match=msg):
+        builder.add_variable(
+            "bad_amplitude", dimensions=["il", "xl", "depth"], data_type=ScalarType.FLOAT32
+        )
+
+    # Add a variable without coordinates
+    builder.add_variable(
+        "amplitude", dimensions=["inline", "crossline", "depth"], data_type=ScalarType.FLOAT32
+    )
+    validate_builder(builder, _BuilderState.HAS_VARIABLES, n_dims=3, n_coords=0, n_var=1)
+    validate_variable(
+        builder,
+        "amplitude",
+        dims=[("inline", 100), ("crossline", 200), ("depth", 300)],
+        coords=None,
+        dtype=ScalarType.FLOAT32,
+    )
+
+    # Validate: adding a variable with the same name twice is not allowed
+    msg = "Adding variable with the same name twice is not allowed"
+    with pytest.raises(ValueError, match=msg):
+        builder.add_variable(
+            "amplitude", dimensions=["inline", "crossline", "depth"], data_type=ScalarType.FLOAT32
+        )
+
+
+def test_add_variable_with_coords() -> None:
+    """Test adding a variable with coordinates. Check the state transition and validation."""
+    builder = MDIODatasetBuilder("test_dataset")
+    builder.add_dimension("inline", 100)
+    builder.add_dimension("crossline", 200)
+    builder.add_dimension("depth", 300)
+
+    # Add dimension coordinates before we can add a data variable
+    builder.add_coordinate("inline", dimensions=["inline"], data_type=ScalarType.UINT32)
+    builder.add_coordinate("crossline", dimensions=["crossline"], data_type=ScalarType.UINT32)
+
+    # Validate: adding a variable with a coordinate that has not been pre-created is not allowed
+    msg = "Pre-existing coordinate named 'depth' is not found"
+    with pytest.raises(ValueError, match=msg):
+        builder.add_variable(
+            "ampl",
+            dimensions=["inline", "crossline", "depth"],
+            coordinates=["inline", "crossline", "depth"],
+            data_type=ScalarType.FLOAT32,
+        )
+
+    # Add a variable with pre-defined dimension coordinates
+    builder.add_variable(
+        "ampl",
+        dimensions=["inline", "crossline", "depth"],
+        coordinates=["inline", "crossline"],
+        data_type=ScalarType.FLOAT32,
+    )
+    validate_builder(builder, _BuilderState.HAS_VARIABLES, n_dims=3, n_coords=2, n_var=3)
+    validate_variable(
+        builder,
+        "ampl",
+        dims=[("inline", 100), ("crossline", 200), ("depth", 300)],
+        coords=["inline", "crossline"],
+        dtype=ScalarType.FLOAT32,
+    )
+
+    # Add non-dim coordinates (e.g., 2D coordinates)
+    builder.add_coordinate(
+        "cdp_x", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT32
+    )
+    builder.add_coordinate(
+        "cdp_y", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT32
+    )
+
+    # Add a variable with pre-defined dimension and non-dimension coordinates
+    builder.add_variable(
+        "ampl2",
+        dimensions=["inline", "crossline", "depth"],
+        coordinates=["inline", "crossline", "cdp_x", "cdp_y"],
+        data_type=ScalarType.FLOAT32,
+    )
+    validate_builder(builder, _BuilderState.HAS_VARIABLES, n_dims=3, n_coords=4, n_var=6)
+    validate_variable(
+        builder,
+        "ampl2",
+        dims=[("inline", 100), ("crossline", 200), ("depth", 300)],
+        coords=["inline", "crossline", "cdp_x", "cdp_y"],
+        dtype=ScalarType.FLOAT32,
+    )
+
+
+def test_add_variable_with_defaults() -> None:
+    """Test adding a variable with default arguments."""
+    builder = MDIODatasetBuilder("test_dataset")
+    # Add dimensions before we can add data variables
+    builder.add_dimension("inline", 100)
+    builder.add_dimension("crossline", 200)
+    builder.add_dimension("depth", 300)
+    # Add dimension coordinates
+    builder.add_coordinate("inline", dimensions=["inline"], data_type=ScalarType.UINT32)
+    builder.add_coordinate("crossline", dimensions=["crossline"], data_type=ScalarType.UINT32)
+    builder.add_coordinate(
+        "depth",
+        dimensions=["depth"],
+        data_type=ScalarType.UINT32,
+        metadata_info=[AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.METER))],
+    )
+
+    # Add data variable using defaults
+    builder.add_variable(
+        "ampl", dimensions=["inline", "crossline", "depth"], data_type=ScalarType.FLOAT32
+    )
+    validate_builder(builder, _BuilderState.HAS_VARIABLES, n_dims=3, n_coords=3, n_var=4)
+    v = validate_variable(
+        builder,
+        "ampl",
+        dims=[("inline", 100), ("crossline", 200), ("depth", 300)],
+        coords=None,
+        dtype=ScalarType.FLOAT32,
+    )
+    assert v.long_name is None  # Default value
+    assert v.compressor is None  # Default value
+    assert v.coordinates is None  # Default value
+    assert v.metadata is None  # Default value
+
+
+def test_add_variable_full_parameters() -> None:
+    """Test adding a variable with full parameters."""
+    builder =
MDIODatasetBuilder("test_dataset") + # Add dimensions before we can add a data variables + builder.add_dimension("inline", 100) + builder.add_dimension("crossline", 200) + builder.add_dimension("depth", 300) + # Add dimension coordinates + builder.add_coordinate("inline", dimensions=["inline"], data_type=ScalarType.UINT32) + builder.add_coordinate("crossline", dimensions=["crossline"], data_type=ScalarType.UINT32) + builder.add_coordinate("depth", dimensions=["depth"], data_type=ScalarType.UINT32) + # Add coordinates before we can add a data variable + builder.add_coordinate( + "cdp_x", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT64 + ) + builder.add_coordinate( + "cdp_y", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT64 + ) + + # Add data variable with full parameters + builder.add_variable( + "ampl", + long_name="Amplitude (dimensionless)", + dimensions=["inline", "crossline", "depth"], + data_type=ScalarType.FLOAT32, + compressor=Blosc(algorithm="zstd"), + coordinates=["inline", "crossline", "depth", "cdp_x", "cdp_y"], + metadata_info=[ + AllUnits(units_v1=LengthUnitModel(length=LengthUnitEnum.FOOT)), + UserAttributes(attributes={"MGA": 51, "UnitSystem": "Imperial"}), + ChunkGridMetadata( + chunk_grid=RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=[20])) + ), + StatisticsMetadata( + stats_v1=SummaryStatistics( + count=100, + sum=1215.1, + sumSquares=125.12, + min=5.61, + max=10.84, + histogram=CenteredBinHistogram(binCenters=[1, 2], counts=[10, 15]), + ) + ), + ], + ) + validate_builder(builder, _BuilderState.HAS_VARIABLES, n_dims=3, n_coords=5, n_var=6) + v = validate_variable( + builder, + "ampl", + dims=[("inline", 100), ("crossline", 200), ("depth", 300)], + coords=["inline", "crossline", "depth", "cdp_x", "cdp_y"], + dtype=ScalarType.FLOAT32, + ) + assert v.long_name == "Amplitude (dimensionless)" + assert isinstance(v.compressor, Blosc) + assert v.compressor.algorithm == "zstd" + assert len(v.coordinates) == 5 + assert v.metadata.stats_v1.count == 100 + assert isinstance(v.metadata, VariableMetadata) + assert v.metadata.units_v1.length == LengthUnitEnum.FOOT + assert v.metadata.attributes["MGA"] == 51 + assert v.metadata.attributes["UnitSystem"] == "Imperial" + assert v.metadata.chunk_grid.name == "regular" + assert v.metadata.chunk_grid.configuration.chunk_shape == [20] + assert v.metadata.stats_v1.count == 100 + assert v.metadata.stats_v1.sum == 1215.1 + assert v.metadata.stats_v1.sum_squares == 125.12 + assert v.metadata.stats_v1.min == 5.61 + assert v.metadata.stats_v1.max == 10.84 + assert v.metadata.stats_v1.histogram.bin_centers == [1, 2] + assert v.metadata.stats_v1.histogram.counts == [10, 15] diff --git a/tests/unit/v1/test_dataset_builder_build.py b/tests/unit/v1/test_dataset_builder_build.py new file mode 100644 index 00000000..a5d817e1 --- /dev/null +++ b/tests/unit/v1/test_dataset_builder_build.py @@ -0,0 +1,152 @@ +"""Tests the schema v1 dataset_builder.build() public API.""" + +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredField +from mdio.schemas.dtype import StructuredType +from mdio.schemas.v1.dataset import Dataset +from mdio.schemas.v1.dataset_builder import MDIODatasetBuilder +from mdio.schemas.v1.units import LengthUnitEnum +from mdio.schemas.v1.units import SpeedUnitEnum + +from .helpers import make_seismic_poststack_3d_acceptance_dataset +from .helpers import validate_variable + + +def test_build() -> None: + """Test building a complete dataset.""" + dataset = ( + 
MDIODatasetBuilder("test_dataset") + .add_dimension("inline", 100) + .add_dimension("crossline", 200) + .add_coordinate("inline", dimensions=["inline"], data_type=ScalarType.FLOAT64) + .add_coordinate("crossline", dimensions=["crossline"], data_type=ScalarType.FLOAT64) + .add_coordinate("x_coord", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT32) + .add_coordinate("y_coord", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT32) + .add_variable( + "data", + long_name="Test Data", + dimensions=["inline", "crossline"], + coordinates=["inline", "crossline", "x_coord", "y_coord"], + data_type=ScalarType.FLOAT32, + ) + .build() + ) + + assert isinstance(dataset, Dataset) + assert dataset.metadata.name == "test_dataset" + # 2 dim coord var + 2 non-dim coord var + 1 data variables = 5 variables + assert len(dataset.variables) == 5 + assert next(v for v in dataset.variables if v.name == "inline") is not None + assert next(v for v in dataset.variables if v.name == "crossline") is not None + assert next(v for v in dataset.variables if v.name == "x_coord") is not None + assert next(v for v in dataset.variables if v.name == "y_coord") is not None + assert next(v for v in dataset.variables if v.name == "data") is not None + + +def test_build_seismic_poststack_3d_acceptance_dataset() -> None: # noqa: PLR0915 Too many statements (57 > 50) + """Test building a Seismic PostStack 3D Acceptance dataset.""" + dataset = make_seismic_poststack_3d_acceptance_dataset("Seismic") + + # Verify dataset structure + assert dataset.metadata.name == "Seismic" + assert dataset.metadata.api_version == "1.0.0a1" + assert dataset.metadata.attributes["foo"] == "bar" + assert len(dataset.metadata.attributes["textHeader"]) == 3 + + # Verify variables (including dimension variables) + # 3 dimension variables + 4 data variables + 2 coordinate variables + assert len(dataset.variables) == 9 + + # Verify dimension coordinate variables + validate_variable( + dataset, name="inline", dims=[("inline", 256)], coords=["inline"], dtype=ScalarType.UINT32 + ) + + validate_variable( + dataset, + name="crossline", + dims=[("crossline", 512)], + coords=["crossline"], + dtype=ScalarType.UINT32, + ) + + depth = validate_variable( + dataset, name="depth", dims=[("depth", 384)], coords=["depth"], dtype=ScalarType.UINT32 + ) + assert depth.metadata.units_v1.length == LengthUnitEnum.METER + + # Verify coordinate variables + cdp_x = validate_variable( + dataset, + name="cdp_x", + dims=[("inline", 256), ("crossline", 512)], + coords=["cdp_x"], + dtype=ScalarType.FLOAT32, + ) + assert cdp_x.metadata.units_v1.length == LengthUnitEnum.METER + + cdp_y = validate_variable( + dataset, + name="cdp_y", + dims=[("inline", 256), ("crossline", 512)], + coords=["cdp_y"], + dtype=ScalarType.FLOAT32, + ) + assert cdp_y.metadata.units_v1.length == LengthUnitEnum.METER + + # Verify data variables + image = validate_variable( + dataset, + name="image", + dims=[("inline", 256), ("crossline", 512), ("depth", 384)], + coords=["cdp_x", "cdp_y"], + dtype=ScalarType.FLOAT32, + ) + assert image.metadata.units_v1 is None # No units defined for image + assert image.compressor.algorithm == "zstd" + assert image.metadata.chunk_grid.configuration.chunk_shape == [128, 128, 128] + assert image.metadata.stats_v1.count == 100 + + velocity = validate_variable( + dataset, + name="velocity", + dims=[("inline", 256), ("crossline", 512), ("depth", 384)], + coords=["cdp_x", "cdp_y"], + dtype=ScalarType.FLOAT16, + ) + assert velocity.compressor is None + assert 
velocity.metadata.chunk_grid.configuration.chunk_shape == [128, 128, 128] + assert velocity.metadata.units_v1.speed == SpeedUnitEnum.METER_PER_SECOND + + image_inline = validate_variable( + dataset, + name="image_inline", + dims=[("inline", 256), ("crossline", 512), ("depth", 384)], + coords=["cdp_x", "cdp_y"], + dtype=ScalarType.FLOAT32, + ) + assert image_inline.long_name == "inline optimized version of 3d_stack" + assert image_inline.compressor.algorithm == "zstd" + assert image_inline.metadata.chunk_grid.configuration.chunk_shape == [4, 512, 512] + + # Verify image_headers variable + headers = next(v for v in dataset.variables if v.name == "image_headers") + assert isinstance(headers.data_type, StructuredType) + assert len(headers.data_type.fields) == 4 + assert headers.data_type.fields[0].name == "cdp_x" + + headers = validate_variable( + dataset, + name="image_headers", + dims=[("inline", 256), ("crossline", 512)], + coords=["cdp_x", "cdp_y"], + dtype=StructuredType( + fields=[ + StructuredField(name="cdp_x", format=ScalarType.INT32), + StructuredField(name="cdp_y", format=ScalarType.INT32), + StructuredField(name="elevation", format=ScalarType.FLOAT16), + StructuredField(name="some_scalar", format=ScalarType.FLOAT16), + ] + ), + ) + assert headers.metadata.chunk_grid.configuration.chunk_shape == [128, 128] diff --git a/tests/unit/v1/test_dataset_builder_helpers.py b/tests/unit/v1/test_dataset_builder_helpers.py new file mode 100644 index 00000000..619321d8 --- /dev/null +++ b/tests/unit/v1/test_dataset_builder_helpers.py @@ -0,0 +1,94 @@ +"""Tests the schema v1 dataset_builder internal methods.""" + +from datetime import UTC +from datetime import datetime + +import pytest +from pydantic import Field + +from mdio.schemas.core import StrictModel +from mdio.schemas.dimension import NamedDimension +from mdio.schemas.v1.dataset_builder import _get_named_dimension +from mdio.schemas.v1.dataset_builder import _to_dictionary + + +def test__get_named_dimension() -> None: + """Test getting a dimension by name from the list of dimensions.""" + dimensions = [NamedDimension(name="inline", size=2), NamedDimension(name="crossline", size=3)] + + assert _get_named_dimension([], "inline") is None + assert _get_named_dimension(dimensions, "inline") == NamedDimension(name="inline", size=2) + assert _get_named_dimension(dimensions, "crossline") == NamedDimension(name="crossline", size=3) + assert _get_named_dimension(dimensions, "time") is None + + with pytest.raises(TypeError, match="Expected str, got NoneType"): + _get_named_dimension(dimensions, None) + with pytest.raises(TypeError, match="Expected str, got int"): + _get_named_dimension(dimensions, 42) + with pytest.raises( + ValueError, match="Dimension 'inline' found but size 2 does not match expected size 200" + ): + _get_named_dimension(dimensions, "inline", size=200) + + +def test__to_dictionary() -> None: + """Test converting a dictionary, list or pydantic BaseModel to a dictionary.""" + # Validate inputs + with pytest.raises(TypeError, match="Expected BaseModel, dict or list, got datetime"): + _to_dictionary(datetime.now(UTC)) + + # Convert None to None + result = _to_dictionary(None) + assert result is None + + # Validate conversion of a Pydantic BaseModel + class SomeModel(StrictModel): + count: int = Field(default=None, description="Samples count") + samples: list[float] = Field(default_factory=list, description="Samples.") + created: datetime = Field( + default_factory=datetime.now, description="Creation time with TZ info." 
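+            # NOTE: datetime.now with no tz argument returns a naive timestamp; the
+            # assertions below always construct `created` with an explicit tzinfo=UTC.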
+ ) + + md = SomeModel( + count=3, samples=[1.0, 2.0, 3.0], created=datetime(2023, 10, 1, 12, 0, 0, tzinfo=UTC) + ) + result = _to_dictionary(md) + assert isinstance(result, dict) + assert result == {"count": 3, "created": "2023-10-01T12:00:00Z", "samples": [1.0, 2.0, 3.0]} + + # Validate conversion of a dictionary + dct = { + "count": 3, + "samples": [1.0, 2.0, 3.0], + "created": datetime(2023, 10, 1, 12, 0, 0, tzinfo=UTC), + } + result = _to_dictionary(dct) + assert isinstance(result, dict) + assert result == { + "count": 3, + "samples": [1.0, 2.0, 3.0], + "created": datetime(2023, 10, 1, 12, 0, 0, tzinfo=UTC), + } + + # Validate conversion of a dictionary + lst = [ + None, + SomeModel( + count=3, samples=[1.0, 2.0, 3.0], created=datetime(2023, 10, 1, 12, 0, 0, tzinfo=UTC) + ), + { + "count2": 3, + "samples2": [1.0, 2.0, 3.0], + "created2": datetime(2023, 10, 1, 12, 0, 0, tzinfo=UTC), + }, + ] + result = _to_dictionary(lst) + assert isinstance(result, dict) + assert result == { + "count": 3, + "samples": [1.0, 2.0, 3.0], + "created": "2023-10-01T12:00:00Z", + "count2": 3, + "samples2": [1.0, 2.0, 3.0], + "created2": datetime(2023, 10, 1, 12, 0, 0, tzinfo=UTC), + } diff --git a/tests/unit/v1/test_dataset_serializer.py b/tests/unit/v1/test_dataset_serializer.py new file mode 100644 index 00000000..4ad2040e --- /dev/null +++ b/tests/unit/v1/test_dataset_serializer.py @@ -0,0 +1,459 @@ +"""Tests the schema v1 dataset_serializer public API.""" + +from pathlib import Path + +import pytest +from dask import array as dask_array +from numpy import array as np_array +from numpy import dtype as np_dtype +from numpy import isnan as np_isnan +from numpy import zeros as np_zeros +from xarray import DataArray as xr_DataArray +from zarr import zeros as zarr_zeros + +from mdio.constants import fill_value_map +from mdio.schemas.chunk_grid import RegularChunkGrid +from mdio.schemas.chunk_grid import RegularChunkShape +from mdio.schemas.dimension import NamedDimension +from mdio.schemas.dtype import ScalarType +from mdio.schemas.dtype import StructuredField +from mdio.schemas.dtype import StructuredType +from mdio.schemas.metadata import ChunkGridMetadata +from mdio.schemas.v1.dataset import Dataset +from mdio.schemas.v1.dataset import DatasetInfo +from mdio.schemas.v1.dataset_builder import MDIODatasetBuilder +from mdio.schemas.v1.dataset_builder import _to_dictionary +from mdio.schemas.v1.dataset_serializer import _convert_compressor +from mdio.schemas.v1.dataset_serializer import _get_all_named_dimensions +from mdio.schemas.v1.dataset_serializer import _get_coord_names +from mdio.schemas.v1.dataset_serializer import _get_dimension_names +from mdio.schemas.v1.dataset_serializer import _get_fill_value +from mdio.schemas.v1.dataset_serializer import _get_zarr_chunks +from mdio.schemas.v1.dataset_serializer import _get_zarr_shape +from mdio.schemas.v1.dataset_serializer import to_xarray_dataset +from mdio.schemas.v1.variable import Coordinate +from mdio.schemas.v1.variable import Variable + +from .helpers import make_seismic_poststack_3d_acceptance_dataset +from .helpers import output_path + +try: + from zfpy import ZFPY + + HAS_ZFPY = True +except ImportError: + ZFPY = None + HAS_ZFPY = False + +from numcodecs import Blosc as nc_Blosc + +from mdio.schemas.compressors import ZFP as MDIO_ZFP +from mdio.schemas.compressors import Blosc as mdio_Blosc +from mdio.schemas.compressors import BloscAlgorithm as mdio_BloscAlgorithm +from mdio.schemas.compressors import BloscShuffle as mdio_BloscShuffle +from 
mdio.schemas.compressors import ZFPMode as mdio_ZFPMode + + +def test_get_all_named_dimensions() -> None: + """Test _get_all_named_dimensions function.""" + dim1 = NamedDimension(name="inline", size=100) + dim2 = NamedDimension(name="crossline", size=200) + dim3 = NamedDimension(name="depth", size=300) + v1 = Variable(name="named_dims", data_type=ScalarType.FLOAT32, dimensions=[dim1, dim2, dim3]) + v2 = Variable( + name="string_dims", + data_type=ScalarType.FLOAT32, + dimensions=["inline", "crossline", "depth"], + ) + v3 = Variable(name="unresolved_dims", data_type=ScalarType.FLOAT32, dimensions=["x", "y", "z"]) + ds = Dataset( + variables=[v1, v2, v3], + metadata=_to_dictionary( + [ + DatasetInfo( + name="test_dataset", api_version="1.0.0", created_on="2023-10-01T00:00:00Z" + ) + ] + ), + ) + + all_dims = _get_all_named_dimensions(ds) + # Only 3 named dimensions could be resolved. + # The dimension names "x", "y', "z" are unresolvable. + assert set(all_dims) == {"inline", "crossline", "depth"} + + +def test_get_dimension_names() -> None: + """Test _get_dimension_names function with various dimension types.""" + dim1 = NamedDimension(name="inline", size=100) + dim2 = NamedDimension(name="crossline", size=200) + + # Test case 1: Variable with NamedDimension + var_named_dims = Variable( + name="Variable with NamedDimension dimensions", + data_type=ScalarType.FLOAT32, + dimensions=[dim1, dim2], + ) + assert set(_get_dimension_names(var_named_dims)) == {"inline", "crossline"} + + # Test case 2: Variable with string dimensions + var_string_dims = Variable( + name="Variable with string dimensions", + data_type=ScalarType.FLOAT32, + dimensions=["x", "y", "z"], + ) + assert set(_get_dimension_names(var_string_dims)) == {"x", "y", "z"} + + # Test case 3: Mixed NamedDimension and string dimensions + # NOTE: mixing NamedDimension and string dimensions is not allowed by the Variable schema + + +def test_get_coord_names() -> None: + """Comprehensive test for _get_coord_names function covering all scenarios.""" + dim1 = NamedDimension(name="inline", size=100) + dim2 = NamedDimension(name="crossline", size=200) + + # Test 1: Variable with Coordinate objects + coord1 = Coordinate(name="x_coord", dimensions=[dim1, dim2], data_type=ScalarType.FLOAT32) + coord2 = Coordinate(name="y_coord", dimensions=[dim1, dim2], data_type=ScalarType.FLOAT64) + variable_coords = Variable( + name="Variable with Coordinate objects", + data_type=ScalarType.FLOAT32, + dimensions=[dim1, dim2], + coordinates=[coord1, coord2], + ) + assert set(_get_coord_names(variable_coords)) == {"x_coord", "y_coord"} + + # Test 2: Variable with string coordinates + variable_strings = Variable( + name="Variable with string coordinates", + data_type=ScalarType.FLOAT32, + dimensions=[dim1, dim2], + coordinates=["lat", "lon", "time"], + ) + assert set(_get_coord_names(variable_strings)) == {"lat", "lon", "time"} + + # Test 3: Variable with mixed coordinate types + # NOTE: mixing Coordinate objects and coordinate name strings is not allowed by the + # Variable schema + + +def test_get_zarr_shape() -> None: + """Test for _get_zarr_shape function.""" + d1 = NamedDimension(name="inline", size=100) + d2 = NamedDimension(name="crossline", size=200) + d3 = NamedDimension(name="depth", size=300) + all_named_dims = {"inline": d1, "crossline": d2, "depth": d3} + v1 = Variable(name="named dims var", data_type=ScalarType.FLOAT32, dimensions=[d1, d2, d3]) + v2 = Variable( + name="str var", data_type=ScalarType.FLOAT32, dimensions=["inline", "crossline", 
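+        # string dimension names are resolved against the `all_named_dims` mapping
+        # when the zarr shape is computed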
"depth"] + ) + Dataset( + variables=[v1, v2], + metadata=_to_dictionary( + [ + DatasetInfo( + name="test_dataset", api_version="1.0.0", created_on="2023-10-01T00:00:00Z" + ) + ] + ), + ) + + assert _get_zarr_shape(v1, all_named_dims) == (100, 200, 300) + assert _get_zarr_shape(v2, all_named_dims) == (100, 200, 300) + + +def test_get_zarr_chunks() -> None: + """Test for _get_zarr_chunks function.""" + d1 = NamedDimension(name="inline", size=100) + d2 = NamedDimension(name="crossline", size=200) + d3 = NamedDimension(name="depth", size=300) + + # Test 1: Variable with chunk defined in metadata + v = Variable( + name="seismic 3d var", + data_type=ScalarType.FLOAT32, + dimensions=[d1, d2, d3], + metadata=_to_dictionary( + ChunkGridMetadata( + chunk_grid=RegularChunkGrid( + configuration=RegularChunkShape(chunk_shape=[10, 20, 30]) + ) + ) + ), + ) + assert _get_zarr_chunks(v, all_named_dims=[d1, d2, d3]) == (10, 20, 30) + + # Test 2: Variable with no chunks defined + v = Variable(name="seismic 3d var", data_type=ScalarType.FLOAT32, dimensions=[d1, d2, d3]) + assert _get_zarr_chunks(v, all_named_dims=[d1, d2, d3]) == (100, 200, 300) + + +def test_get_fill_value() -> None: + """Test for _get_fill_value function.""" + # Test 1: ScalarType cases - should return values from fill_value_map + scalar_types = [ + ScalarType.BOOL, + ] + for scalar_type in scalar_types: + assert _get_fill_value(scalar_type) is None + + scalar_types = [ + ScalarType.FLOAT16, + ScalarType.FLOAT32, + ScalarType.FLOAT64, + ] + for scalar_type in scalar_types: + assert np_isnan(_get_fill_value(scalar_type)) + + scalar_types = [ + ScalarType.UINT8, + ScalarType.UINT16, + ScalarType.UINT32, + ScalarType.INT8, + ScalarType.INT16, + ScalarType.INT32, + ] + for scalar_type in scalar_types: + fill_value = _get_fill_value(scalar_type) + assert fill_value_map[scalar_type] == fill_value + + scalar_types = [ + ScalarType.COMPLEX64, + ScalarType.COMPLEX128, + ScalarType.COMPLEX256, + ] + for scalar_type in scalar_types: + val = _get_fill_value(scalar_type) + assert isinstance(val, complex) + assert np_isnan(val.real) + assert np_isnan(val.imag) + + # Test 2: StructuredType + f1 = StructuredField(name="cdp_x", format=ScalarType.INT32) + f2 = StructuredField(name="cdp_y", format=ScalarType.INT32) + f3 = StructuredField(name="elevation", format=ScalarType.FLOAT16) + f4 = StructuredField(name="some_scalar", format=ScalarType.FLOAT16) + structured_type = StructuredType(fields=[f1, f2, f3, f4]) + + expected = np_array( + (0, 0, 0.0, 0.0), + dtype=np_dtype( + [("cdp_x", " None: + """Simple test for _convert_compressor function covering basic scenarios.""" + # Test 1: None input - should return None + result_none = _convert_compressor(None) + assert result_none is None + + # Test 2: mdio_Blosc compressor - should return nc_Blosc + result_blosc = _convert_compressor( + mdio_Blosc( + algorithm=mdio_BloscAlgorithm.LZ4, + level=5, + shuffle=mdio_BloscShuffle.AUTOSHUFFLE, + blocksize=1024, + ) + ) + assert isinstance(result_blosc, nc_Blosc) + assert result_blosc.cname == "lz4" # BloscAlgorithm.LZ4.value + assert result_blosc.clevel == 5 + assert result_blosc.shuffle == -1 # BloscShuffle.UTOSHUFFLE = -1 + assert result_blosc.blocksize == 1024 + + # Test 3: mdio_Blosc with blocksize 0 - should use 0 as blocksize + result_blosc_zero = _convert_compressor( + mdio_Blosc( + algorithm=mdio_BloscAlgorithm.ZSTD, + level=3, + shuffle=mdio_BloscShuffle.AUTOSHUFFLE, + blocksize=0, + ) + ) + assert isinstance(result_blosc_zero, nc_Blosc) + assert 
+
+
+def test_convert_compressor() -> None:
+    """Simple test for _convert_compressor function covering basic scenarios."""
+    # Test 1: None input - should return None
+    result_none = _convert_compressor(None)
+    assert result_none is None
+
+    # Test 2: mdio_Blosc compressor - should return nc_Blosc
+    result_blosc = _convert_compressor(
+        mdio_Blosc(
+            algorithm=mdio_BloscAlgorithm.LZ4,
+            level=5,
+            shuffle=mdio_BloscShuffle.AUTOSHUFFLE,
+            blocksize=1024,
+        )
+    )
+    assert isinstance(result_blosc, nc_Blosc)
+    assert result_blosc.cname == "lz4"  # BloscAlgorithm.LZ4.value
+    assert result_blosc.clevel == 5
+    assert result_blosc.shuffle == -1  # BloscShuffle.AUTOSHUFFLE = -1
+    assert result_blosc.blocksize == 1024
+
+    # Test 3: mdio_Blosc with blocksize 0 - should use 0 as blocksize
+    result_blosc_zero = _convert_compressor(
+        mdio_Blosc(
+            algorithm=mdio_BloscAlgorithm.ZSTD,
+            level=3,
+            shuffle=mdio_BloscShuffle.AUTOSHUFFLE,
+            blocksize=0,
+        )
+    )
+    assert isinstance(result_blosc_zero, nc_Blosc)
+    assert result_blosc_zero.blocksize == 0
+
+    # Test 4: mdio_ZFP compressor - should return ZFPY if available
+    zfp_compressor = MDIO_ZFP(mode=mdio_ZFPMode.FIXED_RATE, tolerance=0.01, rate=8.0, precision=16)
+
+    if HAS_ZFPY:
+        result_zfp = _convert_compressor(zfp_compressor)
+        assert isinstance(result_zfp, ZFPY)
+        assert result_zfp.mode == 1  # ZFPMode.FIXED_RATE.value = "fixed_rate"
+        assert result_zfp.tolerance == 0.01
+        assert result_zfp.rate == 8.0
+        assert result_zfp.precision == 16
+    else:
+        # Test 5: mdio_ZFP without zfpy installed - should raise ImportError
+        with pytest.raises(ImportError) as exc_info:
+            _convert_compressor(zfp_compressor)
+        error_message = str(exc_info.value)
+        assert "zfpy and numcodecs are required to use ZFP compression" in error_message
+
+    # Test 6: Unsupported compressor type - should raise TypeError
+    unsupported_compressor = "invalid_compressor"
+    with pytest.raises(TypeError) as exc_info:
+        _convert_compressor(unsupported_compressor)
+    error_message = str(exc_info.value)
+    assert "Unsupported compressor model" in error_message
+    assert "<class 'str'>" in error_message
+
+
+def test_to_xarray_dataset(tmp_path: Path) -> None:
+    """Test building a complete dataset."""
+    dataset = (
+        MDIODatasetBuilder("test_dataset")
+        .add_dimension("inline", 100)
+        .add_dimension("crossline", 200)
+        .add_dimension("depth", 300)
+        .add_coordinate("inline", dimensions=["inline"], data_type=ScalarType.FLOAT64)
+        .add_coordinate("crossline", dimensions=["crossline"], data_type=ScalarType.FLOAT64)
+        .add_coordinate("x_coord", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT32)
+        .add_coordinate("y_coord", dimensions=["inline", "crossline"], data_type=ScalarType.FLOAT32)
+        .add_variable(
+            "data",
+            long_name="Test Data",
+            dimensions=["inline", "crossline", "depth"],
+            coordinates=["inline", "crossline", "x_coord", "y_coord"],
+            data_type=ScalarType.FLOAT32,
+        )
+        .build()
+    )
+
+    xr_ds = to_xarray_dataset(dataset)
+
+    file_path = output_path(tmp_path, f"{xr_ds.attrs['name']}", debugging=False)
+    xr_ds.to_zarr(store=file_path, mode="w", zarr_format=2, compute=False)
+
+
+def test_seismic_poststack_3d_acceptance_to_xarray_dataset(tmp_path: Path) -> None:
+    """Test converting the seismic post-stack 3D acceptance dataset."""
+    dataset = make_seismic_poststack_3d_acceptance_dataset("Seismic")
+
+    xr_ds = to_xarray_dataset(dataset)
+
+    file_path = output_path(tmp_path, f"{xr_ds.attrs['name']}", debugging=False)
+    xr_ds.to_zarr(store=file_path, mode="w", zarr_format=2, compute=False)
+
+
+@pytest.mark.skip(reason="Bug reproducer for the issue 582")
+def test_bug_reproducer_dask_to_zarr(tmp_path: Path) -> None:
+    """Bug reproducer for the issue https://github.com/TGSAI/mdio-python/issues/582."""
+    # TODO(Dmitriy Repin): Remove this test after the bug is fixed
+    # https://github.com/TGSAI/mdio-python/issues/582
+
+    # Create a data type and the fill value
+    dtype = np_dtype([("inline", "int32"), ("cdp_x", "float64")])
+    dtype_fill_value = np_zeros((), dtype=dtype)
+
+    # Use '_FillValue' instead of 'fill_value'
+    # 'fill_value' is not a valid encoding key in Zarr v2
+    my_attr_encoding = {
+        "_FillValue": dtype_fill_value,
+        "chunk_key_encoding": {"name": "v2", "separator": "/"},
+    }
+
+    # Create a dask array using the data type
+    # Do not specify encoding as the array attribute
+    data = dask_array.zeros((36,), dtype=dtype, chunks=(36,))
+    aa = xr_DataArray(name="myattr", data=data)
+
+    # Specify encoding per array
+    encoding = {"myattr": my_attr_encoding}
+    file_path = output_path(tmp_path, "to_zarr/zarr_dask", debugging=False)
+    aa.to_zarr(file_path, mode="w", zarr_format=2, encoding=encoding, compute=False)
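# Editor's sketch (not part of this change): test_convert_compressor above
# implies a one-to-one mapping from MDIO's declarative Blosc model onto a
# numcodecs Blosc codec. A hedged illustration of that mapping, assuming the
# attribute names used when constructing mdio_Blosc in the test:
def convert_blosc_sketch(model: mdio_Blosc) -> nc_Blosc:
    """Hypothetical equivalent of the Blosc branch of _convert_compressor."""
    return nc_Blosc(
        cname=model.algorithm.value,  # e.g. "lz4"
        clevel=model.level,
        shuffle=model.shuffle.value,  # e.g. -1 for AUTOSHUFFLE
        blocksize=model.blocksize,
    )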
+
+
+def test_to_zarr_from_zarr_zeros_1(tmp_path: Path) -> None:
+    """Test writing XArray dataset with data as Zarr zero array to Zarr.
+
+    Set encoding as DataArray attributes.
+    """
+    # Create a data type and the fill value
+    dtype = np_dtype([("inline", "int32"), ("cdp_x", "float64")])
+    dtype_fill_value = np_zeros((), dtype=dtype)
+
+    # Use '_FillValue' instead of 'fill_value'
+    # 'fill_value' is not a valid encoding key in Zarr v2
+    my_attr_encoding = {
+        "_FillValue": dtype_fill_value,
+        "chunk_key_encoding": {"name": "v2", "separator": "/"},
+    }
+
+    # Create a zarr array using the data type,
+    # Specify encoding as the array attribute
+    data = zarr_zeros((36, 36), dtype=dtype, zarr_format=2)
+    aa = xr_DataArray(name="myattr", data=data)
+    aa.encoding = my_attr_encoding
+
+    file_path = output_path(tmp_path, "to_zarr/zarr_zarr_zeros_1", debugging=False)
+    aa.to_zarr(file_path, mode="w", zarr_format=2, compute=False)
+
+
+def test_to_zarr_from_zarr_zeros_2(tmp_path: Path) -> None:
+    """Test writing XArray dataset with data as Zarr zero array to Zarr.
+
+    Set encoding in the to_zarr method.
+    """
+    # Create a data type and the fill value
+    dtype = np_dtype([("inline", "int32"), ("cdp_x", "float64")])
+    dtype_fill_value = np_zeros((), dtype=dtype)
+
+    # Use '_FillValue' instead of 'fill_value'
+    # 'fill_value' is not a valid encoding key in Zarr v2
+    my_attr_encoding = {
+        "_FillValue": dtype_fill_value,
+        "chunk_key_encoding": {"name": "v2", "separator": "/"},
+    }
+
+    # Create a zarr array using the data type,
+    # Do not specify encoding as the array attribute
+    data = zarr_zeros((36, 36), dtype=dtype, zarr_format=2)
+    aa = xr_DataArray(name="myattr", data=data)
+
+    file_path = output_path(tmp_path, "to_zarr/zarr_zarr_zeros_2", debugging=False)
+    # Specify encoding per array
+    encoding = {"myattr": my_attr_encoding}
+    aa.to_zarr(file_path, mode="w", zarr_format=2, encoding=encoding, compute=False)
+
+
+def test_to_zarr_from_np(tmp_path: Path) -> None:
+    """Test writing XArray dataset with data as NumPy array to Zarr."""
+    # Create a data type and the fill value
+    dtype = np_dtype([("inline", "int32"), ("cdp_x", "float64")])
+    dtype_fill_value = np_zeros((), dtype=dtype)
+
+    # Use '_FillValue' instead of 'fill_value'
+    # 'fill_value' is not a valid encoding key in Zarr v2
+    my_attr_encoding = {
+        "_FillValue": dtype_fill_value,
+        "chunk_key_encoding": {"name": "v2", "separator": "/"},
+    }
+
+    # Create a NumPy array using the data type
+    # Do not specify encoding as the array attribute
+    data = np_zeros((36, 36), dtype=dtype)
+    aa = xr_DataArray(name="myattr", data=data)
+
+    file_path = output_path(tmp_path, "to_zarr/zarr_np", debugging=False)
+    # Specify encoding per array
+    encoding = {"myattr": my_attr_encoding}
+    aa.to_zarr(file_path, mode="w", zarr_format=2, encoding=encoding, compute=False)
diff --git a/uv.lock b/uv.lock
index f2de669f..9bb4ff52 100644
--- a/uv.lock
+++ b/uv.lock
@@ -212,18 +212,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" },
 ]
-[[package]]
-name = "authlib"
-version = "1.6.1"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "cryptography" },
-]
-sdist = { url =
"https://files.pythonhosted.org/packages/8e/a1/d8d1c6f8bc922c0b87ae0d933a8ed57be1bef6970894ed79c2852a153cd3/authlib-1.6.1.tar.gz", hash = "sha256:4dffdbb1460ba6ec8c17981a4c67af7d8af131231b5a36a88a1e8c80c111cdfd", size = 159988, upload-time = "2025-07-20T07:38:42.834Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/58/cc6a08053f822f98f334d38a27687b69c6655fb05cd74a7a5e70a2aeed95/authlib-1.6.1-py2.py3-none-any.whl", hash = "sha256:e9d2031c34c6309373ab845afc24168fe9e93dc52d252631f52642f21f5ed06e", size = 239299, upload-time = "2025-07-20T07:38:39.259Z" }, -] - [[package]] name = "azure-core" version = "1.35.0" @@ -745,23 +733,23 @@ wheels = [ [[package]] name = "debugpy" -version = "1.8.15" +version = "1.8.16" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8c/8b/3a9a28ddb750a76eaec445c7f4d3147ea2c579a97dbd9e25d39001b92b21/debugpy-1.8.15.tar.gz", hash = "sha256:58d7a20b7773ab5ee6bdfb2e6cf622fdf1e40c9d5aef2857d85391526719ac00", size = 1643279, upload-time = "2025-07-15T16:43:29.135Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/d4/722d0bcc7986172ac2ef3c979ad56a1030e3afd44ced136d45f8142b1f4a/debugpy-1.8.16.tar.gz", hash = "sha256:31e69a1feb1cf6b51efbed3f6c9b0ef03bc46ff050679c4be7ea6d2e23540870", size = 1643809, upload-time = "2025-08-06T18:00:02.647Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/b3/1c44a2ed311199ab11c2299c9474a6c7cd80d19278defd333aeb7c287995/debugpy-1.8.15-cp311-cp311-macosx_14_0_universal2.whl", hash = "sha256:babc4fb1962dd6a37e94d611280e3d0d11a1f5e6c72ac9b3d87a08212c4b6dd3", size = 2183442, upload-time = "2025-07-15T16:43:36.733Z" }, - { url = "https://files.pythonhosted.org/packages/f6/69/e2dcb721491e1c294d348681227c9b44fb95218f379aa88e12a19d85528d/debugpy-1.8.15-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f778e68f2986a58479d0ac4f643e0b8c82fdd97c2e200d4d61e7c2d13838eb53", size = 3134215, upload-time = "2025-07-15T16:43:38.116Z" }, - { url = "https://files.pythonhosted.org/packages/17/76/4ce63b95d8294dcf2fd1820860b300a420d077df4e93afcaa25a984c2ca7/debugpy-1.8.15-cp311-cp311-win32.whl", hash = "sha256:f9d1b5abd75cd965e2deabb1a06b0e93a1546f31f9f621d2705e78104377c702", size = 5154037, upload-time = "2025-07-15T16:43:39.471Z" }, - { url = "https://files.pythonhosted.org/packages/c2/a7/e5a7c784465eb9c976d84408873d597dc7ce74a0fc69ed009548a1a94813/debugpy-1.8.15-cp311-cp311-win_amd64.whl", hash = "sha256:62954fb904bec463e2b5a415777f6d1926c97febb08ef1694da0e5d1463c5c3b", size = 5178133, upload-time = "2025-07-15T16:43:40.969Z" }, - { url = "https://files.pythonhosted.org/packages/ab/4a/4508d256e52897f5cdfee6a6d7580974811e911c6d01321df3264508a5ac/debugpy-1.8.15-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:3dcc7225cb317469721ab5136cda9ff9c8b6e6fb43e87c9e15d5b108b99d01ba", size = 2511197, upload-time = "2025-07-15T16:43:42.343Z" }, - { url = "https://files.pythonhosted.org/packages/99/8d/7f6ef1097e7fecf26b4ef72338d08e41644a41b7ee958a19f494ffcffc29/debugpy-1.8.15-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:047a493ca93c85ccede1dbbaf4e66816794bdc214213dde41a9a61e42d27f8fc", size = 4229517, upload-time = "2025-07-15T16:43:44.14Z" }, - { url = "https://files.pythonhosted.org/packages/3f/e8/e8c6a9aa33a9c9c6dacbf31747384f6ed2adde4de2e9693c766bdf323aa3/debugpy-1.8.15-cp312-cp312-win32.whl", hash = 
"sha256:b08e9b0bc260cf324c890626961dad4ffd973f7568fbf57feb3c3a65ab6b6327", size = 5276132, upload-time = "2025-07-15T16:43:45.529Z" }, - { url = "https://files.pythonhosted.org/packages/e9/ad/231050c6177b3476b85fcea01e565dac83607b5233d003ff067e2ee44d8f/debugpy-1.8.15-cp312-cp312-win_amd64.whl", hash = "sha256:e2a4fe357c92334272eb2845fcfcdbec3ef9f22c16cf613c388ac0887aed15fa", size = 5317645, upload-time = "2025-07-15T16:43:46.968Z" }, - { url = "https://files.pythonhosted.org/packages/28/70/2928aad2310726d5920b18ed9f54b9f06df5aa4c10cf9b45fa18ff0ab7e8/debugpy-1.8.15-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:f5e01291ad7d6649aed5773256c5bba7a1a556196300232de1474c3c372592bf", size = 2495538, upload-time = "2025-07-15T16:43:48.927Z" }, - { url = "https://files.pythonhosted.org/packages/9e/c6/9b8ffb4ca91fac8b2877eef63c9cc0e87dd2570b1120054c272815ec4cd0/debugpy-1.8.15-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94dc0f0d00e528d915e0ce1c78e771475b2335b376c49afcc7382ee0b146bab6", size = 4221874, upload-time = "2025-07-15T16:43:50.282Z" }, - { url = "https://files.pythonhosted.org/packages/55/8a/9b8d59674b4bf489318c7c46a1aab58e606e583651438084b7e029bf3c43/debugpy-1.8.15-cp313-cp313-win32.whl", hash = "sha256:fcf0748d4f6e25f89dc5e013d1129ca6f26ad4da405e0723a4f704583896a709", size = 5275949, upload-time = "2025-07-15T16:43:52.079Z" }, - { url = "https://files.pythonhosted.org/packages/72/83/9e58e6fdfa8710a5e6ec06c2401241b9ad48b71c0a7eb99570a1f1edb1d3/debugpy-1.8.15-cp313-cp313-win_amd64.whl", hash = "sha256:73c943776cb83e36baf95e8f7f8da765896fd94b05991e7bc162456d25500683", size = 5317720, upload-time = "2025-07-15T16:43:53.703Z" }, - { url = "https://files.pythonhosted.org/packages/07/d5/98748d9860e767a1248b5e31ffa7ce8cb7006e97bf8abbf3d891d0a8ba4e/debugpy-1.8.15-py2.py3-none-any.whl", hash = "sha256:bce2e6c5ff4f2e00b98d45e7e01a49c7b489ff6df5f12d881c67d2f1ac635f3d", size = 5282697, upload-time = "2025-07-15T16:44:07.996Z" }, + { url = "https://files.pythonhosted.org/packages/63/d6/ad70ba8b49b23fa286fb21081cf732232cc19374af362051da9c7537ae52/debugpy-1.8.16-cp311-cp311-macosx_14_0_universal2.whl", hash = "sha256:67371b28b79a6a12bcc027d94a06158f2fde223e35b5c4e0783b6f9d3b39274a", size = 2184063, upload-time = "2025-08-06T18:00:11.885Z" }, + { url = "https://files.pythonhosted.org/packages/aa/49/7b03e88dea9759a4c7910143f87f92beb494daaae25560184ff4ae883f9e/debugpy-1.8.16-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2abae6dd02523bec2dee16bd6b0781cccb53fd4995e5c71cc659b5f45581898", size = 3134837, upload-time = "2025-08-06T18:00:13.782Z" }, + { url = "https://files.pythonhosted.org/packages/5d/52/b348930316921de7565fbe37a487d15409041713004f3d74d03eb077dbd4/debugpy-1.8.16-cp311-cp311-win32.whl", hash = "sha256:f8340a3ac2ed4f5da59e064aa92e39edd52729a88fbde7bbaa54e08249a04493", size = 5159142, upload-time = "2025-08-06T18:00:15.391Z" }, + { url = "https://files.pythonhosted.org/packages/d8/ef/9aa9549ce1e10cea696d980292e71672a91ee4a6a691ce5f8629e8f48c49/debugpy-1.8.16-cp311-cp311-win_amd64.whl", hash = "sha256:70f5fcd6d4d0c150a878d2aa37391c52de788c3dc680b97bdb5e529cb80df87a", size = 5183117, upload-time = "2025-08-06T18:00:17.251Z" }, + { url = "https://files.pythonhosted.org/packages/61/fb/0387c0e108d842c902801bc65ccc53e5b91d8c169702a9bbf4f7efcedf0c/debugpy-1.8.16-cp312-cp312-macosx_14_0_universal2.whl", hash = 
"sha256:b202e2843e32e80b3b584bcebfe0e65e0392920dc70df11b2bfe1afcb7a085e4", size = 2511822, upload-time = "2025-08-06T18:00:18.526Z" }, + { url = "https://files.pythonhosted.org/packages/37/44/19e02745cae22bf96440141f94e15a69a1afaa3a64ddfc38004668fcdebf/debugpy-1.8.16-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64473c4a306ba11a99fe0bb14622ba4fbd943eb004847d9b69b107bde45aa9ea", size = 4230135, upload-time = "2025-08-06T18:00:19.997Z" }, + { url = "https://files.pythonhosted.org/packages/f3/0b/19b1ba5ee4412f303475a2c7ad5858efb99c90eae5ec627aa6275c439957/debugpy-1.8.16-cp312-cp312-win32.whl", hash = "sha256:833a61ed446426e38b0dd8be3e9d45ae285d424f5bf6cd5b2b559c8f12305508", size = 5281271, upload-time = "2025-08-06T18:00:21.281Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e0/bc62e2dc141de53bd03e2c7cb9d7011de2e65e8bdcdaa26703e4d28656ba/debugpy-1.8.16-cp312-cp312-win_amd64.whl", hash = "sha256:75f204684581e9ef3dc2f67687c3c8c183fde2d6675ab131d94084baf8084121", size = 5323149, upload-time = "2025-08-06T18:00:23.033Z" }, + { url = "https://files.pythonhosted.org/packages/62/66/607ab45cc79e60624df386e233ab64a6d8d39ea02e7f80e19c1d451345bb/debugpy-1.8.16-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:85df3adb1de5258dca910ae0bb185e48c98801ec15018a263a92bb06be1c8787", size = 2496157, upload-time = "2025-08-06T18:00:24.361Z" }, + { url = "https://files.pythonhosted.org/packages/4d/a0/c95baae08a75bceabb79868d663a0736655e427ab9c81fb848da29edaeac/debugpy-1.8.16-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bee89e948bc236a5c43c4214ac62d28b29388453f5fd328d739035e205365f0b", size = 4222491, upload-time = "2025-08-06T18:00:25.806Z" }, + { url = "https://files.pythonhosted.org/packages/5b/2f/1c8db6ddd8a257c3cd2c46413b267f1d5fa3df910401c899513ce30392d6/debugpy-1.8.16-cp313-cp313-win32.whl", hash = "sha256:cf358066650439847ec5ff3dae1da98b5461ea5da0173d93d5e10f477c94609a", size = 5281126, upload-time = "2025-08-06T18:00:27.207Z" }, + { url = "https://files.pythonhosted.org/packages/d3/ba/c3e154ab307366d6c5a9c1b68de04914e2ce7fa2f50d578311d8cc5074b2/debugpy-1.8.16-cp313-cp313-win_amd64.whl", hash = "sha256:b5aea1083f6f50023e8509399d7dc6535a351cc9f2e8827d1e093175e4d9fa4c", size = 5323094, upload-time = "2025-08-06T18:00:29.03Z" }, + { url = "https://files.pythonhosted.org/packages/52/57/ecc9ae29fa5b2d90107cd1d9bf8ed19aacb74b2264d986ae9d44fe9bdf87/debugpy-1.8.16-py2.py3-none-any.whl", hash = "sha256:19c9521962475b87da6f673514f7fd610328757ec993bf7ec0d8c96f9a325f9e", size = 5287700, upload-time = "2025-08-06T18:00:42.333Z" }, ] [[package]] @@ -841,18 +829,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/d5/c5db1ea3394c6e1732fb3286b3bd878b59507a8f77d32a2cebda7d7b7cd4/donfig-0.8.1.post1-py3-none-any.whl", hash = "sha256:2a3175ce74a06109ff9307d90a230f81215cbac9a751f4d1c6194644b8204f9d", size = 21592, upload-time = "2024-05-23T14:13:55.283Z" }, ] -[[package]] -name = "dparse" -version = "0.6.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "packaging" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/29/ee/96c65e17222b973f0d3d0aa9bad6a59104ca1b0eb5b659c25c2900fccd85/dparse-0.6.4.tar.gz", hash = "sha256:90b29c39e3edc36c6284c82c4132648eaf28a01863eb3c231c2512196132201a", size = 27912, upload-time = "2024-11-08T16:52:06.444Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/56/26/035d1c308882514a1e6ddca27f9d3e570d67a0e293e7b4d910a70c8fe32b/dparse-0.6.4-py3-none-any.whl", hash = "sha256:fbab4d50d54d0e739fbb4dedfc3d92771003a5b9aa8545ca7a7045e3b174af57", size = 11925, upload-time = "2024-11-08T16:52:03.844Z" }, -] - [[package]] name = "executing" version = "2.2.0" @@ -873,11 +849,35 @@ wheels = [ [[package]] name = "filelock" -version = "3.16.1" +version = "3.18.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9d/db/3ef5bb276dae18d6ec2124224403d1d67bccdbefc17af4cc8f553e341ab1/filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435", size = 18037, upload-time = "2024-09-17T19:02:01.779Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075, upload-time = "2025-03-14T07:11:40.47Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/f8/feced7779d755758a52d1f6635d990b8d98dc0a29fa568bbe0625f18fdf3/filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0", size = 16163, upload-time = "2024-09-17T19:02:00.268Z" }, + { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, upload-time = "2025-03-14T07:11:39.145Z" }, +] + +[[package]] +name = "flexcache" +version = "0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/b0/8a21e330561c65653d010ef112bf38f60890051d244ede197ddaa08e50c1/flexcache-0.3.tar.gz", hash = "sha256:18743bd5a0621bfe2cf8d519e4c3bfdf57a269c15d1ced3fb4b64e0ff4600656", size = 15816, upload-time = "2024-03-09T03:21:07.555Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/cd/c883e1a7c447479d6e13985565080e3fea88ab5a107c21684c813dba1875/flexcache-0.3-py3-none-any.whl", hash = "sha256:d43c9fea82336af6e0115e308d9d33a185390b8346a017564611f1466dcd2e32", size = 13263, upload-time = "2024-03-09T03:21:05.635Z" }, +] + +[[package]] +name = "flexparser" +version = "0.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/82/99/b4de7e39e8eaf8207ba1a8fa2241dd98b2ba72ae6e16960d8351736d8702/flexparser-0.4.tar.gz", hash = "sha256:266d98905595be2ccc5da964fe0a2c3526fbbffdc45b65b3146d75db992ef6b2", size = 31799, upload-time = "2024-11-07T02:00:56.249Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/5e/3be305568fe5f34448807976dc82fc151d76c3e0e03958f34770286278c1/flexparser-0.4-py3-none-any.whl", hash = "sha256:3738b456192dcb3e15620f324c447721023c0293f6af9955b481e91d00179846", size = 27625, upload-time = "2024-11-07T02:00:54.523Z" }, ] [[package]] @@ -959,11 +959,11 @@ wheels = [ [[package]] name = "fsspec" -version = "2024.12.0" +version = "2025.7.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ee/11/de70dee31455c546fbc88301971ec03c328f3d1138cfba14263f651e9551/fsspec-2024.12.0.tar.gz", hash = 
"sha256:670700c977ed2fb51e0d9f9253177ed20cbde4a3e5c0283cc5385b5870c8533f", size = 291600, upload-time = "2024-12-19T19:57:30.333Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/02/0835e6ab9cfc03916fe3f78c0956cfcdb6ff2669ffa6651065d5ebf7fc98/fsspec-2025.7.0.tar.gz", hash = "sha256:786120687ffa54b8283d942929540d8bc5ccfa820deb555a2b5d0ed2b737bf58", size = 304432, upload-time = "2025-07-15T16:05:21.19Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/de/86/5486b0188d08aa643e127774a99bac51ffa6cf343e3deb0583956dca5b22/fsspec-2024.12.0-py3-none-any.whl", hash = "sha256:b520aed47ad9804237ff878b504267a3b0b441e97508bd6d2d8774e3db85cee2", size = 183862, upload-time = "2024-12-19T19:57:28.258Z" }, + { url = "https://files.pythonhosted.org/packages/2f/e0/014d5d9d7a4564cf1c40b5039bc882db69fd881111e03ab3657ac0b218e2/fsspec-2025.7.0-py3-none-any.whl", hash = "sha256:8b012e39f63c7d5f10474de957f3ab793b47b45ae7d39f2fb735f8bbe25c0e21", size = 199597, upload-time = "2025-07-15T16:05:19.529Z" }, ] [[package]] @@ -984,7 +984,7 @@ wheels = [ [[package]] name = "gcsfs" -version = "2024.12.0" +version = "2025.7.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -995,9 +995,9 @@ dependencies = [ { name = "google-cloud-storage" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/20/a2/310a6f1cfdb39b2385af38620f299a8acb427b5c1fc36aa1037a81eea0d8/gcsfs-2024.12.0.tar.gz", hash = "sha256:e672413922108300ebc1fe78b8f99f3c7c1b94e7e088f5a6dc88de6d5a93d156", size = 80258, upload-time = "2024-12-19T20:17:59.549Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5b/d7/5eafe9f09f1bb09433a473cef7984cd52c398592c8fd09974e0ad87cfea4/gcsfs-2025.7.0.tar.gz", hash = "sha256:ad3ff66cf189ae8fc375ac8a2af409003dbca02357621cb94a66e457e02ba420", size = 82659, upload-time = "2025-07-15T16:49:21.647Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6b/0e/fb76438001cf6910be9f47b3ee1e3e1490caf0d44e2ff34625a97e3fbf63/gcsfs-2024.12.0-py2.py3-none-any.whl", hash = "sha256:ec88e48f77e466723705458af85dda238e43aa69fac071efd98829d06e9f095a", size = 35488, upload-time = "2024-12-19T20:17:56.983Z" }, + { url = "https://files.pythonhosted.org/packages/21/f5/54bccbee01efbc25581db6aafefb6f6c277d880930f7a083b10052382463/gcsfs-2025.7.0-py2.py3-none-any.whl", hash = "sha256:653503331d58cb02bb34e725d4595d166e93f7f2f3ff88e4c66ef535ae66eae5", size = 36815, upload-time = "2025-07-15T16:49:20.333Z" }, ] [[package]] @@ -1126,37 +1126,37 @@ wheels = [ [[package]] name = "greenlet" -version = "3.2.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c9/92/bb85bd6e80148a4d2e0c59f7c0c2891029f8fd510183afc7d8d2feeed9b6/greenlet-3.2.3.tar.gz", hash = "sha256:8b0dd8ae4c0d6f5e54ee55ba935eeb3d735a9b58a8a1e5b5cbab64e01a39f365", size = 185752, upload-time = "2025-06-05T16:16:09.955Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fc/2e/d4fcb2978f826358b673f779f78fa8a32ee37df11920dc2bb5589cbeecef/greenlet-3.2.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:784ae58bba89fa1fa5733d170d42486580cab9decda3484779f4759345b29822", size = 270219, upload-time = "2025-06-05T16:10:10.414Z" }, - { url = "https://files.pythonhosted.org/packages/16/24/929f853e0202130e4fe163bc1d05a671ce8dcd604f790e14896adac43a52/greenlet-3.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0921ac4ea42a5315d3446120ad48f90c3a6b9bb93dd9b3cf4e4d84a66e42de83", size 
= 630383, upload-time = "2025-06-05T16:38:51.785Z" }, - { url = "https://files.pythonhosted.org/packages/d1/b2/0320715eb61ae70c25ceca2f1d5ae620477d246692d9cc284c13242ec31c/greenlet-3.2.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:d2971d93bb99e05f8c2c0c2f4aa9484a18d98c4c3bd3c62b65b7e6ae33dfcfaf", size = 642422, upload-time = "2025-06-05T16:41:35.259Z" }, - { url = "https://files.pythonhosted.org/packages/bd/49/445fd1a210f4747fedf77615d941444349c6a3a4a1135bba9701337cd966/greenlet-3.2.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c667c0bf9d406b77a15c924ef3285e1e05250948001220368e039b6aa5b5034b", size = 638375, upload-time = "2025-06-05T16:48:18.235Z" }, - { url = "https://files.pythonhosted.org/packages/7e/c8/ca19760cf6eae75fa8dc32b487e963d863b3ee04a7637da77b616703bc37/greenlet-3.2.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:592c12fb1165be74592f5de0d70f82bc5ba552ac44800d632214b76089945147", size = 637627, upload-time = "2025-06-05T16:13:02.858Z" }, - { url = "https://files.pythonhosted.org/packages/65/89/77acf9e3da38e9bcfca881e43b02ed467c1dedc387021fc4d9bd9928afb8/greenlet-3.2.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29e184536ba333003540790ba29829ac14bb645514fbd7e32af331e8202a62a5", size = 585502, upload-time = "2025-06-05T16:12:49.642Z" }, - { url = "https://files.pythonhosted.org/packages/97/c6/ae244d7c95b23b7130136e07a9cc5aadd60d59b5951180dc7dc7e8edaba7/greenlet-3.2.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:93c0bb79844a367782ec4f429d07589417052e621aa39a5ac1fb99c5aa308edc", size = 1114498, upload-time = "2025-06-05T16:36:46.598Z" }, - { url = "https://files.pythonhosted.org/packages/89/5f/b16dec0cbfd3070658e0d744487919740c6d45eb90946f6787689a7efbce/greenlet-3.2.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:751261fc5ad7b6705f5f76726567375bb2104a059454e0226e1eef6c756748ba", size = 1139977, upload-time = "2025-06-05T16:12:38.262Z" }, - { url = "https://files.pythonhosted.org/packages/66/77/d48fb441b5a71125bcac042fc5b1494c806ccb9a1432ecaa421e72157f77/greenlet-3.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:83a8761c75312361aa2b5b903b79da97f13f556164a7dd2d5448655425bd4c34", size = 297017, upload-time = "2025-06-05T16:25:05.225Z" }, - { url = "https://files.pythonhosted.org/packages/f3/94/ad0d435f7c48debe960c53b8f60fb41c2026b1d0fa4a99a1cb17c3461e09/greenlet-3.2.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:25ad29caed5783d4bd7a85c9251c651696164622494c00802a139c00d639242d", size = 271992, upload-time = "2025-06-05T16:11:23.467Z" }, - { url = "https://files.pythonhosted.org/packages/93/5d/7c27cf4d003d6e77749d299c7c8f5fd50b4f251647b5c2e97e1f20da0ab5/greenlet-3.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88cd97bf37fe24a6710ec6a3a7799f3f81d9cd33317dcf565ff9950c83f55e0b", size = 638820, upload-time = "2025-06-05T16:38:52.882Z" }, - { url = "https://files.pythonhosted.org/packages/c6/7e/807e1e9be07a125bb4c169144937910bf59b9d2f6d931578e57f0bce0ae2/greenlet-3.2.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:baeedccca94880d2f5666b4fa16fc20ef50ba1ee353ee2d7092b383a243b0b0d", size = 653046, upload-time = "2025-06-05T16:41:36.343Z" }, - { url = "https://files.pythonhosted.org/packages/9d/ab/158c1a4ea1068bdbc78dba5a3de57e4c7aeb4e7fa034320ea94c688bfb61/greenlet-3.2.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = 
"sha256:be52af4b6292baecfa0f397f3edb3c6092ce071b499dd6fe292c9ac9f2c8f264", size = 647701, upload-time = "2025-06-05T16:48:19.604Z" }, - { url = "https://files.pythonhosted.org/packages/cc/0d/93729068259b550d6a0288da4ff72b86ed05626eaf1eb7c0d3466a2571de/greenlet-3.2.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0cc73378150b8b78b0c9fe2ce56e166695e67478550769536a6742dca3651688", size = 649747, upload-time = "2025-06-05T16:13:04.628Z" }, - { url = "https://files.pythonhosted.org/packages/f6/f6/c82ac1851c60851302d8581680573245c8fc300253fc1ff741ae74a6c24d/greenlet-3.2.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:706d016a03e78df129f68c4c9b4c4f963f7d73534e48a24f5f5a7101ed13dbbb", size = 605461, upload-time = "2025-06-05T16:12:50.792Z" }, - { url = "https://files.pythonhosted.org/packages/98/82/d022cf25ca39cf1200650fc58c52af32c90f80479c25d1cbf57980ec3065/greenlet-3.2.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:419e60f80709510c343c57b4bb5a339d8767bf9aef9b8ce43f4f143240f88b7c", size = 1121190, upload-time = "2025-06-05T16:36:48.59Z" }, - { url = "https://files.pythonhosted.org/packages/f5/e1/25297f70717abe8104c20ecf7af0a5b82d2f5a980eb1ac79f65654799f9f/greenlet-3.2.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:93d48533fade144203816783373f27a97e4193177ebaaf0fc396db19e5d61163", size = 1149055, upload-time = "2025-06-05T16:12:40.457Z" }, - { url = "https://files.pythonhosted.org/packages/1f/8f/8f9e56c5e82eb2c26e8cde787962e66494312dc8cb261c460e1f3a9c88bc/greenlet-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:7454d37c740bb27bdeddfc3f358f26956a07d5220818ceb467a483197d84f849", size = 297817, upload-time = "2025-06-05T16:29:49.244Z" }, - { url = "https://files.pythonhosted.org/packages/b1/cf/f5c0b23309070ae93de75c90d29300751a5aacefc0a3ed1b1d8edb28f08b/greenlet-3.2.3-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:500b8689aa9dd1ab26872a34084503aeddefcb438e2e7317b89b11eaea1901ad", size = 270732, upload-time = "2025-06-05T16:10:08.26Z" }, - { url = "https://files.pythonhosted.org/packages/48/ae/91a957ba60482d3fecf9be49bc3948f341d706b52ddb9d83a70d42abd498/greenlet-3.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a07d3472c2a93117af3b0136f246b2833fdc0b542d4a9799ae5f41c28323faef", size = 639033, upload-time = "2025-06-05T16:38:53.983Z" }, - { url = "https://files.pythonhosted.org/packages/6f/df/20ffa66dd5a7a7beffa6451bdb7400d66251374ab40b99981478c69a67a8/greenlet-3.2.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:8704b3768d2f51150626962f4b9a9e4a17d2e37c8a8d9867bbd9fa4eb938d3b3", size = 652999, upload-time = "2025-06-05T16:41:37.89Z" }, - { url = "https://files.pythonhosted.org/packages/51/b4/ebb2c8cb41e521f1d72bf0465f2f9a2fd803f674a88db228887e6847077e/greenlet-3.2.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5035d77a27b7c62db6cf41cf786cfe2242644a7a337a0e155c80960598baab95", size = 647368, upload-time = "2025-06-05T16:48:21.467Z" }, - { url = "https://files.pythonhosted.org/packages/8e/6a/1e1b5aa10dced4ae876a322155705257748108b7fd2e4fae3f2a091fe81a/greenlet-3.2.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2d8aa5423cd4a396792f6d4580f88bdc6efcb9205891c9d40d20f6e670992efb", size = 650037, upload-time = "2025-06-05T16:13:06.402Z" }, - { url = 
"https://files.pythonhosted.org/packages/26/f2/ad51331a157c7015c675702e2d5230c243695c788f8f75feba1af32b3617/greenlet-3.2.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2c724620a101f8170065d7dded3f962a2aea7a7dae133a009cada42847e04a7b", size = 608402, upload-time = "2025-06-05T16:12:51.91Z" }, - { url = "https://files.pythonhosted.org/packages/26/bc/862bd2083e6b3aff23300900a956f4ea9a4059de337f5c8734346b9b34fc/greenlet-3.2.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:873abe55f134c48e1f2a6f53f7d1419192a3d1a4e873bace00499a4e45ea6af0", size = 1119577, upload-time = "2025-06-05T16:36:49.787Z" }, - { url = "https://files.pythonhosted.org/packages/86/94/1fc0cc068cfde885170e01de40a619b00eaa8f2916bf3541744730ffb4c3/greenlet-3.2.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:024571bbce5f2c1cfff08bf3fbaa43bbc7444f580ae13b0099e95d0e6e67ed36", size = 1147121, upload-time = "2025-06-05T16:12:42.527Z" }, - { url = "https://files.pythonhosted.org/packages/27/1a/199f9587e8cb08a0658f9c30f3799244307614148ffe8b1e3aa22f324dea/greenlet-3.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:5195fb1e75e592dd04ce79881c8a22becdfa3e6f500e7feb059b1e6fdd54d3e3", size = 297603, upload-time = "2025-06-05T16:20:12.651Z" }, +version = "3.2.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/03/b8/704d753a5a45507a7aab61f18db9509302ed3d0a27ac7e0359ec2905b1a6/greenlet-3.2.4.tar.gz", hash = "sha256:0dca0d95ff849f9a364385f36ab49f50065d76964944638be9691e1832e9f86d", size = 188260, upload-time = "2025-08-07T13:24:33.51Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/de/f28ced0a67749cac23fecb02b694f6473f47686dff6afaa211d186e2ef9c/greenlet-3.2.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:96378df1de302bc38e99c3a9aa311967b7dc80ced1dcc6f171e99842987882a2", size = 272305, upload-time = "2025-08-07T13:15:41.288Z" }, + { url = "https://files.pythonhosted.org/packages/09/16/2c3792cba130000bf2a31c5272999113f4764fd9d874fb257ff588ac779a/greenlet-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1ee8fae0519a337f2329cb78bd7a8e128ec0f881073d43f023c7b8d4831d5246", size = 632472, upload-time = "2025-08-07T13:42:55.044Z" }, + { url = "https://files.pythonhosted.org/packages/ae/8f/95d48d7e3d433e6dae5b1682e4292242a53f22df82e6d3dda81b1701a960/greenlet-3.2.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:94abf90142c2a18151632371140b3dba4dee031633fe614cb592dbb6c9e17bc3", size = 644646, upload-time = "2025-08-07T13:45:26.523Z" }, + { url = "https://files.pythonhosted.org/packages/d5/5e/405965351aef8c76b8ef7ad370e5da58d57ef6068df197548b015464001a/greenlet-3.2.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:4d1378601b85e2e5171b99be8d2dc85f594c79967599328f95c1dc1a40f1c633", size = 640519, upload-time = "2025-08-07T13:53:13.928Z" }, + { url = "https://files.pythonhosted.org/packages/25/5d/382753b52006ce0218297ec1b628e048c4e64b155379331f25a7316eb749/greenlet-3.2.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0db5594dce18db94f7d1650d7489909b57afde4c580806b8d9203b6e79cdc079", size = 639707, upload-time = "2025-08-07T13:18:27.146Z" }, + { url = "https://files.pythonhosted.org/packages/1f/8e/abdd3f14d735b2929290a018ecf133c901be4874b858dd1c604b9319f064/greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8", size = 587684, upload-time = "2025-08-07T13:18:25.164Z" }, + { url = "https://files.pythonhosted.org/packages/5d/65/deb2a69c3e5996439b0176f6651e0052542bb6c8f8ec2e3fba97c9768805/greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52", size = 1116647, upload-time = "2025-08-07T13:42:38.655Z" }, + { url = "https://files.pythonhosted.org/packages/3f/cc/b07000438a29ac5cfb2194bfc128151d52f333cee74dd7dfe3fb733fc16c/greenlet-3.2.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:55e9c5affaa6775e2c6b67659f3a71684de4c549b3dd9afca3bc773533d284fa", size = 1142073, upload-time = "2025-08-07T13:18:21.737Z" }, + { url = "https://files.pythonhosted.org/packages/d8/0f/30aef242fcab550b0b3520b8e3561156857c94288f0332a79928c31a52cf/greenlet-3.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:9c40adce87eaa9ddb593ccb0fa6a07caf34015a29bf8d344811665b573138db9", size = 299100, upload-time = "2025-08-07T13:44:12.287Z" }, + { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" }, + { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" }, + { url = "https://files.pythonhosted.org/packages/3b/16/035dcfcc48715ccd345f3a93183267167cdd162ad123cd93067d86f27ce4/greenlet-3.2.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f28588772bb5fb869a8eb331374ec06f24a83a9c25bfa1f38b6993afe9c1e968", size = 655185, upload-time = "2025-08-07T13:45:27.624Z" }, + { url = "https://files.pythonhosted.org/packages/31/da/0386695eef69ffae1ad726881571dfe28b41970173947e7c558d9998de0f/greenlet-3.2.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5c9320971821a7cb77cfab8d956fa8e39cd07ca44b6070db358ceb7f8797c8c9", size = 649926, upload-time = "2025-08-07T13:53:15.251Z" }, + { url = "https://files.pythonhosted.org/packages/68/88/69bf19fd4dc19981928ceacbc5fd4bb6bc2215d53199e367832e98d1d8fe/greenlet-3.2.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c60a6d84229b271d44b70fb6e5fa23781abb5d742af7b808ae3f6efd7c9c60f6", size = 651839, upload-time = "2025-08-07T13:18:30.281Z" }, + { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, + { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, + { url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" }, + { url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" }, + { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" }, + { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" }, + { url = "https://files.pythonhosted.org/packages/f7/0b/bc13f787394920b23073ca3b6c4a7a21396301ed75a655bcb47196b50e6e/greenlet-3.2.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:710638eb93b1fa52823aa91bf75326f9ecdfd5e0466f00789246a5280f4ba0fc", size = 655191, upload-time = "2025-08-07T13:45:29.752Z" }, + { url = "https://files.pythonhosted.org/packages/f2/d6/6adde57d1345a8d0f14d31e4ab9c23cfe8e2cd39c3baf7674b4b0338d266/greenlet-3.2.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c5111ccdc9c88f423426df3fd1811bfc40ed66264d35aa373420a34377efc98a", size = 649516, upload-time = "2025-08-07T13:53:16.314Z" }, + { url = "https://files.pythonhosted.org/packages/7f/3b/3a3328a788d4a473889a2d403199932be55b1b0060f4ddd96ee7cdfcad10/greenlet-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d76383238584e9711e20ebe14db6c88ddcedc1829a9ad31a584389463b5aa504", size = 652169, upload-time = "2025-08-07T13:18:32.861Z" }, + { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, + { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, + { url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" }, + { url = "https://files.pythonhosted.org/packages/0b/55/2321e43595e6801e105fcfdee02b34c0f996eb71e6ddffca6b10b7e1d771/greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b", size = 299685, upload-time = "2025-08-07T13:24:38.824Z" }, ] [[package]] @@ -1168,34 +1168,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", 
hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] -[[package]] -name = "httpcore" -version = "1.0.9" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "certifi" }, - { name = "h11" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, -] - -[[package]] -name = "httpx" -version = "0.28.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "anyio" }, - { name = "certifi" }, - { name = "httpcore" }, - { name = "idna" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, -] - [[package]] name = "identify" version = "2.6.12" @@ -1344,15 +1316,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256, upload-time = "2022-06-17T18:00:10.251Z" }, ] -[[package]] -name = "joblib" -version = "1.5.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/dc/fe/0f5a938c54105553436dbff7a61dc4fed4b1b2c98852f8833beaf4d5968f/joblib-1.5.1.tar.gz", hash = "sha256:f4f86e351f39fe3d0d32a9f2c3d8af1ee4cec285aafcb27003dda5205576b444", size = 330475, upload-time = "2025-05-23T12:04:37.097Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7d/4f/1195bbac8e0c2acc5f740661631d8d750dc38d4a32b23ee5df3cde6f4e0d/joblib-1.5.1-py3-none-any.whl", hash = "sha256:4719a31f054c7d766948dcd83e9613686b27114f190f717cec7eaa2084f8a74a", size = 307746, upload-time = "2025-05-23T12:04:35.124Z" }, -] - [[package]] name = "jsonschema" version = "4.25.0" @@ -1533,15 +1496,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739, upload-time = "2024-10-18T15:21:42.784Z" }, ] -[[package]] -name = "marshmallow" -version = "4.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1e/ff/26df5a9f5ac57ccf693a5854916ab47243039d2aa9e0fe5f5a0331e7b74b/marshmallow-4.0.0.tar.gz", hash = "sha256:3b6e80aac299a7935cfb97ed01d1854fb90b5079430969af92118ea1b12a8d55", size = 220507, upload-time = 
"2025-04-17T02:25:54.925Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d6/26/6cc45d156f44dbe1d5696d9e54042e4dcaf7b946c0b86df6a97d29706f32/marshmallow-4.0.0-py3-none-any.whl", hash = "sha256:e7b0528337e9990fd64950f8a6b3a1baabed09ad17a0dfb844d701151f92d203", size = 48420, upload-time = "2025-04-17T02:25:53.375Z" }, -] - [[package]] name = "matplotlib-inline" version = "0.1.7" @@ -1722,17 +1676,21 @@ wheels = [ [[package]] name = "multidimio" -version = "0.9.3" +version = "1.0.0a1" source = { editable = "." } dependencies = [ { name = "click" }, { name = "click-params" }, { name = "dask" }, { name = "fsspec" }, + { name = "pint" }, { name = "psutil" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, { name = "rich" }, { name = "segy" }, { name = "tqdm" }, + { name = "xarray" }, { name = "zarr" }, ] @@ -1760,7 +1718,6 @@ dev = [ { name = "pytest" }, { name = "pytest-dependency" }, { name = "ruff" }, - { name = "safety" }, { name = "typeguard" }, { name = "xdoctest", extra = ["colors"] }, ] @@ -1777,36 +1734,39 @@ docs = [ [package.metadata] requires-dist = [ - { name = "adlfs", marker = "extra == 'cloud'", specifier = ">=2024.7.0" }, - { name = "bokeh", marker = "extra == 'distributed'", specifier = ">=3.4.2,<4.0.0" }, - { name = "click", specifier = ">=8.1.7,<9.0.0" }, + { name = "adlfs", marker = "extra == 'cloud'", specifier = ">=2024.12.0" }, + { name = "bokeh", marker = "extra == 'distributed'", specifier = ">=3.7.3,<4.0.0" }, + { name = "click", specifier = ">=8.2.1,<9.0.0" }, { name = "click-params", specifier = ">=0.5.0,<0.6.0" }, - { name = "dask", specifier = ">=2024.12.0" }, - { name = "distributed", marker = "extra == 'distributed'", specifier = ">=2024.12.0" }, - { name = "fsspec", specifier = ">=2024.10.0" }, - { name = "gcsfs", marker = "extra == 'cloud'", specifier = ">=2024.10.0" }, - { name = "psutil", specifier = ">=6.1.0,<7.0.0" }, - { name = "rich", specifier = ">=13.9.4,<14.0.0" }, - { name = "s3fs", marker = "extra == 'cloud'", specifier = "==2024.12.0" }, - { name = "segy", specifier = ">=0.4.0,<0.5.0" }, - { name = "tqdm", specifier = ">=4.67.0,<5.0.0" }, - { name = "zarr", specifier = ">=3.1.0,<4.0.0" }, + { name = "dask", specifier = ">=2025.7.0" }, + { name = "distributed", marker = "extra == 'distributed'", specifier = ">=2025.7.0" }, + { name = "fsspec", specifier = ">=2025.7.0" }, + { name = "gcsfs", marker = "extra == 'cloud'", specifier = ">=2025.7.0" }, + { name = "pint", specifier = ">=0.24.4,<0.25" }, + { name = "psutil", specifier = ">=7.0.0,<8.0.0" }, + { name = "pydantic", specifier = ">=2.11.7,<3.0.0" }, + { name = "pydantic-settings", specifier = ">=2.10.1,<3.0.0" }, + { name = "rich", specifier = ">=14.1.0,<15.0.0" }, + { name = "s3fs", marker = "extra == 'cloud'", specifier = ">=2025.7.0" }, + { name = "segy", specifier = ">=0.4.2,<0.5.0" }, + { name = "tqdm", specifier = ">=4.67.1,<5.0.0" }, + { name = "xarray", specifier = ">=2025.7.1" }, + { name = "zarr", specifier = ">=3.1.1,<4.0.0" }, { name = "zfpy", marker = "extra == 'lossy'", specifier = ">=1.0.1,<2.0.0" }, ] provides-extras = ["cloud", "distributed", "lossy"] [package.metadata.requires-dev] dev = [ - { name = "coverage", extras = ["toml"], specifier = ">=7.6.7,<8" }, - { name = "mypy", specifier = ">=1.13.0,<2" }, - { name = "pre-commit", specifier = ">=4.0.1,<5" }, + { name = "coverage", extras = ["toml"], specifier = ">=7.9.1,<8" }, + { name = "mypy", specifier = ">=1.16.1,<2" }, + { name = "pre-commit", specifier = ">=4.2.0,<5" }, { name = 
"pre-commit-hooks", specifier = ">=5.0.0,<6" }, - { name = "pygments", specifier = ">=2.18.0,<3" }, - { name = "pytest", specifier = ">=8.3.3,<9" }, + { name = "pygments", specifier = ">=2.19.2,<3" }, + { name = "pytest", specifier = ">=8.4.1,<9" }, { name = "pytest-dependency", specifier = ">=0.6.0,<0.7" }, - { name = "ruff", specifier = ">=0.11.8" }, - { name = "safety", specifier = ">=3.2.3,<4" }, - { name = "typeguard", specifier = ">=4.4.1,<5" }, + { name = "ruff", specifier = ">=0.12.1" }, + { name = "typeguard", specifier = ">=4.4.4,<5" }, { name = "xdoctest", extras = ["colors"], specifier = ">=1.2.0,<2" }, ] docs = [ @@ -1947,21 +1907,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195, upload-time = "2024-01-21T14:25:17.223Z" }, ] -[[package]] -name = "nltk" -version = "3.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "joblib" }, - { name = "regex" }, - { name = "tqdm" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/3c/87/db8be88ad32c2d042420b6fd9ffd4a149f9a0d7f0e86b3f543be2eeeedd2/nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868", size = 2904691, upload-time = "2024-08-18T19:48:37.769Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/66/7d9e26593edda06e8cb531874633f7c2372279c3b0f46235539fe546df8b/nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1", size = 1505442, upload-time = "2024-08-18T19:48:21.909Z" }, -] - [[package]] name = "nodeenv" version = "1.9.1" @@ -2242,6 +2187,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/34/e7/ae39f538fd6844e982063c3a5e4598b8ced43b9633baa3a85ef33af8c05c/pillow-11.3.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:c84d689db21a1c397d001aa08241044aa2069e7587b398c8cc63020390b1c1b8", size = 6984598, upload-time = "2025-07-01T09:16:27.732Z" }, ] +[[package]] +name = "pint" +version = "0.24.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "flexcache" }, + { name = "flexparser" }, + { name = "platformdirs" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/20/bb/52b15ddf7b7706ed591134a895dbf6e41c8348171fb635e655e0a4bbb0ea/pint-0.24.4.tar.gz", hash = "sha256:35275439b574837a6cd3020a5a4a73645eb125ce4152a73a2f126bf164b91b80", size = 342225, upload-time = "2024-11-07T16:29:46.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/16/bd2f5904557265882108dc2e04f18abc05ab0c2b7082ae9430091daf1d5c/Pint-0.24.4-py3-none-any.whl", hash = "sha256:aa54926c8772159fcf65f82cc0d34de6768c151b32ad1deb0331291c38fe7659", size = 302029, upload-time = "2024-11-07T16:29:43.976Z" }, +] + [[package]] name = "platformdirs" version = "4.3.8" @@ -2401,17 +2361,17 @@ wheels = [ [[package]] name = "psutil" -version = "6.1.1" +version = "7.0.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1f/5a/07871137bb752428aa4b659f910b399ba6f291156bdea939be3e96cae7cb/psutil-6.1.1.tar.gz", hash = "sha256:cf8496728c18f2d0b45198f06895be52f36611711746b7f30c464b422b50e2f5", size = 508502, upload-time = "2024-12-19T18:21:20.568Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/2a/80/336820c1ad9286a4ded7e845b2eccfcb27851ab8ac6abece774a6ff4d3de/psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456", size = 497003, upload-time = "2025-02-13T21:54:07.946Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/61/99/ca79d302be46f7bdd8321089762dd4476ee725fce16fc2b2e1dbba8cac17/psutil-6.1.1-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:fc0ed7fe2231a444fc219b9c42d0376e0a9a1a72f16c5cfa0f68d19f1a0663e8", size = 247511, upload-time = "2024-12-19T18:21:45.163Z" }, - { url = "https://files.pythonhosted.org/packages/0b/6b/73dbde0dd38f3782905d4587049b9be64d76671042fdcaf60e2430c6796d/psutil-6.1.1-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:0bdd4eab935276290ad3cb718e9809412895ca6b5b334f5a9111ee6d9aff9377", size = 248985, upload-time = "2024-12-19T18:21:49.254Z" }, - { url = "https://files.pythonhosted.org/packages/17/38/c319d31a1d3f88c5b79c68b3116c129e5133f1822157dd6da34043e32ed6/psutil-6.1.1-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b6e06c20c05fe95a3d7302d74e7097756d4ba1247975ad6905441ae1b5b66003", size = 284488, upload-time = "2024-12-19T18:21:51.638Z" }, - { url = "https://files.pythonhosted.org/packages/9c/39/0f88a830a1c8a3aba27fededc642da37613c57cbff143412e3536f89784f/psutil-6.1.1-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97f7cb9921fbec4904f522d972f0c0e1f4fabbdd4e0287813b21215074a0f160", size = 287477, upload-time = "2024-12-19T18:21:55.306Z" }, - { url = "https://files.pythonhosted.org/packages/47/da/99f4345d4ddf2845cb5b5bd0d93d554e84542d116934fde07a0c50bd4e9f/psutil-6.1.1-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33431e84fee02bc84ea36d9e2c4a6d395d479c9dd9bba2376c1f6ee8f3a4e0b3", size = 289017, upload-time = "2024-12-19T18:21:57.875Z" }, - { url = "https://files.pythonhosted.org/packages/38/53/bd755c2896f4461fd4f36fa6a6dcb66a88a9e4b9fd4e5b66a77cf9d4a584/psutil-6.1.1-cp37-abi3-win32.whl", hash = "sha256:eaa912e0b11848c4d9279a93d7e2783df352b082f40111e078388701fd479e53", size = 250602, upload-time = "2024-12-19T18:22:08.808Z" }, - { url = "https://files.pythonhosted.org/packages/7b/d7/7831438e6c3ebbfa6e01a927127a6cb42ad3ab844247f3c5b96bea25d73d/psutil-6.1.1-cp37-abi3-win_amd64.whl", hash = "sha256:f35cfccb065fff93529d2afb4a2e89e363fe63ca1e4a5da22b603a85833c2649", size = 254444, upload-time = "2024-12-19T18:22:11.335Z" }, + { url = "https://files.pythonhosted.org/packages/ed/e6/2d26234410f8b8abdbf891c9da62bee396583f713fb9f3325a4760875d22/psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25", size = 238051, upload-time = "2025-02-13T21:54:12.36Z" }, + { url = "https://files.pythonhosted.org/packages/04/8b/30f930733afe425e3cbfc0e1468a30a18942350c1a8816acfade80c005c4/psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da", size = 239535, upload-time = "2025-02-13T21:54:16.07Z" }, + { url = "https://files.pythonhosted.org/packages/2a/ed/d362e84620dd22876b55389248e522338ed1bf134a5edd3b8231d7207f6d/psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91", size = 275004, upload-time = "2025-02-13T21:54:18.662Z" }, + { url = 
"https://files.pythonhosted.org/packages/bf/b9/b0eb3f3cbcb734d930fdf839431606844a825b23eaf9a6ab371edac8162c/psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34", size = 277986, upload-time = "2025-02-13T21:54:21.811Z" }, + { url = "https://files.pythonhosted.org/packages/eb/a2/709e0fe2f093556c17fbafda93ac032257242cabcc7ff3369e2cb76a97aa/psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993", size = 279544, upload-time = "2025-02-13T21:54:24.68Z" }, + { url = "https://files.pythonhosted.org/packages/50/e6/eecf58810b9d12e6427369784efe814a1eec0f492084ce8eb8f4d89d6d61/psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99", size = 241053, upload-time = "2025-02-13T21:54:34.31Z" }, + { url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885, upload-time = "2025-02-13T21:54:37.486Z" }, ] [[package]] @@ -2464,63 +2424,82 @@ wheels = [ [[package]] name = "pydantic" -version = "2.9.2" +version = "2.11.7" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-types" }, { name = "pydantic-core" }, { name = "typing-extensions" }, + { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a9/b7/d9e3f12af310e1120c21603644a1cd86f59060e040ec5c3a80b8f05fae30/pydantic-2.9.2.tar.gz", hash = "sha256:d155cef71265d1e9807ed1c32b4c8deec042a44a50a4188b25ac67ecd81a9c0f", size = 769917, upload-time = "2024-09-17T15:59:54.273Z" } +sdist = { url = "https://files.pythonhosted.org/packages/00/dd/4325abf92c39ba8623b5af936ddb36ffcfe0beae70405d456ab1fb2f5b8c/pydantic-2.11.7.tar.gz", hash = "sha256:d989c3c6cb79469287b1569f7447a17848c998458d49ebe294e975b9baf0f0db", size = 788350, upload-time = "2025-06-14T08:33:17.137Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/df/e4/ba44652d562cbf0bf320e0f3810206149c8a4e99cdbf66da82e97ab53a15/pydantic-2.9.2-py3-none-any.whl", hash = "sha256:f048cec7b26778210e28a0459867920654d48e5e62db0958433636cde4254f12", size = 434928, upload-time = "2024-09-17T15:59:51.827Z" }, + { url = "https://files.pythonhosted.org/packages/6a/c0/ec2b1c8712ca690e5d61979dee872603e92b8a32f94cc1b72d53beab008a/pydantic-2.11.7-py3-none-any.whl", hash = "sha256:dde5df002701f6de26248661f6835bbe296a47bf73990135c7d07ce741b9623b", size = 444782, upload-time = "2025-06-14T08:33:14.905Z" }, ] [[package]] name = "pydantic-core" -version = "2.23.4" +version = "2.33.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e2/aa/6b6a9b9f8537b872f552ddd46dd3da230367754b6f707b8e1e963f515ea3/pydantic_core-2.23.4.tar.gz", hash = "sha256:2584f7cf844ac4d970fba483a717dbe10c1c1c96a969bf65d61ffe94df1b2863", size = 402156, upload-time = "2024-09-16T16:06:44.786Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5d/30/890a583cd3f2be27ecf32b479d5d615710bb926d92da03e3f7838ff3e58b/pydantic_core-2.23.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:77733e3892bb0a7fa797826361ce8a9184d25c8dffaec60b7ffe928153680ba8", size = 1865160, upload-time = 
"2024-09-16T16:04:18.628Z" }, - { url = "https://files.pythonhosted.org/packages/1d/9a/b634442e1253bc6889c87afe8bb59447f106ee042140bd57680b3b113ec7/pydantic_core-2.23.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b84d168f6c48fabd1f2027a3d1bdfe62f92cade1fb273a5d68e621da0e44e6d", size = 1776777, upload-time = "2024-09-16T16:04:20.038Z" }, - { url = "https://files.pythonhosted.org/packages/75/9a/7816295124a6b08c24c96f9ce73085032d8bcbaf7e5a781cd41aa910c891/pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df49e7a0861a8c36d089c1ed57d308623d60416dab2647a4a17fe050ba85de0e", size = 1799244, upload-time = "2024-09-16T16:04:21.799Z" }, - { url = "https://files.pythonhosted.org/packages/a9/8f/89c1405176903e567c5f99ec53387449e62f1121894aa9fc2c4fdc51a59b/pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ff02b6d461a6de369f07ec15e465a88895f3223eb75073ffea56b84d9331f607", size = 1805307, upload-time = "2024-09-16T16:04:23.324Z" }, - { url = "https://files.pythonhosted.org/packages/d5/a5/1a194447d0da1ef492e3470680c66048fef56fc1f1a25cafbea4bc1d1c48/pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:996a38a83508c54c78a5f41456b0103c30508fed9abcad0a59b876d7398f25fd", size = 2000663, upload-time = "2024-09-16T16:04:25.203Z" }, - { url = "https://files.pythonhosted.org/packages/13/a5/1df8541651de4455e7d587cf556201b4f7997191e110bca3b589218745a5/pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d97683ddee4723ae8c95d1eddac7c192e8c552da0c73a925a89fa8649bf13eea", size = 2655941, upload-time = "2024-09-16T16:04:27.211Z" }, - { url = "https://files.pythonhosted.org/packages/44/31/a3899b5ce02c4316865e390107f145089876dff7e1dfc770a231d836aed8/pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:216f9b2d7713eb98cb83c80b9c794de1f6b7e3145eef40400c62e86cee5f4e1e", size = 2052105, upload-time = "2024-09-16T16:04:28.611Z" }, - { url = "https://files.pythonhosted.org/packages/1b/aa/98e190f8745d5ec831f6d5449344c48c0627ac5fed4e5340a44b74878f8e/pydantic_core-2.23.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f783e0ec4803c787bcea93e13e9932edab72068f68ecffdf86a99fd5918878b", size = 1919967, upload-time = "2024-09-16T16:04:30.045Z" }, - { url = "https://files.pythonhosted.org/packages/ae/35/b6e00b6abb2acfee3e8f85558c02a0822e9a8b2f2d812ea8b9079b118ba0/pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d0776dea117cf5272382634bd2a5c1b6eb16767c223c6a5317cd3e2a757c61a0", size = 1964291, upload-time = "2024-09-16T16:04:32.376Z" }, - { url = "https://files.pythonhosted.org/packages/13/46/7bee6d32b69191cd649bbbd2361af79c472d72cb29bb2024f0b6e350ba06/pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d5f7a395a8cf1621939692dba2a6b6a830efa6b3cee787d82c7de1ad2930de64", size = 2109666, upload-time = "2024-09-16T16:04:33.923Z" }, - { url = "https://files.pythonhosted.org/packages/39/ef/7b34f1b122a81b68ed0a7d0e564da9ccdc9a2924c8d6c6b5b11fa3a56970/pydantic_core-2.23.4-cp311-none-win32.whl", hash = "sha256:74b9127ffea03643e998e0c5ad9bd3811d3dac8c676e47db17b0ee7c3c3bf35f", size = 1732940, upload-time = "2024-09-16T16:04:35.467Z" }, - { url = "https://files.pythonhosted.org/packages/2f/76/37b7e76c645843ff46c1d73e046207311ef298d3f7b2f7d8f6ac60113071/pydantic_core-2.23.4-cp311-none-win_amd64.whl", hash = 
"sha256:98d134c954828488b153d88ba1f34e14259284f256180ce659e8d83e9c05eaa3", size = 1916804, upload-time = "2024-09-16T16:04:37.06Z" }, - { url = "https://files.pythonhosted.org/packages/74/7b/8e315f80666194b354966ec84b7d567da77ad927ed6323db4006cf915f3f/pydantic_core-2.23.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f3e0da4ebaef65158d4dfd7d3678aad692f7666877df0002b8a522cdf088f231", size = 1856459, upload-time = "2024-09-16T16:04:38.438Z" }, - { url = "https://files.pythonhosted.org/packages/14/de/866bdce10ed808323d437612aca1ec9971b981e1c52e5e42ad9b8e17a6f6/pydantic_core-2.23.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f69a8e0b033b747bb3e36a44e7732f0c99f7edd5cea723d45bc0d6e95377ffee", size = 1770007, upload-time = "2024-09-16T16:04:40.229Z" }, - { url = "https://files.pythonhosted.org/packages/dc/69/8edd5c3cd48bb833a3f7ef9b81d7666ccddd3c9a635225214e044b6e8281/pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:723314c1d51722ab28bfcd5240d858512ffd3116449c557a1336cbe3919beb87", size = 1790245, upload-time = "2024-09-16T16:04:41.794Z" }, - { url = "https://files.pythonhosted.org/packages/80/33/9c24334e3af796ce80d2274940aae38dd4e5676298b4398eff103a79e02d/pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb2802e667b7051a1bebbfe93684841cc9351004e2badbd6411bf357ab8d5ac8", size = 1801260, upload-time = "2024-09-16T16:04:43.991Z" }, - { url = "https://files.pythonhosted.org/packages/a5/6f/e9567fd90104b79b101ca9d120219644d3314962caa7948dd8b965e9f83e/pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d18ca8148bebe1b0a382a27a8ee60350091a6ddaf475fa05ef50dc35b5df6327", size = 1996872, upload-time = "2024-09-16T16:04:45.593Z" }, - { url = "https://files.pythonhosted.org/packages/2d/ad/b5f0fe9e6cfee915dd144edbd10b6e9c9c9c9d7a56b69256d124b8ac682e/pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33e3d65a85a2a4a0dc3b092b938a4062b1a05f3a9abde65ea93b233bca0e03f2", size = 2661617, upload-time = "2024-09-16T16:04:47.3Z" }, - { url = "https://files.pythonhosted.org/packages/06/c8/7d4b708f8d05a5cbfda3243aad468052c6e99de7d0937c9146c24d9f12e9/pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:128585782e5bfa515c590ccee4b727fb76925dd04a98864182b22e89a4e6ed36", size = 2071831, upload-time = "2024-09-16T16:04:48.893Z" }, - { url = "https://files.pythonhosted.org/packages/89/4d/3079d00c47f22c9a9a8220db088b309ad6e600a73d7a69473e3a8e5e3ea3/pydantic_core-2.23.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:68665f4c17edcceecc112dfed5dbe6f92261fb9d6054b47d01bf6371a6196126", size = 1917453, upload-time = "2024-09-16T16:04:51.099Z" }, - { url = "https://files.pythonhosted.org/packages/e9/88/9df5b7ce880a4703fcc2d76c8c2d8eb9f861f79d0c56f4b8f5f2607ccec8/pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:20152074317d9bed6b7a95ade3b7d6054845d70584216160860425f4fbd5ee9e", size = 1968793, upload-time = "2024-09-16T16:04:52.604Z" }, - { url = "https://files.pythonhosted.org/packages/e3/b9/41f7efe80f6ce2ed3ee3c2dcfe10ab7adc1172f778cc9659509a79518c43/pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9261d3ce84fa1d38ed649c3638feefeae23d32ba9182963e465d58d62203bd24", size = 2116872, upload-time = "2024-09-16T16:04:54.41Z" }, - { url = 
"https://files.pythonhosted.org/packages/63/08/b59b7a92e03dd25554b0436554bf23e7c29abae7cce4b1c459cd92746811/pydantic_core-2.23.4-cp312-none-win32.whl", hash = "sha256:4ba762ed58e8d68657fc1281e9bb72e1c3e79cc5d464be146e260c541ec12d84", size = 1738535, upload-time = "2024-09-16T16:04:55.828Z" }, - { url = "https://files.pythonhosted.org/packages/88/8d/479293e4d39ab409747926eec4329de5b7129beaedc3786eca070605d07f/pydantic_core-2.23.4-cp312-none-win_amd64.whl", hash = "sha256:97df63000f4fea395b2824da80e169731088656d1818a11b95f3b173747b6cd9", size = 1917992, upload-time = "2024-09-16T16:04:57.395Z" }, - { url = "https://files.pythonhosted.org/packages/ad/ef/16ee2df472bf0e419b6bc68c05bf0145c49247a1095e85cee1463c6a44a1/pydantic_core-2.23.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7530e201d10d7d14abce4fb54cfe5b94a0aefc87da539d0346a484ead376c3cc", size = 1856143, upload-time = "2024-09-16T16:04:59.062Z" }, - { url = "https://files.pythonhosted.org/packages/da/fa/bc3dbb83605669a34a93308e297ab22be82dfb9dcf88c6cf4b4f264e0a42/pydantic_core-2.23.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:df933278128ea1cd77772673c73954e53a1c95a4fdf41eef97c2b779271bd0bd", size = 1770063, upload-time = "2024-09-16T16:05:00.522Z" }, - { url = "https://files.pythonhosted.org/packages/4e/48/e813f3bbd257a712303ebdf55c8dc46f9589ec74b384c9f652597df3288d/pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cb3da3fd1b6a5d0279a01877713dbda118a2a4fc6f0d821a57da2e464793f05", size = 1790013, upload-time = "2024-09-16T16:05:02.619Z" }, - { url = "https://files.pythonhosted.org/packages/b4/e0/56eda3a37929a1d297fcab1966db8c339023bcca0b64c5a84896db3fcc5c/pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42c6dcb030aefb668a2b7009c85b27f90e51e6a3b4d5c9bc4c57631292015b0d", size = 1801077, upload-time = "2024-09-16T16:05:04.154Z" }, - { url = "https://files.pythonhosted.org/packages/04/be/5e49376769bfbf82486da6c5c1683b891809365c20d7c7e52792ce4c71f3/pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:696dd8d674d6ce621ab9d45b205df149399e4bb9aa34102c970b721554828510", size = 1996782, upload-time = "2024-09-16T16:05:06.931Z" }, - { url = "https://files.pythonhosted.org/packages/bc/24/e3ee6c04f1d58cc15f37bcc62f32c7478ff55142b7b3e6d42ea374ea427c/pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2971bb5ffe72cc0f555c13e19b23c85b654dd2a8f7ab493c262071377bfce9f6", size = 2661375, upload-time = "2024-09-16T16:05:08.773Z" }, - { url = "https://files.pythonhosted.org/packages/c1/f8/11a9006de4e89d016b8de74ebb1db727dc100608bb1e6bbe9d56a3cbbcce/pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8394d940e5d400d04cad4f75c0598665cbb81aecefaca82ca85bd28264af7f9b", size = 2071635, upload-time = "2024-09-16T16:05:10.456Z" }, - { url = "https://files.pythonhosted.org/packages/7c/45/bdce5779b59f468bdf262a5bc9eecbae87f271c51aef628d8c073b4b4b4c/pydantic_core-2.23.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dff76e0602ca7d4cdaacc1ac4c005e0ce0dcfe095d5b5259163a80d3a10d327", size = 1916994, upload-time = "2024-09-16T16:05:12.051Z" }, - { url = "https://files.pythonhosted.org/packages/d8/fa/c648308fe711ee1f88192cad6026ab4f925396d1293e8356de7e55be89b5/pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7d32706badfe136888bdea71c0def994644e09fff0bfe47441deaed8e96fdbc6", size = 
1968877, upload-time = "2024-09-16T16:05:14.021Z" }, - { url = "https://files.pythonhosted.org/packages/16/16/b805c74b35607d24d37103007f899abc4880923b04929547ae68d478b7f4/pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed541d70698978a20eb63d8c5d72f2cc6d7079d9d90f6b50bad07826f1320f5f", size = 2116814, upload-time = "2024-09-16T16:05:15.684Z" }, - { url = "https://files.pythonhosted.org/packages/d1/58/5305e723d9fcdf1c5a655e6a4cc2a07128bf644ff4b1d98daf7a9dbf57da/pydantic_core-2.23.4-cp313-none-win32.whl", hash = "sha256:3d5639516376dce1940ea36edf408c554475369f5da2abd45d44621cb616f769", size = 1738360, upload-time = "2024-09-16T16:05:17.258Z" }, - { url = "https://files.pythonhosted.org/packages/a5/ae/e14b0ff8b3f48e02394d8acd911376b7b66e164535687ef7dc24ea03072f/pydantic_core-2.23.4-cp313-none-win_amd64.whl", hash = "sha256:5a1504ad17ba4210df3a045132a7baeeba5a200e930f57512ee02909fc5c4cb5", size = 1919411, upload-time = "2024-09-16T16:05:18.934Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/ad/88/5f2260bdfae97aabf98f1778d43f69574390ad787afb646292a638c923d4/pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc", size = 435195, upload-time = "2025-04-23T18:33:52.104Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/8d/71db63483d518cbbf290261a1fc2839d17ff89fce7089e08cad07ccfce67/pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7", size = 2028584, upload-time = "2025-04-23T18:31:03.106Z" }, + { url = "https://files.pythonhosted.org/packages/24/2f/3cfa7244ae292dd850989f328722d2aef313f74ffc471184dc509e1e4e5a/pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246", size = 1855071, upload-time = "2025-04-23T18:31:04.621Z" }, + { url = "https://files.pythonhosted.org/packages/b3/d3/4ae42d33f5e3f50dd467761304be2fa0a9417fbf09735bc2cce003480f2a/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f", size = 1897823, upload-time = "2025-04-23T18:31:06.377Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f3/aa5976e8352b7695ff808599794b1fba2a9ae2ee954a3426855935799488/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc", size = 1983792, upload-time = "2025-04-23T18:31:07.93Z" }, + { url = "https://files.pythonhosted.org/packages/d5/7a/cda9b5a23c552037717f2b2a5257e9b2bfe45e687386df9591eff7b46d28/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de", size = 2136338, upload-time = "2025-04-23T18:31:09.283Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9f/b8f9ec8dd1417eb9da784e91e1667d58a2a4a7b7b34cf4af765ef663a7e5/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a", size = 2730998, upload-time = "2025-04-23T18:31:11.7Z" }, + { url = "https://files.pythonhosted.org/packages/47/bc/cd720e078576bdb8255d5032c5d63ee5c0bf4b7173dd955185a1d658c456/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef", size = 2003200, upload-time = "2025-04-23T18:31:13.536Z" }, + { url = "https://files.pythonhosted.org/packages/ca/22/3602b895ee2cd29d11a2b349372446ae9727c32e78a94b3d588a40fdf187/pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e", size = 2113890, upload-time = "2025-04-23T18:31:15.011Z" }, + { url = "https://files.pythonhosted.org/packages/ff/e6/e3c5908c03cf00d629eb38393a98fccc38ee0ce8ecce32f69fc7d7b558a7/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d", size = 2073359, upload-time = "2025-04-23T18:31:16.393Z" }, + { url = "https://files.pythonhosted.org/packages/12/e7/6a36a07c59ebefc8777d1ffdaf5ae71b06b21952582e4b07eba88a421c79/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30", size = 2245883, upload-time = "2025-04-23T18:31:17.892Z" }, + { url = "https://files.pythonhosted.org/packages/16/3f/59b3187aaa6cc0c1e6616e8045b284de2b6a87b027cce2ffcea073adf1d2/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf", size = 2241074, upload-time = "2025-04-23T18:31:19.205Z" }, + { url = "https://files.pythonhosted.org/packages/e0/ed/55532bb88f674d5d8f67ab121a2a13c385df382de2a1677f30ad385f7438/pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51", size = 1910538, upload-time = "2025-04-23T18:31:20.541Z" }, + { url = "https://files.pythonhosted.org/packages/fe/1b/25b7cccd4519c0b23c2dd636ad39d381abf113085ce4f7bec2b0dc755eb1/pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab", size = 1952909, upload-time = "2025-04-23T18:31:22.371Z" }, + { url = "https://files.pythonhosted.org/packages/49/a9/d809358e49126438055884c4366a1f6227f0f84f635a9014e2deb9b9de54/pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65", size = 1897786, upload-time = "2025-04-23T18:31:24.161Z" }, + { url = "https://files.pythonhosted.org/packages/18/8a/2b41c97f554ec8c71f2a8a5f85cb56a8b0956addfe8b0efb5b3d77e8bdc3/pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc", size = 2009000, upload-time = "2025-04-23T18:31:25.863Z" }, + { url = "https://files.pythonhosted.org/packages/a1/02/6224312aacb3c8ecbaa959897af57181fb6cf3a3d7917fd44d0f2917e6f2/pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7", size = 1847996, upload-time = "2025-04-23T18:31:27.341Z" }, + { url = "https://files.pythonhosted.org/packages/d6/46/6dcdf084a523dbe0a0be59d054734b86a981726f221f4562aed313dbcb49/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025", size = 1880957, upload-time = "2025-04-23T18:31:28.956Z" }, + { url = "https://files.pythonhosted.org/packages/ec/6b/1ec2c03837ac00886ba8160ce041ce4e325b41d06a034adbef11339ae422/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011", size = 1964199, upload-time = "2025-04-23T18:31:31.025Z" }, + { url = "https://files.pythonhosted.org/packages/2d/1d/6bf34d6adb9debd9136bd197ca72642203ce9aaaa85cfcbfcf20f9696e83/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f", size = 2120296, upload-time = "2025-04-23T18:31:32.514Z" }, + { url = "https://files.pythonhosted.org/packages/e0/94/2bd0aaf5a591e974b32a9f7123f16637776c304471a0ab33cf263cf5591a/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88", size = 2676109, upload-time = "2025-04-23T18:31:33.958Z" }, + { url = "https://files.pythonhosted.org/packages/f9/41/4b043778cf9c4285d59742281a769eac371b9e47e35f98ad321349cc5d61/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1", size = 2002028, upload-time = "2025-04-23T18:31:39.095Z" }, + { url = "https://files.pythonhosted.org/packages/cb/d5/7bb781bf2748ce3d03af04d5c969fa1308880e1dca35a9bd94e1a96a922e/pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b", size = 2100044, upload-time = "2025-04-23T18:31:41.034Z" }, + { url = "https://files.pythonhosted.org/packages/fe/36/def5e53e1eb0ad896785702a5bbfd25eed546cdcf4087ad285021a90ed53/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1", size = 2058881, upload-time = "2025-04-23T18:31:42.757Z" }, + { url = "https://files.pythonhosted.org/packages/01/6c/57f8d70b2ee57fc3dc8b9610315949837fa8c11d86927b9bb044f8705419/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6", size = 2227034, upload-time = "2025-04-23T18:31:44.304Z" }, + { url = "https://files.pythonhosted.org/packages/27/b9/9c17f0396a82b3d5cbea4c24d742083422639e7bb1d5bf600e12cb176a13/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea", size = 2234187, upload-time = "2025-04-23T18:31:45.891Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6a/adf5734ffd52bf86d865093ad70b2ce543415e0e356f6cacabbc0d9ad910/pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290", size = 1892628, upload-time = "2025-04-23T18:31:47.819Z" }, + { url = "https://files.pythonhosted.org/packages/43/e4/5479fecb3606c1368d496a825d8411e126133c41224c1e7238be58b87d7e/pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2", size = 1955866, upload-time = "2025-04-23T18:31:49.635Z" }, + { url = "https://files.pythonhosted.org/packages/0d/24/8b11e8b3e2be9dd82df4b11408a67c61bb4dc4f8e11b5b0fc888b38118b5/pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab", size = 1888894, upload-time = "2025-04-23T18:31:51.609Z" }, + { url = 
"https://files.pythonhosted.org/packages/46/8c/99040727b41f56616573a28771b1bfa08a3d3fe74d3d513f01251f79f172/pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f", size = 2015688, upload-time = "2025-04-23T18:31:53.175Z" }, + { url = "https://files.pythonhosted.org/packages/3a/cc/5999d1eb705a6cefc31f0b4a90e9f7fc400539b1a1030529700cc1b51838/pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6", size = 1844808, upload-time = "2025-04-23T18:31:54.79Z" }, + { url = "https://files.pythonhosted.org/packages/6f/5e/a0a7b8885c98889a18b6e376f344da1ef323d270b44edf8174d6bce4d622/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef", size = 1885580, upload-time = "2025-04-23T18:31:57.393Z" }, + { url = "https://files.pythonhosted.org/packages/3b/2a/953581f343c7d11a304581156618c3f592435523dd9d79865903272c256a/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a", size = 1973859, upload-time = "2025-04-23T18:31:59.065Z" }, + { url = "https://files.pythonhosted.org/packages/e6/55/f1a813904771c03a3f97f676c62cca0c0a4138654107c1b61f19c644868b/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916", size = 2120810, upload-time = "2025-04-23T18:32:00.78Z" }, + { url = "https://files.pythonhosted.org/packages/aa/c3/053389835a996e18853ba107a63caae0b9deb4a276c6b472931ea9ae6e48/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a", size = 2676498, upload-time = "2025-04-23T18:32:02.418Z" }, + { url = "https://files.pythonhosted.org/packages/eb/3c/f4abd740877a35abade05e437245b192f9d0ffb48bbbbd708df33d3cda37/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d", size = 2000611, upload-time = "2025-04-23T18:32:04.152Z" }, + { url = "https://files.pythonhosted.org/packages/59/a7/63ef2fed1837d1121a894d0ce88439fe3e3b3e48c7543b2a4479eb99c2bd/pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56", size = 2107924, upload-time = "2025-04-23T18:32:06.129Z" }, + { url = "https://files.pythonhosted.org/packages/04/8f/2551964ef045669801675f1cfc3b0d74147f4901c3ffa42be2ddb1f0efc4/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5", size = 2063196, upload-time = "2025-04-23T18:32:08.178Z" }, + { url = "https://files.pythonhosted.org/packages/26/bd/d9602777e77fc6dbb0c7db9ad356e9a985825547dce5ad1d30ee04903918/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e", size = 2236389, upload-time = "2025-04-23T18:32:10.242Z" }, + { url = "https://files.pythonhosted.org/packages/42/db/0e950daa7e2230423ab342ae918a794964b053bec24ba8af013fc7c94846/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162", size = 2239223, upload-time = "2025-04-23T18:32:12.382Z" }, + { url = "https://files.pythonhosted.org/packages/58/4d/4f937099c545a8a17eb52cb67fe0447fd9a373b348ccfa9a87f141eeb00f/pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849", size = 1900473, upload-time = "2025-04-23T18:32:14.034Z" }, + { url = "https://files.pythonhosted.org/packages/a0/75/4a0a9bac998d78d889def5e4ef2b065acba8cae8c93696906c3a91f310ca/pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9", size = 1955269, upload-time = "2025-04-23T18:32:15.783Z" }, + { url = "https://files.pythonhosted.org/packages/f9/86/1beda0576969592f1497b4ce8e7bc8cbdf614c352426271b1b10d5f0aa64/pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9", size = 1893921, upload-time = "2025-04-23T18:32:18.473Z" }, + { url = "https://files.pythonhosted.org/packages/a4/7d/e09391c2eebeab681df2b74bfe6c43422fffede8dc74187b2b0bf6fd7571/pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac", size = 1806162, upload-time = "2025-04-23T18:32:20.188Z" }, + { url = "https://files.pythonhosted.org/packages/f1/3d/847b6b1fed9f8ed3bb95a9ad04fbd0b212e832d4f0f50ff4d9ee5a9f15cf/pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5", size = 1981560, upload-time = "2025-04-23T18:32:22.354Z" }, + { url = "https://files.pythonhosted.org/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777, upload-time = "2025-04-23T18:32:25.088Z" }, + { url = "https://files.pythonhosted.org/packages/7b/27/d4ae6487d73948d6f20dddcd94be4ea43e74349b56eba82e9bdee2d7494c/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8", size = 2025200, upload-time = "2025-04-23T18:33:14.199Z" }, + { url = "https://files.pythonhosted.org/packages/f1/b8/b3cb95375f05d33801024079b9392a5ab45267a63400bf1866e7ce0f0de4/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593", size = 1859123, upload-time = "2025-04-23T18:33:16.555Z" }, + { url = "https://files.pythonhosted.org/packages/05/bc/0d0b5adeda59a261cd30a1235a445bf55c7e46ae44aea28f7bd6ed46e091/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612", size = 1892852, upload-time = "2025-04-23T18:33:18.513Z" }, + { url = "https://files.pythonhosted.org/packages/3e/11/d37bdebbda2e449cb3f519f6ce950927b56d62f0b84fd9cb9e372a26a3d5/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7", size = 2067484, upload-time = "2025-04-23T18:33:20.475Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/55/1f95f0a05ce72ecb02a8a8a1c3be0579bbc29b1d5ab68f1378b7bebc5057/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e", size = 2108896, upload-time = "2025-04-23T18:33:22.501Z" }, + { url = "https://files.pythonhosted.org/packages/53/89/2b2de6c81fa131f423246a9109d7b2a375e83968ad0800d6e57d0574629b/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8", size = 2069475, upload-time = "2025-04-23T18:33:24.528Z" }, + { url = "https://files.pythonhosted.org/packages/b8/e9/1f7efbe20d0b2b10f6718944b5d8ece9152390904f29a78e68d4e7961159/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf", size = 2239013, upload-time = "2025-04-23T18:33:26.621Z" }, + { url = "https://files.pythonhosted.org/packages/3c/b2/5309c905a93811524a49b4e031e9851a6b00ff0fb668794472ea7746b448/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb", size = 2238715, upload-time = "2025-04-23T18:33:28.656Z" }, + { url = "https://files.pythonhosted.org/packages/32/56/8a7ca5d2cd2cda1d245d34b1c9a942920a718082ae8e54e5f3e5a58b7add/pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1", size = 2066757, upload-time = "2025-04-23T18:33:30.645Z" }, ] [[package]] @@ -2788,56 +2767,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/b1/3baf80dc6d2b7bc27a95a67752d0208e410351e3feb4eb78de5f77454d8d/referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0", size = 26775, upload-time = "2025-01-25T08:48:14.241Z" }, ] -[[package]] -name = "regex" -version = "2025.7.34" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0b/de/e13fa6dc61d78b30ba47481f99933a3b49a57779d625c392d8036770a60d/regex-2025.7.34.tar.gz", hash = "sha256:9ead9765217afd04a86822dfcd4ed2747dfe426e887da413b15ff0ac2457e21a", size = 400714, upload-time = "2025-07-31T00:21:16.262Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/85/f497b91577169472f7c1dc262a5ecc65e39e146fc3a52c571e5daaae4b7d/regex-2025.7.34-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:da304313761b8500b8e175eb2040c4394a875837d5635f6256d6fa0377ad32c8", size = 484594, upload-time = "2025-07-31T00:19:13.927Z" }, - { url = "https://files.pythonhosted.org/packages/1c/c5/ad2a5c11ce9e6257fcbfd6cd965d07502f6054aaa19d50a3d7fd991ec5d1/regex-2025.7.34-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:35e43ebf5b18cd751ea81455b19acfdec402e82fe0dc6143edfae4c5c4b3909a", size = 289294, upload-time = "2025-07-31T00:19:15.395Z" }, - { url = "https://files.pythonhosted.org/packages/8e/01/83ffd9641fcf5e018f9b51aa922c3e538ac9439424fda3df540b643ecf4f/regex-2025.7.34-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96bbae4c616726f4661fe7bcad5952e10d25d3c51ddc388189d8864fbc1b3c68", size = 285933, upload-time = "2025-07-31T00:19:16.704Z" }, - { url = "https://files.pythonhosted.org/packages/77/20/5edab2e5766f0259bc1da7381b07ce6eb4401b17b2254d02f492cd8a81a8/regex-2025.7.34-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:9feab78a1ffa4f2b1e27b1bcdaad36f48c2fed4870264ce32f52a393db093c78", size = 792335, upload-time = "2025-07-31T00:19:18.561Z" }, - { url = "https://files.pythonhosted.org/packages/30/bd/744d3ed8777dce8487b2606b94925e207e7c5931d5870f47f5b643a4580a/regex-2025.7.34-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f14b36e6d4d07f1a5060f28ef3b3561c5d95eb0651741474ce4c0a4c56ba8719", size = 858605, upload-time = "2025-07-31T00:19:20.204Z" }, - { url = "https://files.pythonhosted.org/packages/99/3d/93754176289718d7578c31d151047e7b8acc7a8c20e7706716f23c49e45e/regex-2025.7.34-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:85c3a958ef8b3d5079c763477e1f09e89d13ad22198a37e9d7b26b4b17438b33", size = 905780, upload-time = "2025-07-31T00:19:21.876Z" }, - { url = "https://files.pythonhosted.org/packages/ee/2e/c689f274a92deffa03999a430505ff2aeace408fd681a90eafa92fdd6930/regex-2025.7.34-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:37555e4ae0b93358fa7c2d240a4291d4a4227cc7c607d8f85596cdb08ec0a083", size = 798868, upload-time = "2025-07-31T00:19:23.222Z" }, - { url = "https://files.pythonhosted.org/packages/0d/9e/39673688805d139b33b4a24851a71b9978d61915c4d72b5ffda324d0668a/regex-2025.7.34-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ee38926f31f1aa61b0232a3a11b83461f7807661c062df9eb88769d86e6195c3", size = 781784, upload-time = "2025-07-31T00:19:24.59Z" }, - { url = "https://files.pythonhosted.org/packages/18/bd/4c1cab12cfabe14beaa076523056b8ab0c882a8feaf0a6f48b0a75dab9ed/regex-2025.7.34-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a664291c31cae9c4a30589bd8bc2ebb56ef880c9c6264cb7643633831e606a4d", size = 852837, upload-time = "2025-07-31T00:19:25.911Z" }, - { url = "https://files.pythonhosted.org/packages/cb/21/663d983cbb3bba537fc213a579abbd0f263fb28271c514123f3c547ab917/regex-2025.7.34-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:f3e5c1e0925e77ec46ddc736b756a6da50d4df4ee3f69536ffb2373460e2dafd", size = 844240, upload-time = "2025-07-31T00:19:27.688Z" }, - { url = "https://files.pythonhosted.org/packages/8e/2d/9beeeb913bc5d32faa913cf8c47e968da936af61ec20af5d269d0f84a100/regex-2025.7.34-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d428fc7731dcbb4e2ffe43aeb8f90775ad155e7db4347a639768bc6cd2df881a", size = 787139, upload-time = "2025-07-31T00:19:29.475Z" }, - { url = "https://files.pythonhosted.org/packages/eb/f5/9b9384415fdc533551be2ba805dd8c4621873e5df69c958f403bfd3b2b6e/regex-2025.7.34-cp311-cp311-win32.whl", hash = "sha256:e154a7ee7fa18333ad90b20e16ef84daaeac61877c8ef942ec8dfa50dc38b7a1", size = 264019, upload-time = "2025-07-31T00:19:31.129Z" }, - { url = "https://files.pythonhosted.org/packages/18/9d/e069ed94debcf4cc9626d652a48040b079ce34c7e4fb174f16874958d485/regex-2025.7.34-cp311-cp311-win_amd64.whl", hash = "sha256:24257953d5c1d6d3c129ab03414c07fc1a47833c9165d49b954190b2b7f21a1a", size = 276047, upload-time = "2025-07-31T00:19:32.497Z" }, - { url = "https://files.pythonhosted.org/packages/fd/cf/3bafbe9d1fd1db77355e7fbbbf0d0cfb34501a8b8e334deca14f94c7b315/regex-2025.7.34-cp311-cp311-win_arm64.whl", hash = "sha256:3157aa512b9e606586900888cd469a444f9b898ecb7f8931996cb715f77477f0", size = 268362, upload-time = "2025-07-31T00:19:34.094Z" }, - { url = "https://files.pythonhosted.org/packages/ff/f0/31d62596c75a33f979317658e8d261574785c6cd8672c06741ce2e2e2070/regex-2025.7.34-cp312-cp312-macosx_10_13_universal2.whl", hash = 
"sha256:7f7211a746aced993bef487de69307a38c5ddd79257d7be83f7b202cb59ddb50", size = 485492, upload-time = "2025-07-31T00:19:35.57Z" }, - { url = "https://files.pythonhosted.org/packages/d8/16/b818d223f1c9758c3434be89aa1a01aae798e0e0df36c1f143d1963dd1ee/regex-2025.7.34-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fb31080f2bd0681484b275461b202b5ad182f52c9ec606052020fe13eb13a72f", size = 290000, upload-time = "2025-07-31T00:19:37.175Z" }, - { url = "https://files.pythonhosted.org/packages/cd/70/69506d53397b4bd6954061bae75677ad34deb7f6ca3ba199660d6f728ff5/regex-2025.7.34-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0200a5150c4cf61e407038f4b4d5cdad13e86345dac29ff9dab3d75d905cf130", size = 286072, upload-time = "2025-07-31T00:19:38.612Z" }, - { url = "https://files.pythonhosted.org/packages/b0/73/536a216d5f66084fb577bb0543b5cb7de3272eb70a157f0c3a542f1c2551/regex-2025.7.34-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:739a74970e736df0773788377969c9fea3876c2fc13d0563f98e5503e5185f46", size = 797341, upload-time = "2025-07-31T00:19:40.119Z" }, - { url = "https://files.pythonhosted.org/packages/26/af/733f8168449e56e8f404bb807ea7189f59507cbea1b67a7bbcd92f8bf844/regex-2025.7.34-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4fef81b2f7ea6a2029161ed6dea9ae13834c28eb5a95b8771828194a026621e4", size = 862556, upload-time = "2025-07-31T00:19:41.556Z" }, - { url = "https://files.pythonhosted.org/packages/19/dd/59c464d58c06c4f7d87de4ab1f590e430821345a40c5d345d449a636d15f/regex-2025.7.34-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ea74cf81fe61a7e9d77989050d0089a927ab758c29dac4e8e1b6c06fccf3ebf0", size = 910762, upload-time = "2025-07-31T00:19:43Z" }, - { url = "https://files.pythonhosted.org/packages/37/a8/b05ccf33ceca0815a1e253693b2c86544932ebcc0049c16b0fbdf18b688b/regex-2025.7.34-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e4636a7f3b65a5f340ed9ddf53585c42e3ff37101d383ed321bfe5660481744b", size = 801892, upload-time = "2025-07-31T00:19:44.645Z" }, - { url = "https://files.pythonhosted.org/packages/5f/9a/b993cb2e634cc22810afd1652dba0cae156c40d4864285ff486c73cd1996/regex-2025.7.34-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cef962d7834437fe8d3da6f9bfc6f93f20f218266dcefec0560ed7765f5fe01", size = 786551, upload-time = "2025-07-31T00:19:46.127Z" }, - { url = "https://files.pythonhosted.org/packages/2d/79/7849d67910a0de4e26834b5bb816e028e35473f3d7ae563552ea04f58ca2/regex-2025.7.34-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:cbe1698e5b80298dbce8df4d8d1182279fbdaf1044e864cbc9d53c20e4a2be77", size = 856457, upload-time = "2025-07-31T00:19:47.562Z" }, - { url = "https://files.pythonhosted.org/packages/91/c6/de516bc082524b27e45cb4f54e28bd800c01efb26d15646a65b87b13a91e/regex-2025.7.34-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:32b9f9bcf0f605eb094b08e8da72e44badabb63dde6b83bd530580b488d1c6da", size = 848902, upload-time = "2025-07-31T00:19:49.312Z" }, - { url = "https://files.pythonhosted.org/packages/7d/22/519ff8ba15f732db099b126f039586bd372da6cd4efb810d5d66a5daeda1/regex-2025.7.34-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:524c868ba527eab4e8744a9287809579f54ae8c62fbf07d62aacd89f6026b282", size = 788038, upload-time = "2025-07-31T00:19:50.794Z" }, - { url = 
"https://files.pythonhosted.org/packages/3f/7d/aabb467d8f57d8149895d133c88eb809a1a6a0fe262c1d508eb9dfabb6f9/regex-2025.7.34-cp312-cp312-win32.whl", hash = "sha256:d600e58ee6d036081c89696d2bdd55d507498a7180df2e19945c6642fac59588", size = 264417, upload-time = "2025-07-31T00:19:52.292Z" }, - { url = "https://files.pythonhosted.org/packages/3b/39/bd922b55a4fc5ad5c13753274e5b536f5b06ec8eb9747675668491c7ab7a/regex-2025.7.34-cp312-cp312-win_amd64.whl", hash = "sha256:9a9ab52a466a9b4b91564437b36417b76033e8778e5af8f36be835d8cb370d62", size = 275387, upload-time = "2025-07-31T00:19:53.593Z" }, - { url = "https://files.pythonhosted.org/packages/f7/3c/c61d2fdcecb754a40475a3d1ef9a000911d3e3fc75c096acf44b0dfb786a/regex-2025.7.34-cp312-cp312-win_arm64.whl", hash = "sha256:c83aec91af9c6fbf7c743274fd952272403ad9a9db05fe9bfc9df8d12b45f176", size = 268482, upload-time = "2025-07-31T00:19:55.183Z" }, - { url = "https://files.pythonhosted.org/packages/15/16/b709b2119975035169a25aa8e4940ca177b1a2e25e14f8d996d09130368e/regex-2025.7.34-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c3c9740a77aeef3f5e3aaab92403946a8d34437db930a0280e7e81ddcada61f5", size = 485334, upload-time = "2025-07-31T00:19:56.58Z" }, - { url = "https://files.pythonhosted.org/packages/94/a6/c09136046be0595f0331bc58a0e5f89c2d324cf734e0b0ec53cf4b12a636/regex-2025.7.34-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:69ed3bc611540f2ea70a4080f853741ec698be556b1df404599f8724690edbcd", size = 289942, upload-time = "2025-07-31T00:19:57.943Z" }, - { url = "https://files.pythonhosted.org/packages/36/91/08fc0fd0f40bdfb0e0df4134ee37cfb16e66a1044ac56d36911fd01c69d2/regex-2025.7.34-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d03c6f9dcd562c56527c42b8530aad93193e0b3254a588be1f2ed378cdfdea1b", size = 285991, upload-time = "2025-07-31T00:19:59.837Z" }, - { url = "https://files.pythonhosted.org/packages/be/2f/99dc8f6f756606f0c214d14c7b6c17270b6bbe26d5c1f05cde9dbb1c551f/regex-2025.7.34-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6164b1d99dee1dfad33f301f174d8139d4368a9fb50bf0a3603b2eaf579963ad", size = 797415, upload-time = "2025-07-31T00:20:01.668Z" }, - { url = "https://files.pythonhosted.org/packages/62/cf/2fcdca1110495458ba4e95c52ce73b361cf1cafd8a53b5c31542cde9a15b/regex-2025.7.34-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1e4f4f62599b8142362f164ce776f19d79bdd21273e86920a7b604a4275b4f59", size = 862487, upload-time = "2025-07-31T00:20:03.142Z" }, - { url = "https://files.pythonhosted.org/packages/90/38/899105dd27fed394e3fae45607c1983e138273ec167e47882fc401f112b9/regex-2025.7.34-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:72a26dcc6a59c057b292f39d41465d8233a10fd69121fa24f8f43ec6294e5415", size = 910717, upload-time = "2025-07-31T00:20:04.727Z" }, - { url = "https://files.pythonhosted.org/packages/ee/f6/4716198dbd0bcc9c45625ac4c81a435d1c4d8ad662e8576dac06bab35b17/regex-2025.7.34-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5273fddf7a3e602695c92716c420c377599ed3c853ea669c1fe26218867002f", size = 801943, upload-time = "2025-07-31T00:20:07.1Z" }, - { url = "https://files.pythonhosted.org/packages/40/5d/cff8896d27e4e3dd11dd72ac78797c7987eb50fe4debc2c0f2f1682eb06d/regex-2025.7.34-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c1844be23cd40135b3a5a4dd298e1e0c0cb36757364dd6cdc6025770363e06c1", size = 786664, upload-time = 
"2025-07-31T00:20:08.818Z" }, - { url = "https://files.pythonhosted.org/packages/10/29/758bf83cf7b4c34f07ac3423ea03cee3eb3176941641e4ccc05620f6c0b8/regex-2025.7.34-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dde35e2afbbe2272f8abee3b9fe6772d9b5a07d82607b5788e8508974059925c", size = 856457, upload-time = "2025-07-31T00:20:10.328Z" }, - { url = "https://files.pythonhosted.org/packages/d7/30/c19d212b619963c5b460bfed0ea69a092c6a43cba52a973d46c27b3e2975/regex-2025.7.34-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:f3f6e8e7af516a7549412ce57613e859c3be27d55341a894aacaa11703a4c31a", size = 849008, upload-time = "2025-07-31T00:20:11.823Z" }, - { url = "https://files.pythonhosted.org/packages/9e/b8/3c35da3b12c87e3cc00010ef6c3a4ae787cff0bc381aa3d251def219969a/regex-2025.7.34-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:469142fb94a869beb25b5f18ea87646d21def10fbacb0bcb749224f3509476f0", size = 788101, upload-time = "2025-07-31T00:20:13.729Z" }, - { url = "https://files.pythonhosted.org/packages/47/80/2f46677c0b3c2b723b2c358d19f9346e714113865da0f5f736ca1a883bde/regex-2025.7.34-cp313-cp313-win32.whl", hash = "sha256:da7507d083ee33ccea1310447410c27ca11fb9ef18c95899ca57ff60a7e4d8f1", size = 264401, upload-time = "2025-07-31T00:20:15.233Z" }, - { url = "https://files.pythonhosted.org/packages/be/fa/917d64dd074682606a003cba33585c28138c77d848ef72fc77cbb1183849/regex-2025.7.34-cp313-cp313-win_amd64.whl", hash = "sha256:9d644de5520441e5f7e2db63aec2748948cc39ed4d7a87fd5db578ea4043d997", size = 275368, upload-time = "2025-07-31T00:20:16.711Z" }, - { url = "https://files.pythonhosted.org/packages/65/cd/f94383666704170a2154a5df7b16be28f0c27a266bffcd843e58bc84120f/regex-2025.7.34-cp313-cp313-win_arm64.whl", hash = "sha256:7bf1c5503a9f2cbd2f52d7e260acb3131b07b6273c470abb78568174fe6bde3f", size = 268482, upload-time = "2025-07-31T00:20:18.189Z" }, -] - [[package]] name = "requests" version = "2.32.4" @@ -2868,15 +2797,15 @@ wheels = [ [[package]] name = "rich" -version = "13.9.4" +version = "14.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ab/3a/0316b28d0761c6734d6bc14e770d85506c986c85ffb239e688eeaab2c2bc/rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098", size = 223149, upload-time = "2024-11-01T16:43:57.873Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/75/af448d8e52bf1d8fa6a9d089ca6c07ff4453d86c65c145d0a300bb073b9b/rich-14.1.0.tar.gz", hash = "sha256:e497a48b844b0320d45007cdebfeaeed8db2a4f4bcf49f15e455cfc4af11eaa8", size = 224441, upload-time = "2025-07-25T07:32:58.125Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/19/71/39c7c0d87f8d4e6c020a393182060eaefeeae6c01dab6a84ec346f2567df/rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90", size = 242424, upload-time = "2024-11-01T16:43:55.817Z" }, + { url = "https://files.pythonhosted.org/packages/e3/30/3c4d035596d3cf444529e0b2953ad0466f6049528a879d27534700580395/rich-14.1.0-py3-none-any.whl", hash = "sha256:536f5f1785986d6dbdea3c75205c473f970777b4a0d6c6dd1b696aa05a3fa04f", size = 243368, upload-time = "2025-07-25T07:32:56.73Z" }, ] [[package]] @@ -2890,76 +2819,81 @@ wheels = [ [[package]] name = "rpds-py" -version = "0.26.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/a5/aa/4456d84bbb54adc6a916fb10c9b374f78ac840337644e4a5eda229c81275/rpds_py-0.26.0.tar.gz", hash = "sha256:20dae58a859b0906f0685642e591056f1e787f3a8b39c8e8749a45dc7d26bdb0", size = 27385, upload-time = "2025-07-01T15:57:13.958Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/09/4c/4ee8f7e512030ff79fda1df3243c88d70fc874634e2dbe5df13ba4210078/rpds_py-0.26.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:9e8cb77286025bdb21be2941d64ac6ca016130bfdcd228739e8ab137eb4406ed", size = 372610, upload-time = "2025-07-01T15:53:58.844Z" }, - { url = "https://files.pythonhosted.org/packages/fa/9d/3dc16be00f14fc1f03c71b1d67c8df98263ab2710a2fbd65a6193214a527/rpds_py-0.26.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5e09330b21d98adc8ccb2dbb9fc6cb434e8908d4c119aeaa772cb1caab5440a0", size = 358032, upload-time = "2025-07-01T15:53:59.985Z" }, - { url = "https://files.pythonhosted.org/packages/e7/5a/7f1bf8f045da2866324a08ae80af63e64e7bfaf83bd31f865a7b91a58601/rpds_py-0.26.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c9c1b92b774b2e68d11193dc39620d62fd8ab33f0a3c77ecdabe19c179cdbc1", size = 381525, upload-time = "2025-07-01T15:54:01.162Z" }, - { url = "https://files.pythonhosted.org/packages/45/8a/04479398c755a066ace10e3d158866beb600867cacae194c50ffa783abd0/rpds_py-0.26.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:824e6d3503ab990d7090768e4dfd9e840837bae057f212ff9f4f05ec6d1975e7", size = 397089, upload-time = "2025-07-01T15:54:02.319Z" }, - { url = "https://files.pythonhosted.org/packages/72/88/9203f47268db488a1b6d469d69c12201ede776bb728b9d9f29dbfd7df406/rpds_py-0.26.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ad7fd2258228bf288f2331f0a6148ad0186b2e3643055ed0db30990e59817a6", size = 514255, upload-time = "2025-07-01T15:54:03.38Z" }, - { url = "https://files.pythonhosted.org/packages/f5/b4/01ce5d1e853ddf81fbbd4311ab1eff0b3cf162d559288d10fd127e2588b5/rpds_py-0.26.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0dc23bbb3e06ec1ea72d515fb572c1fea59695aefbffb106501138762e1e915e", size = 402283, upload-time = "2025-07-01T15:54:04.923Z" }, - { url = "https://files.pythonhosted.org/packages/34/a2/004c99936997bfc644d590a9defd9e9c93f8286568f9c16cdaf3e14429a7/rpds_py-0.26.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d80bf832ac7b1920ee29a426cdca335f96a2b5caa839811803e999b41ba9030d", size = 383881, upload-time = "2025-07-01T15:54:06.482Z" }, - { url = "https://files.pythonhosted.org/packages/05/1b/ef5fba4a8f81ce04c427bfd96223f92f05e6cd72291ce9d7523db3b03a6c/rpds_py-0.26.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0919f38f5542c0a87e7b4afcafab6fd2c15386632d249e9a087498571250abe3", size = 415822, upload-time = "2025-07-01T15:54:07.605Z" }, - { url = "https://files.pythonhosted.org/packages/16/80/5c54195aec456b292f7bd8aa61741c8232964063fd8a75fdde9c1e982328/rpds_py-0.26.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d422b945683e409000c888e384546dbab9009bb92f7c0b456e217988cf316107", size = 558347, upload-time = "2025-07-01T15:54:08.591Z" }, - { url = "https://files.pythonhosted.org/packages/f2/1c/1845c1b1fd6d827187c43afe1841d91678d7241cbdb5420a4c6de180a538/rpds_py-0.26.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:77a7711fa562ba2da1aa757e11024ad6d93bad6ad7ede5afb9af144623e5f76a", size = 587956, upload-time = "2025-07-01T15:54:09.963Z" }, - { url = 
"https://files.pythonhosted.org/packages/2e/ff/9e979329dd131aa73a438c077252ddabd7df6d1a7ad7b9aacf6261f10faa/rpds_py-0.26.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:238e8c8610cb7c29460e37184f6799547f7e09e6a9bdbdab4e8edb90986a2318", size = 554363, upload-time = "2025-07-01T15:54:11.073Z" }, - { url = "https://files.pythonhosted.org/packages/00/8b/d78cfe034b71ffbe72873a136e71acc7a831a03e37771cfe59f33f6de8a2/rpds_py-0.26.0-cp311-cp311-win32.whl", hash = "sha256:893b022bfbdf26d7bedb083efeea624e8550ca6eb98bf7fea30211ce95b9201a", size = 220123, upload-time = "2025-07-01T15:54:12.382Z" }, - { url = "https://files.pythonhosted.org/packages/94/c1/3c8c94c7dd3905dbfde768381ce98778500a80db9924731d87ddcdb117e9/rpds_py-0.26.0-cp311-cp311-win_amd64.whl", hash = "sha256:87a5531de9f71aceb8af041d72fc4cab4943648d91875ed56d2e629bef6d4c03", size = 231732, upload-time = "2025-07-01T15:54:13.434Z" }, - { url = "https://files.pythonhosted.org/packages/67/93/e936fbed1b734eabf36ccb5d93c6a2e9246fbb13c1da011624b7286fae3e/rpds_py-0.26.0-cp311-cp311-win_arm64.whl", hash = "sha256:de2713f48c1ad57f89ac25b3cb7daed2156d8e822cf0eca9b96a6f990718cc41", size = 221917, upload-time = "2025-07-01T15:54:14.559Z" }, - { url = "https://files.pythonhosted.org/packages/ea/86/90eb87c6f87085868bd077c7a9938006eb1ce19ed4d06944a90d3560fce2/rpds_py-0.26.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:894514d47e012e794f1350f076c427d2347ebf82f9b958d554d12819849a369d", size = 363933, upload-time = "2025-07-01T15:54:15.734Z" }, - { url = "https://files.pythonhosted.org/packages/63/78/4469f24d34636242c924626082b9586f064ada0b5dbb1e9d096ee7a8e0c6/rpds_py-0.26.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc921b96fa95a097add244da36a1d9e4f3039160d1d30f1b35837bf108c21136", size = 350447, upload-time = "2025-07-01T15:54:16.922Z" }, - { url = "https://files.pythonhosted.org/packages/ad/91/c448ed45efdfdade82348d5e7995e15612754826ea640afc20915119734f/rpds_py-0.26.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e1157659470aa42a75448b6e943c895be8c70531c43cb78b9ba990778955582", size = 384711, upload-time = "2025-07-01T15:54:18.101Z" }, - { url = "https://files.pythonhosted.org/packages/ec/43/e5c86fef4be7f49828bdd4ecc8931f0287b1152c0bb0163049b3218740e7/rpds_py-0.26.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:521ccf56f45bb3a791182dc6b88ae5f8fa079dd705ee42138c76deb1238e554e", size = 400865, upload-time = "2025-07-01T15:54:19.295Z" }, - { url = "https://files.pythonhosted.org/packages/55/34/e00f726a4d44f22d5c5fe2e5ddd3ac3d7fd3f74a175607781fbdd06fe375/rpds_py-0.26.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9def736773fd56b305c0eef698be5192c77bfa30d55a0e5885f80126c4831a15", size = 517763, upload-time = "2025-07-01T15:54:20.858Z" }, - { url = "https://files.pythonhosted.org/packages/52/1c/52dc20c31b147af724b16104500fba13e60123ea0334beba7b40e33354b4/rpds_py-0.26.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cdad4ea3b4513b475e027be79e5a0ceac8ee1c113a1a11e5edc3c30c29f964d8", size = 406651, upload-time = "2025-07-01T15:54:22.508Z" }, - { url = "https://files.pythonhosted.org/packages/2e/77/87d7bfabfc4e821caa35481a2ff6ae0b73e6a391bb6b343db2c91c2b9844/rpds_py-0.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82b165b07f416bdccf5c84546a484cc8f15137ca38325403864bfdf2b5b72f6a", size = 386079, upload-time = "2025-07-01T15:54:23.987Z" }, - { url = 
"https://files.pythonhosted.org/packages/e3/d4/7f2200c2d3ee145b65b3cddc4310d51f7da6a26634f3ac87125fd789152a/rpds_py-0.26.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d04cab0a54b9dba4d278fe955a1390da3cf71f57feb78ddc7cb67cbe0bd30323", size = 421379, upload-time = "2025-07-01T15:54:25.073Z" }, - { url = "https://files.pythonhosted.org/packages/ae/13/9fdd428b9c820869924ab62236b8688b122baa22d23efdd1c566938a39ba/rpds_py-0.26.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:79061ba1a11b6a12743a2b0f72a46aa2758613d454aa6ba4f5a265cc48850158", size = 562033, upload-time = "2025-07-01T15:54:26.225Z" }, - { url = "https://files.pythonhosted.org/packages/f3/e1/b69686c3bcbe775abac3a4c1c30a164a2076d28df7926041f6c0eb5e8d28/rpds_py-0.26.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f405c93675d8d4c5ac87364bb38d06c988e11028a64b52a47158a355079661f3", size = 591639, upload-time = "2025-07-01T15:54:27.424Z" }, - { url = "https://files.pythonhosted.org/packages/5c/c9/1e3d8c8863c84a90197ac577bbc3d796a92502124c27092413426f670990/rpds_py-0.26.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dafd4c44b74aa4bed4b250f1aed165b8ef5de743bcca3b88fc9619b6087093d2", size = 557105, upload-time = "2025-07-01T15:54:29.93Z" }, - { url = "https://files.pythonhosted.org/packages/9f/c5/90c569649057622959f6dcc40f7b516539608a414dfd54b8d77e3b201ac0/rpds_py-0.26.0-cp312-cp312-win32.whl", hash = "sha256:3da5852aad63fa0c6f836f3359647870e21ea96cf433eb393ffa45263a170d44", size = 223272, upload-time = "2025-07-01T15:54:31.128Z" }, - { url = "https://files.pythonhosted.org/packages/7d/16/19f5d9f2a556cfed454eebe4d354c38d51c20f3db69e7b4ce6cff904905d/rpds_py-0.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf47cfdabc2194a669dcf7a8dbba62e37a04c5041d2125fae0233b720da6f05c", size = 234995, upload-time = "2025-07-01T15:54:32.195Z" }, - { url = "https://files.pythonhosted.org/packages/83/f0/7935e40b529c0e752dfaa7880224771b51175fce08b41ab4a92eb2fbdc7f/rpds_py-0.26.0-cp312-cp312-win_arm64.whl", hash = "sha256:20ab1ae4fa534f73647aad289003f1104092890849e0266271351922ed5574f8", size = 223198, upload-time = "2025-07-01T15:54:33.271Z" }, - { url = "https://files.pythonhosted.org/packages/6a/67/bb62d0109493b12b1c6ab00de7a5566aa84c0e44217c2d94bee1bd370da9/rpds_py-0.26.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:696764a5be111b036256c0b18cd29783fab22154690fc698062fc1b0084b511d", size = 363917, upload-time = "2025-07-01T15:54:34.755Z" }, - { url = "https://files.pythonhosted.org/packages/4b/f3/34e6ae1925a5706c0f002a8d2d7f172373b855768149796af87bd65dcdb9/rpds_py-0.26.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1e6c15d2080a63aaed876e228efe4f814bc7889c63b1e112ad46fdc8b368b9e1", size = 350073, upload-time = "2025-07-01T15:54:36.292Z" }, - { url = "https://files.pythonhosted.org/packages/75/83/1953a9d4f4e4de7fd0533733e041c28135f3c21485faaef56a8aadbd96b5/rpds_py-0.26.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:390e3170babf42462739a93321e657444f0862c6d722a291accc46f9d21ed04e", size = 384214, upload-time = "2025-07-01T15:54:37.469Z" }, - { url = "https://files.pythonhosted.org/packages/48/0e/983ed1b792b3322ea1d065e67f4b230f3b96025f5ce3878cc40af09b7533/rpds_py-0.26.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7da84c2c74c0f5bc97d853d9e17bb83e2dcafcff0dc48286916001cc114379a1", size = 400113, upload-time = "2025-07-01T15:54:38.954Z" }, - { url = 
"https://files.pythonhosted.org/packages/69/7f/36c0925fff6f660a80be259c5b4f5e53a16851f946eb080351d057698528/rpds_py-0.26.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4c5fe114a6dd480a510b6d3661d09d67d1622c4bf20660a474507aaee7eeeee9", size = 515189, upload-time = "2025-07-01T15:54:40.57Z" }, - { url = "https://files.pythonhosted.org/packages/13/45/cbf07fc03ba7a9b54662c9badb58294ecfb24f828b9732970bd1a431ed5c/rpds_py-0.26.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3100b3090269f3a7ea727b06a6080d4eb7439dca4c0e91a07c5d133bb1727ea7", size = 406998, upload-time = "2025-07-01T15:54:43.025Z" }, - { url = "https://files.pythonhosted.org/packages/6c/b0/8fa5e36e58657997873fd6a1cf621285ca822ca75b4b3434ead047daa307/rpds_py-0.26.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c03c9b0c64afd0320ae57de4c982801271c0c211aa2d37f3003ff5feb75bb04", size = 385903, upload-time = "2025-07-01T15:54:44.752Z" }, - { url = "https://files.pythonhosted.org/packages/4b/f7/b25437772f9f57d7a9fbd73ed86d0dcd76b4c7c6998348c070d90f23e315/rpds_py-0.26.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5963b72ccd199ade6ee493723d18a3f21ba7d5b957017607f815788cef50eaf1", size = 419785, upload-time = "2025-07-01T15:54:46.043Z" }, - { url = "https://files.pythonhosted.org/packages/a7/6b/63ffa55743dfcb4baf2e9e77a0b11f7f97ed96a54558fcb5717a4b2cd732/rpds_py-0.26.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9da4e873860ad5bab3291438525cae80169daecbfafe5657f7f5fb4d6b3f96b9", size = 561329, upload-time = "2025-07-01T15:54:47.64Z" }, - { url = "https://files.pythonhosted.org/packages/2f/07/1f4f5e2886c480a2346b1e6759c00278b8a69e697ae952d82ae2e6ee5db0/rpds_py-0.26.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5afaddaa8e8c7f1f7b4c5c725c0070b6eed0228f705b90a1732a48e84350f4e9", size = 590875, upload-time = "2025-07-01T15:54:48.9Z" }, - { url = "https://files.pythonhosted.org/packages/cc/bc/e6639f1b91c3a55f8c41b47d73e6307051b6e246254a827ede730624c0f8/rpds_py-0.26.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4916dc96489616a6f9667e7526af8fa693c0fdb4f3acb0e5d9f4400eb06a47ba", size = 556636, upload-time = "2025-07-01T15:54:50.619Z" }, - { url = "https://files.pythonhosted.org/packages/05/4c/b3917c45566f9f9a209d38d9b54a1833f2bb1032a3e04c66f75726f28876/rpds_py-0.26.0-cp313-cp313-win32.whl", hash = "sha256:2a343f91b17097c546b93f7999976fd6c9d5900617aa848c81d794e062ab302b", size = 222663, upload-time = "2025-07-01T15:54:52.023Z" }, - { url = "https://files.pythonhosted.org/packages/e0/0b/0851bdd6025775aaa2365bb8de0697ee2558184c800bfef8d7aef5ccde58/rpds_py-0.26.0-cp313-cp313-win_amd64.whl", hash = "sha256:0a0b60701f2300c81b2ac88a5fb893ccfa408e1c4a555a77f908a2596eb875a5", size = 234428, upload-time = "2025-07-01T15:54:53.692Z" }, - { url = "https://files.pythonhosted.org/packages/ed/e8/a47c64ed53149c75fb581e14a237b7b7cd18217e969c30d474d335105622/rpds_py-0.26.0-cp313-cp313-win_arm64.whl", hash = "sha256:257d011919f133a4746958257f2c75238e3ff54255acd5e3e11f3ff41fd14256", size = 222571, upload-time = "2025-07-01T15:54:54.822Z" }, - { url = "https://files.pythonhosted.org/packages/89/bf/3d970ba2e2bcd17d2912cb42874107390f72873e38e79267224110de5e61/rpds_py-0.26.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:529c8156d7506fba5740e05da8795688f87119cce330c244519cf706a4a3d618", size = 360475, upload-time = "2025-07-01T15:54:56.228Z" }, - { url = 
"https://files.pythonhosted.org/packages/82/9f/283e7e2979fc4ec2d8ecee506d5a3675fce5ed9b4b7cb387ea5d37c2f18d/rpds_py-0.26.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f53ec51f9d24e9638a40cabb95078ade8c99251945dad8d57bf4aabe86ecee35", size = 346692, upload-time = "2025-07-01T15:54:58.561Z" }, - { url = "https://files.pythonhosted.org/packages/e3/03/7e50423c04d78daf391da3cc4330bdb97042fc192a58b186f2d5deb7befd/rpds_py-0.26.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab504c4d654e4a29558eaa5bb8cea5fdc1703ea60a8099ffd9c758472cf913f", size = 379415, upload-time = "2025-07-01T15:54:59.751Z" }, - { url = "https://files.pythonhosted.org/packages/57/00/d11ee60d4d3b16808432417951c63df803afb0e0fc672b5e8d07e9edaaae/rpds_py-0.26.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fd0641abca296bc1a00183fe44f7fced8807ed49d501f188faa642d0e4975b83", size = 391783, upload-time = "2025-07-01T15:55:00.898Z" }, - { url = "https://files.pythonhosted.org/packages/08/b3/1069c394d9c0d6d23c5b522e1f6546b65793a22950f6e0210adcc6f97c3e/rpds_py-0.26.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:69b312fecc1d017b5327afa81d4da1480f51c68810963a7336d92203dbb3d4f1", size = 512844, upload-time = "2025-07-01T15:55:02.201Z" }, - { url = "https://files.pythonhosted.org/packages/08/3b/c4fbf0926800ed70b2c245ceca99c49f066456755f5d6eb8863c2c51e6d0/rpds_py-0.26.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c741107203954f6fc34d3066d213d0a0c40f7bb5aafd698fb39888af277c70d8", size = 402105, upload-time = "2025-07-01T15:55:03.698Z" }, - { url = "https://files.pythonhosted.org/packages/1c/b0/db69b52ca07413e568dae9dc674627a22297abb144c4d6022c6d78f1e5cc/rpds_py-0.26.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc3e55a7db08dc9a6ed5fb7103019d2c1a38a349ac41901f9f66d7f95750942f", size = 383440, upload-time = "2025-07-01T15:55:05.398Z" }, - { url = "https://files.pythonhosted.org/packages/4c/e1/c65255ad5b63903e56b3bb3ff9dcc3f4f5c3badde5d08c741ee03903e951/rpds_py-0.26.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9e851920caab2dbcae311fd28f4313c6953993893eb5c1bb367ec69d9a39e7ed", size = 412759, upload-time = "2025-07-01T15:55:08.316Z" }, - { url = "https://files.pythonhosted.org/packages/e4/22/bb731077872377a93c6e93b8a9487d0406c70208985831034ccdeed39c8e/rpds_py-0.26.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:dfbf280da5f876d0b00c81f26bedce274e72a678c28845453885a9b3c22ae632", size = 556032, upload-time = "2025-07-01T15:55:09.52Z" }, - { url = "https://files.pythonhosted.org/packages/e0/8b/393322ce7bac5c4530fb96fc79cc9ea2f83e968ff5f6e873f905c493e1c4/rpds_py-0.26.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:1cc81d14ddfa53d7f3906694d35d54d9d3f850ef8e4e99ee68bc0d1e5fed9a9c", size = 585416, upload-time = "2025-07-01T15:55:11.216Z" }, - { url = "https://files.pythonhosted.org/packages/49/ae/769dc372211835bf759319a7aae70525c6eb523e3371842c65b7ef41c9c6/rpds_py-0.26.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dca83c498b4650a91efcf7b88d669b170256bf8017a5db6f3e06c2bf031f57e0", size = 554049, upload-time = "2025-07-01T15:55:13.004Z" }, - { url = "https://files.pythonhosted.org/packages/6b/f9/4c43f9cc203d6ba44ce3146246cdc38619d92c7bd7bad4946a3491bd5b70/rpds_py-0.26.0-cp313-cp313t-win32.whl", hash = "sha256:4d11382bcaf12f80b51d790dee295c56a159633a8e81e6323b16e55d81ae37e9", size = 218428, upload-time = "2025-07-01T15:55:14.486Z" }, - { 
url = "https://files.pythonhosted.org/packages/7e/8b/9286b7e822036a4a977f2f1e851c7345c20528dbd56b687bb67ed68a8ede/rpds_py-0.26.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ff110acded3c22c033e637dd8896e411c7d3a11289b2edf041f86663dbc791e9", size = 231524, upload-time = "2025-07-01T15:55:15.745Z" }, - { url = "https://files.pythonhosted.org/packages/51/f2/b5c85b758a00c513bb0389f8fc8e61eb5423050c91c958cdd21843faa3e6/rpds_py-0.26.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f61a9326f80ca59214d1cceb0a09bb2ece5b2563d4e0cd37bfd5515c28510674", size = 373505, upload-time = "2025-07-01T15:56:34.716Z" }, - { url = "https://files.pythonhosted.org/packages/23/e0/25db45e391251118e915e541995bb5f5ac5691a3b98fb233020ba53afc9b/rpds_py-0.26.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:183f857a53bcf4b1b42ef0f57ca553ab56bdd170e49d8091e96c51c3d69ca696", size = 359468, upload-time = "2025-07-01T15:56:36.219Z" }, - { url = "https://files.pythonhosted.org/packages/0b/73/dd5ee6075bb6491be3a646b301dfd814f9486d924137a5098e61f0487e16/rpds_py-0.26.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:941c1cfdf4799d623cf3aa1d326a6b4fdb7a5799ee2687f3516738216d2262fb", size = 382680, upload-time = "2025-07-01T15:56:37.644Z" }, - { url = "https://files.pythonhosted.org/packages/2f/10/84b522ff58763a5c443f5bcedc1820240e454ce4e620e88520f04589e2ea/rpds_py-0.26.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72a8d9564a717ee291f554eeb4bfeafe2309d5ec0aa6c475170bdab0f9ee8e88", size = 397035, upload-time = "2025-07-01T15:56:39.241Z" }, - { url = "https://files.pythonhosted.org/packages/06/ea/8667604229a10a520fcbf78b30ccc278977dcc0627beb7ea2c96b3becef0/rpds_py-0.26.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:511d15193cbe013619dd05414c35a7dedf2088fcee93c6bbb7c77859765bd4e8", size = 514922, upload-time = "2025-07-01T15:56:40.645Z" }, - { url = "https://files.pythonhosted.org/packages/24/e6/9ed5b625c0661c4882fc8cdf302bf8e96c73c40de99c31e0b95ed37d508c/rpds_py-0.26.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aea1f9741b603a8d8fedb0ed5502c2bc0accbc51f43e2ad1337fe7259c2b77a5", size = 402822, upload-time = "2025-07-01T15:56:42.137Z" }, - { url = "https://files.pythonhosted.org/packages/8a/58/212c7b6fd51946047fb45d3733da27e2fa8f7384a13457c874186af691b1/rpds_py-0.26.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4019a9d473c708cf2f16415688ef0b4639e07abaa569d72f74745bbeffafa2c7", size = 384336, upload-time = "2025-07-01T15:56:44.239Z" }, - { url = "https://files.pythonhosted.org/packages/aa/f5/a40ba78748ae8ebf4934d4b88e77b98497378bc2c24ba55ebe87a4e87057/rpds_py-0.26.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:093d63b4b0f52d98ebae33b8c50900d3d67e0666094b1be7a12fffd7f65de74b", size = 416871, upload-time = "2025-07-01T15:56:46.284Z" }, - { url = "https://files.pythonhosted.org/packages/d5/a6/33b1fc0c9f7dcfcfc4a4353daa6308b3ece22496ceece348b3e7a7559a09/rpds_py-0.26.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:2abe21d8ba64cded53a2a677e149ceb76dcf44284202d737178afe7ba540c1eb", size = 559439, upload-time = "2025-07-01T15:56:48.549Z" }, - { url = "https://files.pythonhosted.org/packages/71/2d/ceb3f9c12f8cfa56d34995097f6cd99da1325642c60d1b6680dd9df03ed8/rpds_py-0.26.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = 
"sha256:4feb7511c29f8442cbbc28149a92093d32e815a28aa2c50d333826ad2a20fdf0", size = 588380, upload-time = "2025-07-01T15:56:50.086Z" }, - { url = "https://files.pythonhosted.org/packages/c8/ed/9de62c2150ca8e2e5858acf3f4f4d0d180a38feef9fdab4078bea63d8dba/rpds_py-0.26.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:e99685fc95d386da368013e7fb4269dd39c30d99f812a8372d62f244f662709c", size = 555334, upload-time = "2025-07-01T15:56:51.703Z" }, +version = "0.27.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1e/d9/991a0dee12d9fc53ed027e26a26a64b151d77252ac477e22666b9688bc16/rpds_py-0.27.0.tar.gz", hash = "sha256:8b23cf252f180cda89220b378d917180f29d313cd6a07b2431c0d3b776aae86f", size = 27420, upload-time = "2025-08-07T08:26:39.624Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/c1/49d515434c1752e40f5e35b985260cf27af052593378580a2f139a5be6b8/rpds_py-0.27.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:dbc2ab5d10544eb485baa76c63c501303b716a5c405ff2469a1d8ceffaabf622", size = 371577, upload-time = "2025-08-07T08:23:25.379Z" }, + { url = "https://files.pythonhosted.org/packages/e1/6d/bf2715b2fee5087fa13b752b5fd573f1a93e4134c74d275f709e38e54fe7/rpds_py-0.27.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7ec85994f96a58cf7ed288caa344b7fe31fd1d503bdf13d7331ead5f70ab60d5", size = 354959, upload-time = "2025-08-07T08:23:26.767Z" }, + { url = "https://files.pythonhosted.org/packages/a3/5c/e7762808c746dd19733a81373c10da43926f6a6adcf4920a21119697a60a/rpds_py-0.27.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:190d7285cd3bb6d31d37a0534d7359c1ee191eb194c511c301f32a4afa5a1dd4", size = 381485, upload-time = "2025-08-07T08:23:27.869Z" }, + { url = "https://files.pythonhosted.org/packages/40/51/0d308eb0b558309ca0598bcba4243f52c4cd20e15fe991b5bd75824f2e61/rpds_py-0.27.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c10d92fb6d7fd827e44055fcd932ad93dac6a11e832d51534d77b97d1d85400f", size = 396816, upload-time = "2025-08-07T08:23:29.424Z" }, + { url = "https://files.pythonhosted.org/packages/5c/aa/2d585ec911d78f66458b2c91252134ca0c7c70f687a72c87283173dc0c96/rpds_py-0.27.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dd2c1d27ebfe6a015cfa2005b7fe8c52d5019f7bbdd801bc6f7499aab9ae739e", size = 514950, upload-time = "2025-08-07T08:23:30.576Z" }, + { url = "https://files.pythonhosted.org/packages/0b/ef/aced551cc1148179557aed84343073adadf252c91265263ee6203458a186/rpds_py-0.27.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4790c9d5dd565ddb3e9f656092f57268951398cef52e364c405ed3112dc7c7c1", size = 402132, upload-time = "2025-08-07T08:23:32.428Z" }, + { url = "https://files.pythonhosted.org/packages/4b/ac/cf644803d8d417653fe2b3604186861d62ea6afaef1b2284045741baef17/rpds_py-0.27.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4300e15e7d03660f04be84a125d1bdd0e6b2f674bc0723bc0fd0122f1a4585dc", size = 383660, upload-time = "2025-08-07T08:23:33.829Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ec/caf47c55ce02b76cbaeeb2d3b36a73da9ca2e14324e3d75cf72b59dcdac5/rpds_py-0.27.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:59195dc244fc183209cf8a93406889cadde47dfd2f0a6b137783aa9c56d67c85", size = 401730, upload-time = "2025-08-07T08:23:34.97Z" }, + { url = 
"https://files.pythonhosted.org/packages/0b/71/c1f355afdcd5b99ffc253422aa4bdcb04ccf1491dcd1bda3688a0c07fd61/rpds_py-0.27.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fae4a01ef8c4cb2bbe92ef2063149596907dc4a881a8d26743b3f6b304713171", size = 416122, upload-time = "2025-08-07T08:23:36.062Z" }, + { url = "https://files.pythonhosted.org/packages/38/0f/f4b5b1eda724ed0e04d2b26d8911cdc131451a7ee4c4c020a1387e5c6ded/rpds_py-0.27.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e3dc8d4ede2dbae6c0fc2b6c958bf51ce9fd7e9b40c0f5b8835c3fde44f5807d", size = 558771, upload-time = "2025-08-07T08:23:37.478Z" }, + { url = "https://files.pythonhosted.org/packages/93/c0/5f8b834db2289ab48d5cffbecbb75e35410103a77ac0b8da36bf9544ec1c/rpds_py-0.27.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:c3782fb753aa825b4ccabc04292e07897e2fd941448eabf666856c5530277626", size = 587876, upload-time = "2025-08-07T08:23:38.662Z" }, + { url = "https://files.pythonhosted.org/packages/d2/dd/1a1df02ab8eb970115cff2ae31a6f73916609b900dc86961dc382b8c2e5e/rpds_py-0.27.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:887ab1f12b0d227e9260558a4a2320024b20102207ada65c43e1ffc4546df72e", size = 554359, upload-time = "2025-08-07T08:23:39.897Z" }, + { url = "https://files.pythonhosted.org/packages/a1/e4/95a014ab0d51ab6e3bebbdb476a42d992d2bbf9c489d24cff9fda998e925/rpds_py-0.27.0-cp311-cp311-win32.whl", hash = "sha256:5d6790ff400254137b81b8053b34417e2c46921e302d655181d55ea46df58cf7", size = 218084, upload-time = "2025-08-07T08:23:41.086Z" }, + { url = "https://files.pythonhosted.org/packages/49/78/f8d5b71ec65a0376b0de31efcbb5528ce17a9b7fdd19c3763303ccfdedec/rpds_py-0.27.0-cp311-cp311-win_amd64.whl", hash = "sha256:e24d8031a2c62f34853756d9208eeafa6b940a1efcbfe36e8f57d99d52bb7261", size = 230085, upload-time = "2025-08-07T08:23:42.143Z" }, + { url = "https://files.pythonhosted.org/packages/e7/d3/84429745184091e06b4cc70f8597408e314c2d2f7f5e13249af9ffab9e3d/rpds_py-0.27.0-cp311-cp311-win_arm64.whl", hash = "sha256:08680820d23df1df0a0260f714d12966bc6c42d02e8055a91d61e03f0c47dda0", size = 222112, upload-time = "2025-08-07T08:23:43.233Z" }, + { url = "https://files.pythonhosted.org/packages/cd/17/e67309ca1ac993fa1888a0d9b2f5ccc1f67196ace32e76c9f8e1dbbbd50c/rpds_py-0.27.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:19c990fdf5acecbf0623e906ae2e09ce1c58947197f9bced6bbd7482662231c4", size = 362611, upload-time = "2025-08-07T08:23:44.773Z" }, + { url = "https://files.pythonhosted.org/packages/93/2e/28c2fb84aa7aa5d75933d1862d0f7de6198ea22dfd9a0cca06e8a4e7509e/rpds_py-0.27.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6c27a7054b5224710fcfb1a626ec3ff4f28bcb89b899148c72873b18210e446b", size = 347680, upload-time = "2025-08-07T08:23:46.014Z" }, + { url = "https://files.pythonhosted.org/packages/44/3e/9834b4c8f4f5fe936b479e623832468aa4bd6beb8d014fecaee9eac6cdb1/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09965b314091829b378b60607022048953e25f0b396c2b70e7c4c81bcecf932e", size = 384600, upload-time = "2025-08-07T08:23:48Z" }, + { url = "https://files.pythonhosted.org/packages/19/78/744123c7b38865a965cd9e6f691fde7ef989a00a256fa8bf15b75240d12f/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:14f028eb47f59e9169bfdf9f7ceafd29dd64902141840633683d0bad5b04ff34", size = 400697, upload-time = "2025-08-07T08:23:49.407Z" }, + { url = 
"https://files.pythonhosted.org/packages/32/97/3c3d32fe7daee0a1f1a678b6d4dfb8c4dcf88197fa2441f9da7cb54a8466/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6168af0be75bba990a39f9431cdfae5f0ad501f4af32ae62e8856307200517b8", size = 517781, upload-time = "2025-08-07T08:23:50.557Z" }, + { url = "https://files.pythonhosted.org/packages/b2/be/28f0e3e733680aa13ecec1212fc0f585928a206292f14f89c0b8a684cad1/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ab47fe727c13c09d0e6f508e3a49e545008e23bf762a245b020391b621f5b726", size = 406449, upload-time = "2025-08-07T08:23:51.732Z" }, + { url = "https://files.pythonhosted.org/packages/95/ae/5d15c83e337c082d0367053baeb40bfba683f42459f6ebff63a2fd7e5518/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fa01b3d5e3b7d97efab65bd3d88f164e289ec323a8c033c5c38e53ee25c007e", size = 386150, upload-time = "2025-08-07T08:23:52.822Z" }, + { url = "https://files.pythonhosted.org/packages/bf/65/944e95f95d5931112829e040912b25a77b2e7ed913ea5fe5746aa5c1ce75/rpds_py-0.27.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:6c135708e987f46053e0a1246a206f53717f9fadfba27174a9769ad4befba5c3", size = 406100, upload-time = "2025-08-07T08:23:54.339Z" }, + { url = "https://files.pythonhosted.org/packages/21/a4/1664b83fae02894533cd11dc0b9f91d673797c2185b7be0f7496107ed6c5/rpds_py-0.27.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fc327f4497b7087d06204235199daf208fd01c82d80465dc5efa4ec9df1c5b4e", size = 421345, upload-time = "2025-08-07T08:23:55.832Z" }, + { url = "https://files.pythonhosted.org/packages/7c/26/b7303941c2b0823bfb34c71378249f8beedce57301f400acb04bb345d025/rpds_py-0.27.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7e57906e38583a2cba67046a09c2637e23297618dc1f3caddbc493f2be97c93f", size = 561891, upload-time = "2025-08-07T08:23:56.951Z" }, + { url = "https://files.pythonhosted.org/packages/9b/c8/48623d64d4a5a028fa99576c768a6159db49ab907230edddc0b8468b998b/rpds_py-0.27.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f4f69d7a4300fbf91efb1fb4916421bd57804c01ab938ab50ac9c4aa2212f03", size = 591756, upload-time = "2025-08-07T08:23:58.146Z" }, + { url = "https://files.pythonhosted.org/packages/b3/51/18f62617e8e61cc66334c9fb44b1ad7baae3438662098efbc55fb3fda453/rpds_py-0.27.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b4c4fbbcff474e1e5f38be1bf04511c03d492d42eec0babda5d03af3b5589374", size = 557088, upload-time = "2025-08-07T08:23:59.6Z" }, + { url = "https://files.pythonhosted.org/packages/bd/4c/e84c3a276e2496a93d245516be6b49e20499aa8ca1c94d59fada0d79addc/rpds_py-0.27.0-cp312-cp312-win32.whl", hash = "sha256:27bac29bbbf39601b2aab474daf99dbc8e7176ca3389237a23944b17f8913d97", size = 221926, upload-time = "2025-08-07T08:24:00.695Z" }, + { url = "https://files.pythonhosted.org/packages/83/89/9d0fbcef64340db0605eb0a0044f258076f3ae0a3b108983b2c614d96212/rpds_py-0.27.0-cp312-cp312-win_amd64.whl", hash = "sha256:8a06aa1197ec0281eb1d7daf6073e199eb832fe591ffa329b88bae28f25f5fe5", size = 233235, upload-time = "2025-08-07T08:24:01.846Z" }, + { url = "https://files.pythonhosted.org/packages/c9/b0/e177aa9f39cbab060f96de4a09df77d494f0279604dc2f509263e21b05f9/rpds_py-0.27.0-cp312-cp312-win_arm64.whl", hash = "sha256:e14aab02258cb776a108107bd15f5b5e4a1bbaa61ef33b36693dfab6f89d54f9", size = 223315, upload-time = "2025-08-07T08:24:03.337Z" }, + { url = 
"https://files.pythonhosted.org/packages/81/d2/dfdfd42565a923b9e5a29f93501664f5b984a802967d48d49200ad71be36/rpds_py-0.27.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:443d239d02d9ae55b74015234f2cd8eb09e59fbba30bf60baeb3123ad4c6d5ff", size = 362133, upload-time = "2025-08-07T08:24:04.508Z" }, + { url = "https://files.pythonhosted.org/packages/ac/4a/0a2e2460c4b66021d349ce9f6331df1d6c75d7eea90df9785d333a49df04/rpds_py-0.27.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b8a7acf04fda1f30f1007f3cc96d29d8cf0a53e626e4e1655fdf4eabc082d367", size = 347128, upload-time = "2025-08-07T08:24:05.695Z" }, + { url = "https://files.pythonhosted.org/packages/35/8d/7d1e4390dfe09d4213b3175a3f5a817514355cb3524593380733204f20b9/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d0f92b78cfc3b74a42239fdd8c1266f4715b573204c234d2f9fc3fc7a24f185", size = 384027, upload-time = "2025-08-07T08:24:06.841Z" }, + { url = "https://files.pythonhosted.org/packages/c1/65/78499d1a62172891c8cd45de737b2a4b84a414b6ad8315ab3ac4945a5b61/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ce4ed8e0c7dbc5b19352b9c2c6131dd23b95fa8698b5cdd076307a33626b72dc", size = 399973, upload-time = "2025-08-07T08:24:08.143Z" }, + { url = "https://files.pythonhosted.org/packages/10/a1/1c67c1d8cc889107b19570bb01f75cf49852068e95e6aee80d22915406fc/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fde355b02934cc6b07200cc3b27ab0c15870a757d1a72fd401aa92e2ea3c6bfe", size = 515295, upload-time = "2025-08-07T08:24:09.711Z" }, + { url = "https://files.pythonhosted.org/packages/df/27/700ec88e748436b6c7c4a2262d66e80f8c21ab585d5e98c45e02f13f21c0/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13bbc4846ae4c993f07c93feb21a24d8ec637573d567a924b1001e81c8ae80f9", size = 406737, upload-time = "2025-08-07T08:24:11.182Z" }, + { url = "https://files.pythonhosted.org/packages/33/cc/6b0ee8f0ba3f2df2daac1beda17fde5cf10897a7d466f252bd184ef20162/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be0744661afbc4099fef7f4e604e7f1ea1be1dd7284f357924af12a705cc7d5c", size = 385898, upload-time = "2025-08-07T08:24:12.798Z" }, + { url = "https://files.pythonhosted.org/packages/e8/7e/c927b37d7d33c0a0ebf249cc268dc2fcec52864c1b6309ecb960497f2285/rpds_py-0.27.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:069e0384a54f427bd65d7fda83b68a90606a3835901aaff42185fcd94f5a9295", size = 405785, upload-time = "2025-08-07T08:24:14.906Z" }, + { url = "https://files.pythonhosted.org/packages/5b/d2/8ed50746d909dcf402af3fa58b83d5a590ed43e07251d6b08fad1a535ba6/rpds_py-0.27.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4bc262ace5a1a7dc3e2eac2fa97b8257ae795389f688b5adf22c5db1e2431c43", size = 419760, upload-time = "2025-08-07T08:24:16.129Z" }, + { url = "https://files.pythonhosted.org/packages/d3/60/2b2071aee781cb3bd49f94d5d35686990b925e9b9f3e3d149235a6f5d5c1/rpds_py-0.27.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2fe6e18e5c8581f0361b35ae575043c7029d0a92cb3429e6e596c2cdde251432", size = 561201, upload-time = "2025-08-07T08:24:17.645Z" }, + { url = "https://files.pythonhosted.org/packages/98/1f/27b67304272521aaea02be293fecedce13fa351a4e41cdb9290576fc6d81/rpds_py-0.27.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d93ebdb82363d2e7bec64eecdc3632b59e84bd270d74fe5be1659f7787052f9b", size = 591021, upload-time = "2025-08-07T08:24:18.999Z" 
}, + { url = "https://files.pythonhosted.org/packages/db/9b/a2fadf823164dd085b1f894be6443b0762a54a7af6f36e98e8fcda69ee50/rpds_py-0.27.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0954e3a92e1d62e83a54ea7b3fdc9efa5d61acef8488a8a3d31fdafbfb00460d", size = 556368, upload-time = "2025-08-07T08:24:20.54Z" }, + { url = "https://files.pythonhosted.org/packages/24/f3/6d135d46a129cda2e3e6d4c5e91e2cc26ea0428c6cf152763f3f10b6dd05/rpds_py-0.27.0-cp313-cp313-win32.whl", hash = "sha256:2cff9bdd6c7b906cc562a505c04a57d92e82d37200027e8d362518df427f96cd", size = 221236, upload-time = "2025-08-07T08:24:22.144Z" }, + { url = "https://files.pythonhosted.org/packages/c5/44/65d7494f5448ecc755b545d78b188440f81da98b50ea0447ab5ebfdf9bd6/rpds_py-0.27.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc79d192fb76fc0c84f2c58672c17bbbc383fd26c3cdc29daae16ce3d927e8b2", size = 232634, upload-time = "2025-08-07T08:24:23.642Z" }, + { url = "https://files.pythonhosted.org/packages/70/d9/23852410fadab2abb611733933401de42a1964ce6600a3badae35fbd573e/rpds_py-0.27.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b3a5c8089eed498a3af23ce87a80805ff98f6ef8f7bdb70bd1b7dae5105f6ac", size = 222783, upload-time = "2025-08-07T08:24:25.098Z" }, + { url = "https://files.pythonhosted.org/packages/15/75/03447917f78512b34463f4ef11066516067099a0c466545655503bed0c77/rpds_py-0.27.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:90fb790138c1a89a2e58c9282fe1089638401f2f3b8dddd758499041bc6e0774", size = 359154, upload-time = "2025-08-07T08:24:26.249Z" }, + { url = "https://files.pythonhosted.org/packages/6b/fc/4dac4fa756451f2122ddaf136e2c6aeb758dc6fdbe9ccc4bc95c98451d50/rpds_py-0.27.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:010c4843a3b92b54373e3d2291a7447d6c3fc29f591772cc2ea0e9f5c1da434b", size = 343909, upload-time = "2025-08-07T08:24:27.405Z" }, + { url = "https://files.pythonhosted.org/packages/7b/81/723c1ed8e6f57ed9d8c0c07578747a2d3d554aaefc1ab89f4e42cfeefa07/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9ce7a9e967afc0a2af7caa0d15a3e9c1054815f73d6a8cb9225b61921b419bd", size = 379340, upload-time = "2025-08-07T08:24:28.714Z" }, + { url = "https://files.pythonhosted.org/packages/98/16/7e3740413de71818ce1997df82ba5f94bae9fff90c0a578c0e24658e6201/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:aa0bf113d15e8abdfee92aa4db86761b709a09954083afcb5bf0f952d6065fdb", size = 391655, upload-time = "2025-08-07T08:24:30.223Z" }, + { url = "https://files.pythonhosted.org/packages/e0/63/2a9f510e124d80660f60ecce07953f3f2d5f0b96192c1365443859b9c87f/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb91d252b35004a84670dfeafadb042528b19842a0080d8b53e5ec1128e8f433", size = 513017, upload-time = "2025-08-07T08:24:31.446Z" }, + { url = "https://files.pythonhosted.org/packages/2c/4e/cf6ff311d09776c53ea1b4f2e6700b9d43bb4e99551006817ade4bbd6f78/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:db8a6313dbac934193fc17fe7610f70cd8181c542a91382531bef5ed785e5615", size = 402058, upload-time = "2025-08-07T08:24:32.613Z" }, + { url = "https://files.pythonhosted.org/packages/88/11/5e36096d474cb10f2a2d68b22af60a3bc4164fd8db15078769a568d9d3ac/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce96ab0bdfcef1b8c371ada2100767ace6804ea35aacce0aef3aeb4f3f499ca8", size = 383474, upload-time = "2025-08-07T08:24:33.767Z" }, + { url = 
"https://files.pythonhosted.org/packages/db/a2/3dff02805b06058760b5eaa6d8cb8db3eb3e46c9e452453ad5fc5b5ad9fe/rpds_py-0.27.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:7451ede3560086abe1aa27dcdcf55cd15c96b56f543fb12e5826eee6f721f858", size = 400067, upload-time = "2025-08-07T08:24:35.021Z" }, + { url = "https://files.pythonhosted.org/packages/67/87/eed7369b0b265518e21ea836456a4ed4a6744c8c12422ce05bce760bb3cf/rpds_py-0.27.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:32196b5a99821476537b3f7732432d64d93a58d680a52c5e12a190ee0135d8b5", size = 412085, upload-time = "2025-08-07T08:24:36.267Z" }, + { url = "https://files.pythonhosted.org/packages/8b/48/f50b2ab2fbb422fbb389fe296e70b7a6b5ea31b263ada5c61377e710a924/rpds_py-0.27.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a029be818059870664157194e46ce0e995082ac49926f1423c1f058534d2aaa9", size = 555928, upload-time = "2025-08-07T08:24:37.573Z" }, + { url = "https://files.pythonhosted.org/packages/98/41/b18eb51045d06887666c3560cd4bbb6819127b43d758f5adb82b5f56f7d1/rpds_py-0.27.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3841f66c1ffdc6cebce8aed64e36db71466f1dc23c0d9a5592e2a782a3042c79", size = 585527, upload-time = "2025-08-07T08:24:39.391Z" }, + { url = "https://files.pythonhosted.org/packages/be/03/a3dd6470fc76499959b00ae56295b76b4bdf7c6ffc60d62006b1217567e1/rpds_py-0.27.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:42894616da0fc0dcb2ec08a77896c3f56e9cb2f4b66acd76fc8992c3557ceb1c", size = 554211, upload-time = "2025-08-07T08:24:40.6Z" }, + { url = "https://files.pythonhosted.org/packages/bf/d1/ee5fd1be395a07423ac4ca0bcc05280bf95db2b155d03adefeb47d5ebf7e/rpds_py-0.27.0-cp313-cp313t-win32.whl", hash = "sha256:b1fef1f13c842a39a03409e30ca0bf87b39a1e2a305a9924deadb75a43105d23", size = 216624, upload-time = "2025-08-07T08:24:42.204Z" }, + { url = "https://files.pythonhosted.org/packages/1c/94/4814c4c858833bf46706f87349c37ca45e154da7dbbec9ff09f1abeb08cc/rpds_py-0.27.0-cp313-cp313t-win_amd64.whl", hash = "sha256:183f5e221ba3e283cd36fdfbe311d95cd87699a083330b4f792543987167eff1", size = 230007, upload-time = "2025-08-07T08:24:43.329Z" }, + { url = "https://files.pythonhosted.org/packages/59/64/72ab5b911fdcc48058359b0e786e5363e3fde885156116026f1a2ba9a5b5/rpds_py-0.27.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e6491658dd2569f05860bad645569145c8626ac231877b0fb2d5f9bcb7054089", size = 371658, upload-time = "2025-08-07T08:26:02.369Z" }, + { url = "https://files.pythonhosted.org/packages/6c/4b/90ff04b4da055db53d8fea57640d8d5d55456343a1ec9a866c0ecfe10fd1/rpds_py-0.27.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:bec77545d188f8bdd29d42bccb9191682a46fb2e655e3d1fb446d47c55ac3b8d", size = 355529, upload-time = "2025-08-07T08:26:03.83Z" }, + { url = "https://files.pythonhosted.org/packages/a4/be/527491fb1afcd86fc5ce5812eb37bc70428ee017d77fee20de18155c3937/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25a4aebf8ca02bbb90a9b3e7a463bbf3bee02ab1c446840ca07b1695a68ce424", size = 382822, upload-time = "2025-08-07T08:26:05.52Z" }, + { url = "https://files.pythonhosted.org/packages/e0/a5/dcdb8725ce11e6d0913e6fcf782a13f4b8a517e8acc70946031830b98441/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:44524b96481a4c9b8e6c46d6afe43fa1fb485c261e359fbe32b63ff60e3884d8", size = 397233, upload-time = "2025-08-07T08:26:07.179Z" }, + { url = 
"https://files.pythonhosted.org/packages/33/f9/0947920d1927e9f144660590cc38cadb0795d78fe0d9aae0ef71c1513b7c/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:45d04a73c54b6a5fd2bab91a4b5bc8b426949586e61340e212a8484919183859", size = 514892, upload-time = "2025-08-07T08:26:08.622Z" }, + { url = "https://files.pythonhosted.org/packages/1d/ed/d1343398c1417c68f8daa1afce56ef6ce5cc587daaf98e29347b00a80ff2/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:343cf24de9ed6c728abefc5d5c851d5de06497caa7ac37e5e65dd572921ed1b5", size = 402733, upload-time = "2025-08-07T08:26:10.433Z" }, + { url = "https://files.pythonhosted.org/packages/1d/0b/646f55442cd14014fb64d143428f25667a100f82092c90087b9ea7101c74/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7aed8118ae20515974650d08eb724150dc2e20c2814bcc307089569995e88a14", size = 384447, upload-time = "2025-08-07T08:26:11.847Z" }, + { url = "https://files.pythonhosted.org/packages/4b/15/0596ef7529828e33a6c81ecf5013d1dd33a511a3e0be0561f83079cda227/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:af9d4fd79ee1cc8e7caf693ee02737daabfc0fcf2773ca0a4735b356c8ad6f7c", size = 402502, upload-time = "2025-08-07T08:26:13.537Z" }, + { url = "https://files.pythonhosted.org/packages/c3/8d/986af3c42f8454a6cafff8729d99fb178ae9b08a9816325ac7a8fa57c0c0/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f0396e894bd1e66c74ecbc08b4f6a03dc331140942c4b1d345dd131b68574a60", size = 416651, upload-time = "2025-08-07T08:26:14.923Z" }, + { url = "https://files.pythonhosted.org/packages/e9/9a/b4ec3629b7b447e896eec574469159b5b60b7781d3711c914748bf32de05/rpds_py-0.27.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:59714ab0a5af25d723d8e9816638faf7f4254234decb7d212715c1aa71eee7be", size = 559460, upload-time = "2025-08-07T08:26:16.295Z" }, + { url = "https://files.pythonhosted.org/packages/61/63/d1e127b40c3e4733b3a6f26ae7a063cdf2bc1caa5272c89075425c7d397a/rpds_py-0.27.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:88051c3b7d5325409f433c5a40328fcb0685fc04e5db49ff936e910901d10114", size = 588072, upload-time = "2025-08-07T08:26:17.776Z" }, + { url = "https://files.pythonhosted.org/packages/04/7e/8ffc71a8f6833d9c9fb999f5b0ee736b8b159fd66968e05c7afc2dbcd57e/rpds_py-0.27.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:181bc29e59e5e5e6e9d63b143ff4d5191224d355e246b5a48c88ce6b35c4e466", size = 555083, upload-time = "2025-08-07T08:26:19.301Z" }, ] [[package]] @@ -3048,62 +2982,16 @@ wheels = [ [[package]] name = "s3fs" -version = "2024.12.0" +version = "2025.7.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiobotocore" }, { name = "aiohttp" }, { name = "fsspec" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/96/88/e2fc4fc2a618126ac3cea9b16a4abc5a37dff2522067c9730b5d72d67ac3/s3fs-2024.12.0.tar.gz", hash = "sha256:1b0f3a8f5946cca5ba29871d6792ab1e4528ed762327d8aefafc81b73b99fd56", size = 76578, upload-time = "2024-12-19T20:05:42.779Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/af/eaec1466887348d7f6cc9d3a668b30b62a4629fb187d0268146118ba3d5e/s3fs-2024.12.0-py3-none-any.whl", hash = "sha256:d8665549f9d1de083151582437a2f10d5f3b3227c1f8e67a2b0b730db813e005", size = 30196, upload-time = "2024-12-19T20:05:40.095Z" }, -] - -[[package]] -name = "safety" -version = "3.6.0" -source = { 
registry = "https://pypi.org/simple" } -dependencies = [ - { name = "authlib" }, - { name = "click" }, - { name = "dparse" }, - { name = "filelock" }, - { name = "httpx" }, - { name = "jinja2" }, - { name = "marshmallow" }, - { name = "nltk" }, - { name = "packaging" }, - { name = "psutil" }, - { name = "pydantic" }, - { name = "requests" }, - { name = "ruamel-yaml" }, - { name = "safety-schemas" }, - { name = "setuptools" }, - { name = "tenacity" }, - { name = "tomlkit" }, - { name = "typer" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/04/c0/832778e8f53e922f1939455cab4149339ca66c4f06e3c67599511d857279/safety-3.6.0.tar.gz", hash = "sha256:a820f827699f83d3d5c2faab24c0ac4d094911931503180847d97942c0a6f7e3", size = 291056, upload-time = "2025-07-09T11:49:52.384Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/45/12/7f7dfcd6975011e73573ac8acdd301b29f976d27e6149d8448051ad62189/safety-3.6.0-py3-none-any.whl", hash = "sha256:9cddbfd7a578b35e4d29df4fb6e3808425ed792a09d81400a5b7f4b4535ef666", size = 285260, upload-time = "2025-07-09T11:49:50.994Z" }, -] - -[[package]] -name = "safety-schemas" -version = "0.0.14" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "dparse" }, - { name = "packaging" }, - { name = "pydantic" }, - { name = "ruamel-yaml" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/04/40/e5107b3e456ca4b78d1c0d5bd07be3377e673cc54949b18e5f3aed345067/safety_schemas-0.0.14.tar.gz", hash = "sha256:49953f7a59e919572be25595a8946f9cbbcd2066fe3e160c9467d9d1d6d7af6a", size = 53216, upload-time = "2025-04-15T22:15:50.793Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bf/13/37438c4672ba1d23ec46df0e4b57e98469e5c5f4f98313cf6842b631652b/s3fs-2025.7.0.tar.gz", hash = "sha256:5e7f9ec0cad7745155e3eb86fae15b1481fa29946bf5b3a4ce3a60701ce6022d", size = 77795, upload-time = "2025-07-15T16:35:22.177Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/7f/f3d1ac7eb0a6546eda3e82c0487233cea0774e511239769945dbd1dd01de/safety_schemas-0.0.14-py3-none-any.whl", hash = "sha256:0bf6fc4aa5e474651b714cc9e427c862792946bf052b61d5c7bec4eac4c0f254", size = 39268, upload-time = "2025-04-15T22:15:49.317Z" }, + { url = "https://files.pythonhosted.org/packages/ff/c7/30d13b7fd4f866ca3f30e9a6e7ae038f0c45226f6e26b3cc98d6d197f93b/s3fs-2025.7.0-py3-none-any.whl", hash = "sha256:b6b2d3f84b6aa1c2ba5e62e39dd9410cf54f10a2cce1ea6db1ba0d1a6bcce685", size = 30315, upload-time = "2025-07-15T16:35:20.734Z" }, ] [[package]] @@ -3419,15 +3307,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/44/aa5c8b10b2cce7a053018e0d132bd58e27527a0243c4985383d5b6fd93e9/tblib-3.1.0-py3-none-any.whl", hash = "sha256:670bb4582578134b3d81a84afa1b016128b429f3d48e6cbbaecc9d15675e984e", size = 12552, upload-time = "2025-03-31T12:58:26.142Z" }, ] -[[package]] -name = "tenacity" -version = "9.1.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036, upload-time = "2025-04-02T08:25:09.966Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", 
size = 28248, upload-time = "2025-04-02T08:25:07.678Z" }, -] - [[package]] name = "tomli" version = "2.2.1" @@ -3467,15 +3346,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257, upload-time = "2024-11-27T22:38:35.385Z" }, ] -[[package]] -name = "tomlkit" -version = "0.13.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cc/18/0bbf3884e9eaa38819ebe46a7bd25dcd56b67434402b66a58c4b8e552575/tomlkit-0.13.3.tar.gz", hash = "sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1", size = 185207, upload-time = "2025-06-05T07:13:44.947Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bd/75/8539d011f6be8e29f339c42e633aae3cb73bffa95dd0f9adec09b9c58e85/tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0", size = 38901, upload-time = "2025-06-05T07:13:43.546Z" }, -] - [[package]] name = "toolz" version = "1.0.0" @@ -3804,6 +3674,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594, upload-time = "2025-01-14T10:35:44.018Z" }, ] +[[package]] +name = "xarray" +version = "2025.7.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e8/c5/a31ba8605005ef080c3d35efc696ddd851aee0a7a22420f9afebec386281/xarray-2025.7.1.tar.gz", hash = "sha256:2884bf5672b540fcc6ff8c20a3196bda0d78fbfb4d67398d60526e97c2faceef", size = 3013717, upload-time = "2025-07-10T04:53:07.01Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b2/ea/9554e5fb78eda4dbc9e9ccaf23034166fe3e9ea9af82ea6204b9578434bc/xarray-2025.7.1-py3-none-any.whl", hash = "sha256:e8647b659e53bd350d7c5a91c34dd4122ad6a3ca0bc41399d424a7c0273c7635", size = 1324464, upload-time = "2025-07-10T04:53:05.104Z" }, +] + [[package]] name = "xdoctest" version = "1.2.0"