Skip to content

Issue opening some virtual icechunk datasets due to _FillValue #628

@xaviernogueira

Description

@xaviernogueira

Hey! I'm diving into virtual datasets in Icechunk, starting with GOES 19 .nc files. Creating the virtual datasets, concatenating them, and committing to an Icechunk repo is all smooth sailing.

However, when I attempt to open the store back into xarray, a _FillValue triggers an assert statement. After running the same assert against the values for each of the 128 variables in the NetCDF files, I am pretty sure it is _FillValue == -1 that triggers the assert.

I have tried manually replacing the _FillValue encoding with:

  • str(-1): Error: Invalid base64-encoded string: number of data characters (1) cannot be 1 more than a multiple of 4
  • then the base64-encoded value (-1 -> //////////8=), but that throws an "invalid for base10" value error, which seems odd to me for an int8 dtype, where negative values should be allowed.
xr.open_zarr(session.store, zarr_format=3, mask_and_scale=False/None, decode_cf=False/None)
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
Cell In[231], line 1
----> 1 xr.open_zarr(session.store, zarr_format=3, mask_and_scale=False, decode_cf=False)

File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py:1505](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py#line=1504), in open_zarr(store, group, synchronizer, chunks, decode_cf, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, consolidated, overwrite_encoded_chunks, chunk_store, storage_options, decode_timedelta, use_cftime, zarr_version, zarr_format, use_zarr_fill_value_as_mask, chunked_array_type, from_array_kwargs, **kwargs)
   1491     raise TypeError(
   1492         "open_zarr() got unexpected keyword arguments " + ",".join(kwargs.keys())
   1493     )
   1495 backend_kwargs = {
   1496     "synchronizer": synchronizer,
   1497     "consolidated": consolidated,
   (...)
   1502     "zarr_format": zarr_format,
   1503 }
-> 1505 ds = open_dataset(
   1506     filename_or_obj=store,
   1507     group=group,
   1508     decode_cf=decode_cf,
   1509     mask_and_scale=mask_and_scale,
   1510     decode_times=decode_times,
   1511     concat_characters=concat_characters,
   1512     decode_coords=decode_coords,
   1513     engine="zarr",
   1514     chunks=chunks,
   1515     drop_variables=drop_variables,
   1516     chunked_array_type=chunked_array_type,
   1517     from_array_kwargs=from_array_kwargs,
   1518     backend_kwargs=backend_kwargs,
   1519     decode_timedelta=decode_timedelta,
   1520     use_cftime=use_cftime,
   1521     zarr_version=zarr_version,
   1522     use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask,
   1523 )
   1524 return ds

File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/api.py:687](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/api.py#line=686), in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    675 decoders = _resolve_decoders_kwargs(
    676     decode_cf,
    677     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)
    683     decode_coords=decode_coords,
    684 )
    686 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 687 backend_ds = backend.open_dataset(
    688     filename_or_obj,
    689     drop_variables=drop_variables,
    690     **decoders,
    691     **kwargs,
    692 )
    693 ds = _dataset_from_backend_dataset(
    694     backend_ds,
    695     filename_or_obj,
   (...)
    705     **kwargs,
    706 )
    707 return ds

File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py:1595](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py#line=1594), in ZarrBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, zarr_version, zarr_format, store, engine, use_zarr_fill_value_as_mask, cache_members)
   1593 store_entrypoint = StoreBackendEntrypoint()
   1594 with close_on_error(store):
-> 1595     ds = store_entrypoint.open_dataset(
   1596         store,
   1597         mask_and_scale=mask_and_scale,
   1598         decode_times=decode_times,
   1599         concat_characters=concat_characters,
   1600         decode_coords=decode_coords,
   1601         drop_variables=drop_variables,
   1602         use_cftime=use_cftime,
   1603         decode_timedelta=decode_timedelta,
   1604     )
   1605 return ds

File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/store.py:44](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/store.py#line=43), in StoreBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta)
     30 def open_dataset(
     31     self,
     32     filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
   (...)
     40     decode_timedelta=None,
     41 ) -> Dataset:
     42     assert isinstance(filename_or_obj, AbstractDataStore)
---> 44     vars, attrs = filename_or_obj.load()
     45     encoding = filename_or_obj.get_encoding()
     47     vars, attrs, coord_names = conventions.decode_cf_variables(
     48         vars,
     49         attrs,
   (...)
     56         decode_timedelta=decode_timedelta,
     57     )

File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/common.py:312](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/common.py#line=311), in AbstractDataStore.load(self)
    293 def load(self):
    294     """
    295     This loads the variables and attributes simultaneously.
    296     A centralized loading function makes it easier to create
   (...)
    309     are requested, so care should be taken to make sure its fast.
    310     """
    311     variables = FrozenDict(
--> 312         (_decode_variable_name(k), v) for k, v in self.get_variables().items()
    313     )
    314     attributes = FrozenDict(self.get_attrs())
    315     return variables, attributes

File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py:826](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py#line=825), in ZarrStore.get_variables(self)
    825 def get_variables(self):
--> 826     return FrozenDict((k, self.open_store_variable(k)) for k in self.array_keys())

File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/core/utils.py:468](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/core/utils.py#line=467), in FrozenDict(*args, **kwargs)
    467 def FrozenDict(*args, **kwargs) -> Frozen:
--> 468     return Frozen(dict(*args, **kwargs))

File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py:826](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py#line=825), in <genexpr>(.0)
    825 def get_variables(self):
--> 826     return FrozenDict((k, self.open_store_variable(k)) for k in self.array_keys())

File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py:819](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py#line=818), in ZarrStore.open_store_variable(self, name)
    817         attributes["_FillValue"] = zarr_array.fill_value
    818 elif "_FillValue" in attributes:
--> 819     attributes["_FillValue"] = FillValueCoder.decode(
    820         attributes["_FillValue"], zarr_array.dtype
    821     )
    823 return Variable(dimensions, data, attributes, encoding)

File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py:153](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py#line=152), in FillValueCoder.decode(cls, value, dtype)
    151 np_dtype = np.dtype(dtype)
    152 if np_dtype.kind in "f":
--> 153     assert isinstance(value, str | bytes)
    154     return struct.unpack("<d", base64.standard_b64decode(value))[0]
    155 elif np_dtype.kind in "b":

AssertionError:

Any insight? I am using the latest stable version; is this something an upgrade would fix?

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions