-
Notifications
You must be signed in to change notification settings - Fork 51
Open
Labels
bug: Something isn't working
Milestone
Description
Hey! I'm diving into virtual datasets in Icechunk, starting with GOES 19 .nc files. Creating the virtual datasets, concatenating them, and committing to an Icechunk repo is all smooth sailing.
However, when I attempt to open the store back into xarray, I find that a _FillValue triggers an assert statement. Running the same assert against all the values for each of the 128 variables in the NetCDF files, I am pretty sure it is _FillValue == -1 triggering the assert.
I have tried replacing the _FillValue encoding manually to:
str(-1): Error: Invalid base64-encoded string: number of data characters (1) cannot be 1 more than a multiple of 4 - so then
base64 encoding, where -1 -> //////////8=, but that then throws an "invalid for base10" value error, which seems odd to me for an int8 dtype where negative values should be allowed.
xr.open_zarr(session.store, zarr_format=3, mask_and_scale=False/None, decode_cf=False/None)
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
Cell In[231], line 1
----> 1 xr.open_zarr(session.store, zarr_format=3, mask_and_scale=False, decode_cf=False)
File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py:1505](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py#line=1504), in open_zarr(store, group, synchronizer, chunks, decode_cf, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, consolidated, overwrite_encoded_chunks, chunk_store, storage_options, decode_timedelta, use_cftime, zarr_version, zarr_format, use_zarr_fill_value_as_mask, chunked_array_type, from_array_kwargs, **kwargs)
1491 raise TypeError(
1492 "open_zarr() got unexpected keyword arguments " + ",".join(kwargs.keys())
1493 )
1495 backend_kwargs = {
1496 "synchronizer": synchronizer,
1497 "consolidated": consolidated,
(...)
1502 "zarr_format": zarr_format,
1503 }
-> 1505 ds = open_dataset(
1506 filename_or_obj=store,
1507 group=group,
1508 decode_cf=decode_cf,
1509 mask_and_scale=mask_and_scale,
1510 decode_times=decode_times,
1511 concat_characters=concat_characters,
1512 decode_coords=decode_coords,
1513 engine="zarr",
1514 chunks=chunks,
1515 drop_variables=drop_variables,
1516 chunked_array_type=chunked_array_type,
1517 from_array_kwargs=from_array_kwargs,
1518 backend_kwargs=backend_kwargs,
1519 decode_timedelta=decode_timedelta,
1520 use_cftime=use_cftime,
1521 zarr_version=zarr_version,
1522 use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask,
1523 )
1524 return ds
File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/api.py:687](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/api.py#line=686), in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
675 decoders = _resolve_decoders_kwargs(
676 decode_cf,
677 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...)
683 decode_coords=decode_coords,
684 )
686 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 687 backend_ds = backend.open_dataset(
688 filename_or_obj,
689 drop_variables=drop_variables,
690 **decoders,
691 **kwargs,
692 )
693 ds = _dataset_from_backend_dataset(
694 backend_ds,
695 filename_or_obj,
(...)
705 **kwargs,
706 )
707 return ds
File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py:1595](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py#line=1594), in ZarrBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, chunk_store, storage_options, zarr_version, zarr_format, store, engine, use_zarr_fill_value_as_mask, cache_members)
1593 store_entrypoint = StoreBackendEntrypoint()
1594 with close_on_error(store):
-> 1595 ds = store_entrypoint.open_dataset(
1596 store,
1597 mask_and_scale=mask_and_scale,
1598 decode_times=decode_times,
1599 concat_characters=concat_characters,
1600 decode_coords=decode_coords,
1601 drop_variables=drop_variables,
1602 use_cftime=use_cftime,
1603 decode_timedelta=decode_timedelta,
1604 )
1605 return ds
File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/store.py:44](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/store.py#line=43), in StoreBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta)
30 def open_dataset(
31 self,
32 filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
(...)
40 decode_timedelta=None,
41 ) -> Dataset:
42 assert isinstance(filename_or_obj, AbstractDataStore)
---> 44 vars, attrs = filename_or_obj.load()
45 encoding = filename_or_obj.get_encoding()
47 vars, attrs, coord_names = conventions.decode_cf_variables(
48 vars,
49 attrs,
(...)
56 decode_timedelta=decode_timedelta,
57 )
File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/common.py:312](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/common.py#line=311), in AbstractDataStore.load(self)
293 def load(self):
294 """
295 This loads the variables and attributes simultaneously.
296 A centralized loading function makes it easier to create
(...)
309 are requested, so care should be taken to make sure its fast.
310 """
311 variables = FrozenDict(
--> 312 (_decode_variable_name(k), v) for k, v in self.get_variables().items()
313 )
314 attributes = FrozenDict(self.get_attrs())
315 return variables, attributes
File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py:826](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py#line=825), in ZarrStore.get_variables(self)
825 def get_variables(self):
--> 826 return FrozenDict((k, self.open_store_variable(k)) for k in self.array_keys())
File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/core/utils.py:468](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/core/utils.py#line=467), in FrozenDict(*args, **kwargs)
467 def FrozenDict(*args, **kwargs) -> Frozen:
--> 468 return Frozen(dict(*args, **kwargs))
File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py:826](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py#line=825), in <genexpr>(.0)
825 def get_variables(self):
--> 826 return FrozenDict((k, self.open_store_variable(k)) for k in self.array_keys())
File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py:819](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py#line=818), in ZarrStore.open_store_variable(self, name)
817 attributes["_FillValue"] = zarr_array.fill_value
818 elif "_FillValue" in attributes:
--> 819 attributes["_FillValue"] = FillValueCoder.decode(
820 attributes["_FillValue"], zarr_array.dtype
821 )
823 return Variable(dimensions, data, attributes, encoding)
File [~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py:153](http://localhost:8888/lab/tree/notebooks/~/GitHub/geo-features-ingestion/.pixi/envs/notebook/lib/python3.11/site-packages/xarray/backends/zarr.py#line=152), in FillValueCoder.decode(cls, value, dtype)
151 np_dtype = np.dtype(dtype)
152 if np_dtype.kind in "f":
--> 153 assert isinstance(value, str | bytes)
154 return struct.unpack("<d", base64.standard_b64decode(value))[0]
155 elif np_dtype.kind in "b":
AssertionError:
Any insight? Using the latest stable version, is this something an upgrade fixes?
sharkinsspatial
Metadata
Metadata
Assignees
Labels
bug: Something isn't working