Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ Attributes
Dataset.indexes
Dataset.get_index
Dataset.chunks
Dataset.chunksizes
Dataset.nbytes

Dictionary interface
Expand Down Expand Up @@ -271,6 +272,7 @@ Attributes
DataArray.encoding
DataArray.indexes
DataArray.get_index
DataArray.chunksizes

**ndarray attributes**:
:py:attr:`~DataArray.ndim`
Expand Down
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ New Features
`Nathan Lis <https://github.com/wxman22>`_.
- Histogram plots are set with a title displaying the scalar coords if any, similarly to the other plots (:issue:`5791`, :pull:`5792`).
By `Maxime Liquet <https://github.com/maximlt>`_.
- Added new :py:attr:`Dataset.chunksizes`, :py:attr:`DataArray.chunksizes`, and :py:attr:`Variable.chunksizes`
properties, which always return a mapping from dimension names to the chunking pattern along that dimension, guaranteed
to be consistent between ``Dataset``, ``DataArray``, and ``Variable`` objects. (:issue:`5846`, :pull:`5900`)
By `Tom Nicholas <https://github.com/TomNicholas>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand Down
17 changes: 17 additions & 0 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1813,6 +1813,23 @@ def ones_like(other, dtype: DTypeLike = None):
return full_like(other, 1, dtype)


def get_chunksizes(
    variables: Iterable[Variable],
) -> Mapping[Any, Tuple[int, ...]]:
    """Collect the per-dimension chunk sizes shared by a group of variables.

    Parameters
    ----------
    variables : iterable of Variable
        Variables to inspect. A variable whose wrapped ``_data`` has no
        ``chunks`` attribute contributes nothing to the result.

    Returns
    -------
    chunksizes : Mapping
        Read-only (``Frozen``) mapping from dimension name to the tuple of
        block lengths along that dimension. Empty if no variable is chunked.

    Raises
    ------
    ValueError
        If two variables report different chunks along the same dimension.
    """
    chunks: Dict[Any, Tuple[int, ...]] = {}
    for v in variables:
        # Probe the wrapped ``_data`` rather than the public ``data`` accessor,
        # matching ``Variable.chunks`` / ``Variable.chunksizes``. The accessor
        # may do more than an attribute lookup (NOTE(review): upstream reports
        # it materialising lazily indexed backend arrays), which is far too
        # expensive for a metadata query.
        if hasattr(v._data, "chunks"):
            for dim, c in v.chunksizes.items():
                if dim in chunks and c != chunks[dim]:
                    raise ValueError(
                        f"Object has inconsistent chunks along dimension {dim}. "
                        "This can be fixed by calling unify_chunks()."
                    )
                chunks[dim] = c
    return Frozen(chunks)


def is_np_datetime_like(dtype: DTypeLike) -> bool:
    """Return True if ``dtype`` is a numpy datetime64 or timedelta64 dtype."""
    datetime_kinds = (np.datetime64, np.timedelta64)
    return any(np.issubdtype(dtype, kind) for kind in datetime_kinds)
Expand Down
32 changes: 29 additions & 3 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
reindex_like_indexers,
)
from .arithmetic import DataArrayArithmetic
from .common import AbstractArray, DataWithCoords
from .common import AbstractArray, DataWithCoords, get_chunksizes
from .computation import unify_chunks
from .coordinates import (
DataArrayCoordinates,
Expand Down Expand Up @@ -1058,11 +1058,37 @@ def __deepcopy__(self, memo=None) -> "DataArray":

@property
def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]:
"""Block dimensions for this array's data or None if it's not a dask
array.
"""
Tuple of block lengths for this dataarray's data, in order of dimensions, or None if
the underlying data is not a dask array.

See Also
--------
DataArray.chunk
DataArray.chunksizes
xarray.unify_chunks
"""
return self.variable.chunks

@property
def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]:
    """
    Mapping from dimension names to block lengths for this dataarray's data and
    coordinates. Only chunked variables contribute, so the mapping is empty (never
    None) when nothing is chunked.
    Cannot be modified directly, but can be modified by calling .chunk().

    Differs from DataArray.chunks because it returns a mapping of dimensions to chunk shapes
    instead of a tuple of chunk shapes.

    Raises
    ------
    ValueError
        If the data variable and the coordinates are chunked inconsistently
        along a shared dimension.

    See Also
    --------
    DataArray.chunk
    DataArray.chunks
    xarray.unify_chunks
    """
    # The data variable goes first, followed by every coordinate variable.
    all_variables = [self.variable, *(c.variable for c in self.coords.values())]
    return get_chunksizes(all_variables)

def chunk(
self,
chunks: Union[
Expand Down
51 changes: 37 additions & 14 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
)
from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align
from .arithmetic import DatasetArithmetic
from .common import DataWithCoords, _contains_datetime_like_objects
from .common import DataWithCoords, _contains_datetime_like_objects, get_chunksizes
from .computation import unify_chunks
from .coordinates import (
DatasetCoordinates,
Expand Down Expand Up @@ -2090,20 +2090,37 @@ def info(self, buf=None) -> None:

@property
def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
"""Block dimensions for this dataset's data or None if it's not a dask
array.
"""
chunks: Dict[Hashable, Tuple[int, ...]] = {}
for v in self.variables.values():
if v.chunks is not None:
for dim, c in zip(v.dims, v.chunks):
if dim in chunks and c != chunks[dim]:
raise ValueError(
f"Object has inconsistent chunks along dimension {dim}. "
"This can be fixed by calling unify_chunks()."
)
chunks[dim] = c
return Frozen(chunks)
Mapping from dimension names to block lengths for this dataset's data. Only dimensions of
chunked (e.g. dask-backed) variables are included; the mapping is empty if no variable is chunked.
Cannot be modified directly, but can be modified by calling .chunk().

Same as Dataset.chunksizes, but maintained for backwards compatibility.

See Also
--------
Dataset.chunk
Dataset.chunksizes
xarray.unify_chunks
"""
return get_chunksizes(self.variables.values())

@property
def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]:
    """
    Mapping from dimension names to block lengths for this dataset's data. Only
    chunked variables contribute, so the mapping is empty (never None) when no
    variable is chunked.
    Cannot be modified directly, but can be modified by calling .chunk().

    Same as Dataset.chunks.

    Raises
    ------
    ValueError
        If two variables are chunked inconsistently along a shared dimension.

    See Also
    --------
    Dataset.chunk
    Dataset.chunks
    xarray.unify_chunks
    """
    return get_chunksizes(self.variables.values())

def chunk(
self,
Expand Down Expand Up @@ -2142,6 +2159,12 @@ def chunk(
Returns
-------
chunked : xarray.Dataset

See Also
--------
Dataset.chunks
Dataset.chunksizes
xarray.unify_chunks
"""
if chunks is None:
warnings.warn(
Expand Down
37 changes: 33 additions & 4 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
sparse_array_type,
)
from .utils import (
Frozen,
NdimSizeLenMixin,
OrderedSet,
_default,
Expand Down Expand Up @@ -996,16 +997,44 @@ def __deepcopy__(self, memo=None):
__hash__ = None # type: ignore[assignment]

@property
def chunks(self):
"""Block dimensions for this array's data or None if it's not a dask
array.
def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]:
"""
Tuple of block lengths for this variable's data, in order of dimensions, or None if
the underlying data is not a dask array.

See Also
--------
Variable.chunk
Variable.chunksizes
xarray.unify_chunks
"""
return getattr(self._data, "chunks", None)

@property
def chunksizes(self) -> Mapping[Any, Tuple[int, ...]]:
    """
    Mapping from dimension names to block lengths for this variable's data. The
    mapping is empty (never None) if the underlying data has no ``chunks``
    attribute, e.g. because it is not a dask array.
    Cannot be modified directly, but can be modified by calling .chunk().

    Differs from variable.chunks because it returns a mapping of dimensions to chunk shapes
    instead of a tuple of chunk shapes.

    See Also
    --------
    Variable.chunk
    Variable.chunks
    xarray.unify_chunks
    """
    # Read the chunks from the same object that was probed, so the check and the
    # access cannot disagree.
    if hasattr(self._data, "chunks"):
        return Frozen(dict(zip(self.dims, self._data.chunks)))
    # Keep the return type read-only in both branches.
    return Frozen({})

_array_counter = itertools.count()

def chunk(self, chunks={}, name=None, lock=False):
"""Coerce this array's data into a dask arrays with the given chunks.
"""Coerce this array's data into a dask array with the given chunks.

If this variable is a non-dask array, it will be converted to dask
array. If it's a dask array, it will be rechunked to the given chunk
Expand Down