Skip to content
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
55b47b1
Pull Zarrita into Zarr-Python @ 78274781ad64aef95772eb4b083f7ea9b7d03d06
jhamman Nov 3, 2023
a31a4e5
apply zarr lint rules
jhamman Nov 3, 2023
15f667c
zarrita -> v3
jhamman Nov 3, 2023
c3f8764
v3/abc [wip]
jhamman Nov 3, 2023
0864187
use abcs plus implementation notes
jhamman Nov 3, 2023
664c9ca
refactor arrays
d-v-b Dec 5, 2023
78d0bc0
working on making codecs extensible
normanrz Dec 5, 2023
8b86afa
merge
normanrz Dec 5, 2023
4fac528
adds index_location
normanrz Dec 5, 2023
cb6c9a8
start putting chunk-specific operations in chunks.py
d-v-b Dec 5, 2023
fcf79b6
adds support for codec entry points
normanrz Dec 5, 2023
4ca61fb
move v2 chunk io into chunk.py
d-v-b Dec 5, 2023
9f97282
adds tests from zarrita
normanrz Dec 5, 2023
f8bab5b
fixes types
normanrz Dec 5, 2023
c7cebb0
v2 array IO using v3 routines
d-v-b Dec 5, 2023
6c3e40a
Apply suggestions from code review
normanrz Dec 6, 2023
d30bdbf
rename CoreArrayMetadata to ChunkMetadata; remove v3 suffix from io r…
d-v-b Dec 6, 2023
9749f25
remove test codec from pyproject.toml
normanrz Dec 6, 2023
41cee85
pull in codec tests from @normanrz's branch; try to unify chunk encod…
d-v-b Dec 6, 2023
aef3a4c
Update zarr/v3/array/base.py
d-v-b Dec 6, 2023
93aa706
ChunkMetadata.shape -> ChunkMetadata.array_shape
d-v-b Dec 6, 2023
2324003
remove breakpoint
d-v-b Dec 6, 2023
0a89edf
rename test, fix failures relating to incorrect chunkkeyencoding for …
d-v-b Dec 7, 2023
3e84800
remove conditional statement in json serialization that was breaking …
d-v-b Dec 8, 2023
8d28e8d
remove runtime_configuration from chunk_metadata
d-v-b Dec 8, 2023
79c3ce2
remove runtime_configuration from chunk_metadata in array classes
d-v-b Dec 9, 2023
b2a1515
define codec_pipeline once at top level of v2.array
d-v-b Dec 9, 2023
a7ce7a1
chore: remove fill_value, chunk_key_encoding, and chunk_coords argume…
d-v-b Dec 15, 2023
27740b0
add v3/types.py, bring v2 into closer alignment to v3 api
d-v-b Jan 7, 2024
b4a653b
remove v3x.py
d-v-b Jan 7, 2024
348ddb1
remove v3x tests
d-v-b Jan 7, 2024
4fd50b5
start removing attrs, and using typeddict + to_dict methods instead. …
d-v-b Jan 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions zarr/tests/test_v3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import zarr.v3.array.v3 as v3
import zarr.v3.array.v2 as v2
import pytest
from typing import Any, Dict, Literal, Tuple, Union
import numpy as np

from zarr.v3.common import ChunkCoords

# TODO: also parametrize by chunk shape (chunks currently always equal the full array shape)
@pytest.mark.parametrize("zarr_version", ("v2", "v3"))
@pytest.mark.parametrize(
    "shape",
    (
        (10,),
        (
            10,
            11,
        ),
        (
            10,
            11,
            12,
        ),
    ),
)
@pytest.mark.parametrize(
    "dtype", (np.dtype("uint8"), "uint8", np.dtype("float32"), "float32", "int64")
)
@pytest.mark.parametrize("attributes", ({}, dict(a=10, b=10)))
@pytest.mark.parametrize("fill_value", (0, 1, 2))
def test_array(
    tmpdir,
    zarr_version: Literal["v2", "v3"],
    shape: Tuple[int, ...],
    dtype: Union[str, np.dtype],
    attributes: Dict[str, Any],
    fill_value: float,
):
    """Round-trip an array through a v2 or v3 ``ZArray`` stored under ``tmpdir``.

    Steps checked, in order:
    1. a freshly created array reads back as all ``fill_value``;
    2. a full write followed by a full read returns the written data;
    3. a partial write of the first slice leaves the array equal to the data.
    """
    store_path = str(tmpdir)
    arr: Union[v2.ZArray, v3.ZArray]
    if zarr_version == "v2":
        arr = v2.ZArray.create(
            store=store_path,
            shape=shape,
            dtype=dtype,
            chunks=shape,
            dimension_separator="/",
            fill_value=fill_value,
            attributes=attributes,
            exists_ok=True,
        )
    else:
        arr = v3.ZArray.create(
            store=store_path,
            shape=shape,
            dtype=dtype,
            chunk_shape=shape,
            fill_value=fill_value,
            attributes=attributes,
            exists_ok=True,
        )

    # np.full pins the expected dtype explicitly instead of relying on the
    # promotion rules of ``zeros + fill_value``.
    fill_array = np.full(shape, fill_value, dtype=dtype)
    assert np.array_equal(arr[:], fill_array)

    data = np.arange(np.prod(shape)).reshape(shape).astype(dtype)

    # note: if we try to create a prefix called "0/0/0" but an object named "0"
    # already exists in the store, then we will get an unhandled exception
    arr[:] = data
    assert np.array_equal(arr[:], data)

    # partial write: re-writing the first slice with the same values must leave
    # the stored array unchanged, so read it back and verify.
    arr[slice(0, 1)] = data[slice(0, 1)]
    assert np.array_equal(arr[:], data)
12 changes: 6 additions & 6 deletions zarr/v3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

from typing import Union

import zarr.v3.codecs # noqa: F401
from zarr.v3.array import Array # noqa: F401
from zarr.v3.array_v2 import ArrayV2 # noqa: F401
import zarr.v3.array.codecs # noqa: F401
from zarr.v3.array.v3 import ZArray as ZArrayV3 # noqa: F401
from zarr.v3.array.v2 import ZArray as ZArrayV2 # noqa: F401
from zarr.v3.group import Group # noqa: F401
from zarr.v3.group_v2 import GroupV2 # noqa: F401
from zarr.v3.metadata import RuntimeConfiguration, runtime_configuration # noqa: F401
from zarr.v3.array.base import RuntimeConfiguration, runtime_configuration # noqa: F401
from zarr.v3.store import ( # noqa: F401
LocalStore,
RemoteStore,
Expand All @@ -22,7 +22,7 @@
async def open_auto_async(
store: StoreLike,
runtime_configuration_: RuntimeConfiguration = RuntimeConfiguration(),
) -> Union[Array, ArrayV2, Group, GroupV2]:
) -> Union[ZArrayV2, ZArrayV3, Group, GroupV2]:
store_path = make_store_path(store)
try:
return await Group.open_or_array(store_path, runtime_configuration=runtime_configuration_)
Expand All @@ -33,7 +33,7 @@ async def open_auto_async(
def open_auto(
store: StoreLike,
runtime_configuration_: RuntimeConfiguration = RuntimeConfiguration(),
) -> Union[Array, ArrayV2, Group, GroupV2]:
) -> Union[ZArrayV2, ZArrayV3, Group, GroupV2]:
return _sync(
open_auto_async(store, runtime_configuration_),
runtime_configuration_.asyncio_loop,
Expand Down
40 changes: 33 additions & 7 deletions zarr/v3/abc/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,35 @@
from __future__ import annotations

from abc import abstractmethod, ABC
from typing import TYPE_CHECKING, Optional
from typing import TYPE_CHECKING, Optional, Type

import numpy as np

from zarr.v3.common import BytesLike
from zarr.v3.common import BytesLike, SliceSelection
from zarr.v3.store import StorePath


if TYPE_CHECKING:
from zarr.v3.metadata import CoreArrayMetadata
from zarr.v3.array.base import ChunkMetadata, CodecMetadata


class Codec(ABC):
supports_partial_decode: bool
supports_partial_encode: bool
is_fixed_size: bool
array_metadata: CoreArrayMetadata
array_metadata: ChunkMetadata

@abstractmethod
def compute_encoded_size(self, input_byte_length: int) -> int:
    """Abstract hook taking an input byte length and declared to return an int.

    NOTE(review): presumably the encoded size in bytes for an input of
    ``input_byte_length`` bytes -- confirm against concrete codecs. The body
    here is intentionally empty.
    """
    ...

def resolve_metadata(self) -> CoreArrayMetadata:
def resolve_metadata(self) -> ChunkMetadata:
    """Return this codec's ``array_metadata`` attribute as-is."""
    metadata = self.array_metadata
    return metadata

@classmethod
def from_metadata(
    cls, codec_metadata: "CodecMetadata", array_metadata: ChunkMetadata
) -> "Codec":
    """Alternate constructor: build a codec from ``codec_metadata`` and ``array_metadata``.

    The return annotation is the codec *instance* type; the previous
    ``Type[Codec]`` annotation described a class object instead.

    NOTE(review): this base implementation is a stub that returns ``None``;
    presumably every concrete codec overrides it -- confirm against the
    subclasses before relying on the base behaviour.
    """
    pass


class ArrayArrayCodec(Codec):
@abstractmethod
Expand Down Expand Up @@ -68,6 +73,27 @@ async def encode(
pass


class ArrayBytesCodecPartialDecodeMixin:
    """Mixin that declares the ``decode_partial`` coroutine hook.

    The class has no ABC base of its own, so ``@abstractmethod`` is only
    enforced once the mixin is combined with a class whose metaclass is
    ``ABCMeta``.
    """

    @abstractmethod
    async def decode_partial(
        self,
        store_path: StorePath,
        selection: SliceSelection,
    ) -> Optional[np.ndarray]:
        """Abstract coroutine taking a store path and a selection.

        Declared to return an array or ``None``; this stub has no body and
        resolves to ``None``.
        """
        ...


class ArrayBytesCodecPartialEncodeMixin:
    """Mixin that declares the ``encode_partial`` coroutine hook.

    The class has no ABC base of its own, so ``@abstractmethod`` is only
    enforced once the mixin is combined with a class whose metaclass is
    ``ABCMeta``.
    """

    @abstractmethod
    async def encode_partial(
        self,
        store_path: StorePath,
        chunk_array: np.ndarray,
        selection: SliceSelection,
    ) -> None:
        """Abstract coroutine taking a store path, a chunk array and a selection.

        Declared to return nothing; this stub has no body.
        """
        ...


class BytesBytesCodec(Codec):
@abstractmethod
async def decode(
Expand Down
Empty file added zarr/v3/array/__init__.py
Empty file.
153 changes: 153 additions & 0 deletions zarr/v3/array/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
from __future__ import annotations

import json
from asyncio import AbstractEventLoop
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Tuple, Union

import numpy as np
from attr import frozen

""" from zarr.v3.array import v3
from zarr.v3.array import v2
"""
from zarr.v3.common import BytesLike, ChunkCoords, SliceSelection, to_thread
from zarr.v3.store import StorePath
import numcodecs
from numcodecs.compat import ensure_bytes, ensure_ndarray


@frozen
class RuntimeConfiguration:
    """Frozen (attrs) record of runtime settings; every field has a default."""

    # Either "C" or "F"; defaults to "C".
    # NOTE(review): presumably the NumPy-style memory order of returned arrays -- confirm.
    order: Literal["C", "F"] = "C"
    # Optional integer, None by default.
    # NOTE(review): presumably a cap on concurrent operations -- confirm against its users.
    concurrency: Optional[int] = None
    # Optional asyncio event loop, None by default.
    asyncio_loop: Optional[AbstractEventLoop] = None


def runtime_configuration(
    order: Literal["C", "F"], concurrency: Optional[int] = None
) -> RuntimeConfiguration:
    """Build a ``RuntimeConfiguration`` from ``order`` and ``concurrency``.

    Only these two fields are forwarded; any other field of the configuration
    keeps its default value.
    """
    config = RuntimeConfiguration(order=order, concurrency=concurrency)
    return config


""" class DataType(Enum):
bool = "bool"
int8 = "int8"
int16 = "int16"
int32 = "int32"
int64 = "int64"
uint8 = "uint8"
uint16 = "uint16"
uint32 = "uint32"
uint64 = "uint64"
float32 = "float32"
float64 = "float64"

@property
def byte_count(self) -> int:
data_type_byte_counts = {
DataType.bool: 1,
DataType.int8: 1,
DataType.int16: 2,
DataType.int32: 4,
DataType.int64: 8,
DataType.uint8: 1,
DataType.uint16: 2,
DataType.uint32: 4,
DataType.uint64: 8,
DataType.float32: 4,
DataType.float64: 8,
}
return data_type_byte_counts[self]

def to_numpy_shortname(self) -> str:
data_type_to_numpy = {
DataType.bool: "bool",
DataType.int8: "i1",
DataType.int16: "i2",
DataType.int32: "i4",
DataType.int64: "i8",
DataType.uint8: "u1",
DataType.uint16: "u2",
DataType.uint32: "u4",
DataType.uint64: "u8",
DataType.float32: "f4",
DataType.float64: "f8",
}
return data_type_to_numpy[self] """


def byte_count(dtype: Union[str, np.dtype]) -> int:
    """Return the size in bytes of one element of ``dtype``.

    ``dtype`` may be an ``np.dtype`` or anything ``np.dtype()`` accepts (for
    example ``"int64"``); it is normalised first so dtype-like strings do not
    fail on the ``itemsize`` attribute lookup.
    """
    return np.dtype(dtype).itemsize


def to_numpy_shortname(dtype: Union[str, np.dtype]) -> str:
    """Return the NumPy type string of ``dtype`` without its byte-order prefix.

    For example ``np.dtype("float32")`` (``"<f4"`` on little-endian machines)
    becomes ``"f4"``. ``dtype`` may be an ``np.dtype`` or anything
    ``np.dtype()`` accepts.
    """
    # NumPy's byte-order markers are "|", "=", "<" and ">"; strip whichever
    # one leads the type string in a single pass.
    return np.dtype(dtype).str.lstrip("|=<>")


# Lookup table from a dtype string to a data type name.
# Keys are byte-order-prefixed type strings ("|" or "<" followed by kind and
# size), plus the plain name "bool". Only "|" and "<" prefixed keys are
# present, so a ">"-prefixed (big-endian) string is not in the table.
# NOTE(review): keys look like ``np.dtype(...).str`` values and the values
# look like Zarr v3 data type names -- confirm against the callers.
dtype_to_data_type = {
    "|b1": "bool",
    "bool": "bool",
    "|i1": "int8",
    "<i2": "int16",
    "<i4": "int32",
    "<i8": "int64",
    "|u1": "uint8",
    "<u2": "uint16",
    "<u4": "uint32",
    "<u8": "uint64",
    "<f4": "float32",
    "<f8": "float64",
}


@frozen
class ChunkMetadata:
    """Frozen (attrs) record bundling shape, chunk shape, dtype, fill value and runtime configuration."""

    shape: ChunkCoords
    chunk_shape: ChunkCoords
    # a ``data_type: DataType`` field used to sit here; ``dtype`` is used instead
    dtype: np.dtype
    fill_value: Any
    runtime_configuration: RuntimeConfiguration

    @property
    def ndim(self) -> int:
        """Number of dimensions, taken as the length of ``shape``."""
        dimensions = self.shape
        return len(dimensions)


""" @frozen
class ZArray:
shape: ChunkCoords
chunk_shape: ChunkCoords
dtype: DataType
T: "ZArray"
size: int
ndim: int
attrs: Dict[str, Any]
order: Literal["C", "F"]
metadata: Union[v2.ZArrayMetadata, v3.ZArrayMetadata]
chunk_store: StorePath
metadata_store: StorePath

@property
def ndim(self) -> int:
return len(self.metadata.shape)

@property
def dtype(self) -> np.dtype:
return np.dtype(self.metadata.dtype)

@property
def size(self) -> int
return np.prod(self.metadata.shape)

@property
def T(self) -> 'ZArray':
...

def __getitem__(*args):
return _chunk_getitem_sync(*args):

def __setitem__(*args):
return _chunk_setitem_sync(*args)
"""
Loading