-
-
Notifications
You must be signed in to change notification settings - Fork 365
[WIP] Refactor arrays in v3 #1589
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from 5 commits
Commits
Show all changes
32 commits
Select commit
Hold shift + click to select a range
55b47b1
Pull Zarrita into Zarr-Python @ 78274781ad64aef95772eb4b083f7ea9b7d03d06
jhamman a31a4e5
apply zarr lint rules
jhamman 15f667c
zarrita -> v3
jhamman c3f8764
v3/abc [wip]
jhamman 0864187
use abcs plus implementation notes
jhamman 664c9ca
refactor arrays
d-v-b 78d0bc0
working on making codecs extensible
normanrz 8b86afa
merge
normanrz 4fac528
adds index_location
normanrz cb6c9a8
start putting chunk-specific operations in chunks.py
d-v-b fcf79b6
adds support for codec entry points
normanrz 4ca61fb
move v2 chunk io into chunk.py
d-v-b 9f97282
adds tests from zarrita
normanrz f8bab5b
fixes types
normanrz c7cebb0
v2 array IO using v3 routines
d-v-b 6c3e40a
Apply suggestions from code review
normanrz d30bdbf
rename CoreArrayMetadata to ChunkMetadata; remove v3 suffix from io r…
d-v-b 9749f25
remove test codec from pyproject.toml
normanrz 41cee85
pull in codec tests from @normanrz's branch; try to unify chunk encod…
d-v-b aef3a4c
Update zarr/v3/array/base.py
d-v-b 93aa706
ChunkMetadata.shape -> ChunkMetadata.array_shape
d-v-b 2324003
remove breakpoint
d-v-b 0a89edf
rename test, fix failures relating to incorrect chunkkeyencoding for …
d-v-b 3e84800
remove conditional statement in json serialization that was breaking …
d-v-b 8d28e8d
remove runtime_configuration from chunk_metadata
d-v-b 79c3ce2
remove runtime_configuration from chunk_metadata in array classes
d-v-b b2a1515
define codec_pipeline once at top level of v2.array
d-v-b a7ce7a1
chore: remove fill_value, chunk_key_encoding, and chunk_coords argume…
d-v-b 27740b0
add v3/types.py, bring v2 into closer alignment to v3 api
d-v-b b4a653b
remove v3x.py
d-v-b 348ddb1
remove v3x tests
d-v-b 4fd50b5
start removing attrs, and using typeddict + to_dict methods instead. …
d-v-b File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import zarr.v3.array.v3 as v3 | ||
import zarr.v3.array.v2 as v2 | ||
import pytest | ||
from typing import Any, Dict, Literal, Tuple, Union | ||
import numpy as np | ||
|
||
from zarr.v3.common import ChunkCoords | ||
|
||
# TODO: parametrize by chunks
@pytest.mark.parametrize("zarr_version", ("v2", "v3"))
@pytest.mark.parametrize("shape", ((10,), (10, 11), (10, 11, 12)))
@pytest.mark.parametrize(
    "dtype", (np.dtype("uint8"), "uint8", np.dtype("float32"), "float32", "int64")
)
@pytest.mark.parametrize("attributes", ({}, dict(a=10, b=10)))
@pytest.mark.parametrize("fill_value", (0, 1, 2))
def test_array(
    tmpdir,
    zarr_version: Literal["v2", "v3"],
    shape: Tuple[int, ...],
    dtype: Union[str, np.dtype],
    attributes: Dict[str, Any],
    fill_value: int,
):
    """Round-trip data through a freshly created v2 or v3 ``ZArray``.

    The array is created with a single chunk covering the whole shape, then
    checked in three steps:

    1. an unwritten array reads back as ``fill_value`` everywhere,
    2. a full write reads back unchanged,
    3. a partial write of the leading slice leaves the array consistent.
    """
    store_path = str(tmpdir)
    arr: Union[v2.ZArray, v3.ZArray]
    if zarr_version == "v2":
        arr = v2.ZArray.create(
            store=store_path,
            shape=shape,
            dtype=dtype,
            chunks=shape,
            dimension_separator="/",
            fill_value=fill_value,
            attributes=attributes,
            exists_ok=True,
        )
    else:
        arr = v3.ZArray.create(
            store=store_path,
            shape=shape,
            dtype=dtype,
            chunk_shape=shape,
            fill_value=fill_value,
            attributes=attributes,
            exists_ok=True,
        )

    # Nothing has been written yet, so every element must be the fill value.
    expected_fill = np.full(shape, fill_value, dtype=dtype)
    assert np.array_equal(arr[:], expected_fill)

    data = np.arange(np.prod(shape)).reshape(shape).astype(dtype)

    # note: if we try to create a prefix called "0/0/0" but an object named "0"
    # already exists in the store, then we will get an unhandled exception
    arr[:] = data
    assert np.array_equal(arr[:], data)

    # Partial write: re-writing the leading slice with the same values must
    # leave the stored array equal to ``data``.
    leading = slice(0, 1)
    arr[leading] = data[leading]
    assert np.array_equal(arr[:], data)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
from __future__ import annotations | ||
|
||
import json | ||
from asyncio import AbstractEventLoop | ||
from enum import Enum | ||
from typing import Any, Dict, List, Literal, Optional, Tuple, Union | ||
|
||
import numpy as np | ||
from attr import frozen | ||
|
||
""" from zarr.v3.array import v3 | ||
from zarr.v3.array import v2 | ||
""" | ||
from zarr.v3.common import BytesLike, ChunkCoords, SliceSelection, to_thread | ||
from zarr.v3.store import StorePath | ||
import numcodecs | ||
from numcodecs.compat import ensure_bytes, ensure_ndarray | ||
|
||
|
||
@frozen
class RuntimeConfiguration:
    """Frozen (attrs) bundle of runtime options.

    Every field has a default, so ``RuntimeConfiguration()`` is valid.
    """

    # Either "C" or "F"; presumably the NumPy-style memory order -- confirm.
    order: Literal["C", "F"] = "C"
    # Optional integer concurrency setting; ``None`` when not specified.
    concurrency: Optional[int] = None
    # Optional asyncio event loop; ``None`` when not specified.
    asyncio_loop: Optional[AbstractEventLoop] = None
|
||
|
||
def runtime_configuration(
    order: Literal["C", "F"], concurrency: Optional[int] = None
) -> RuntimeConfiguration:
    """Build a ``RuntimeConfiguration`` from ``order`` and ``concurrency``.

    Unlike the class constructor, ``order`` is required here. Fields that are
    not passed (``asyncio_loop``) keep their class defaults.
    """
    options = {"order": order, "concurrency": concurrency}
    return RuntimeConfiguration(**options)
|
||
|
||
""" class DataType(Enum): | ||
bool = "bool" | ||
int8 = "int8" | ||
int16 = "int16" | ||
int32 = "int32" | ||
int64 = "int64" | ||
uint8 = "uint8" | ||
uint16 = "uint16" | ||
uint32 = "uint32" | ||
uint64 = "uint64" | ||
float32 = "float32" | ||
float64 = "float64" | ||
|
||
@property | ||
def byte_count(self) -> int: | ||
data_type_byte_counts = { | ||
DataType.bool: 1, | ||
DataType.int8: 1, | ||
DataType.int16: 2, | ||
DataType.int32: 4, | ||
DataType.int64: 8, | ||
DataType.uint8: 1, | ||
DataType.uint16: 2, | ||
DataType.uint32: 4, | ||
DataType.uint64: 8, | ||
DataType.float32: 4, | ||
DataType.float64: 8, | ||
} | ||
return data_type_byte_counts[self] | ||
|
||
def to_numpy_shortname(self) -> str: | ||
data_type_to_numpy = { | ||
DataType.bool: "bool", | ||
DataType.int8: "i1", | ||
DataType.int16: "i2", | ||
DataType.int32: "i4", | ||
DataType.int64: "i8", | ||
DataType.uint8: "u1", | ||
DataType.uint16: "u2", | ||
DataType.uint32: "u4", | ||
DataType.uint64: "u8", | ||
DataType.float32: "f4", | ||
DataType.float64: "f8", | ||
} | ||
return data_type_to_numpy[self] """ | ||
|
||
|
||
def byte_count(dtype: Union[np.dtype, str]) -> int:
    """Return the size in bytes of a single element of ``dtype``.

    Parameters
    ----------
    dtype
        A NumPy dtype, or anything ``np.dtype`` accepts (e.g. ``"uint8"``).

    Returns
    -------
    int
        The ``itemsize`` of the resolved dtype.
    """
    # np.dtype() returns dtype instances unchanged and resolves spellings such
    # as "float32", so both kinds of input are handled.
    return np.dtype(dtype).itemsize
|
||
|
||
def to_numpy_shortname(dtype: Union[np.dtype, str]) -> str:
    """Return the NumPy type string of ``dtype`` without its byte-order prefix.

    For example ``np.dtype("uint8")`` (``"|u1"``) gives ``"u1"`` and
    ``np.dtype(">i4")`` gives ``"i4"``.

    Parameters
    ----------
    dtype
        A NumPy dtype, or anything ``np.dtype`` accepts (e.g. ``"float32"``).

    Returns
    -------
    str
        ``dtype.str`` with any leading byte-order character removed.
    """
    # NumPy's byte-order characters are "|" (not applicable), "=" (native),
    # "<" (little-endian) and ">" (big-endian). A single character-set strip
    # replaces the previous chain of lstrip calls, which also stripped "^",
    # a character that is not a NumPy byte-order marker.
    return np.dtype(dtype).str.lstrip("|=<>")
|
||
|
||
# Lookup table from a NumPy dtype string to a data-type name.
# Keys are ``np.dtype(...).str`` spellings, byte-order prefix included, plus
# the literal "bool". Only "|" and "<" prefixed keys are listed, so a
# big-endian string such as ">i4" is not a key.
dtype_to_data_type = {
    # boolean (two spellings map to the same name)
    "|b1": "bool",
    "bool": "bool",
    # signed integers
    "|i1": "int8",
    "<i2": "int16",
    "<i4": "int32",
    "<i8": "int64",
    # unsigned integers
    "|u1": "uint8",
    "<u2": "uint16",
    "<u4": "uint32",
    "<u8": "uint64",
    # floating point
    "<f4": "float32",
    "<f8": "float64",
}
|
||
|
||
@frozen
class ChunkMetadata:
    """Frozen (attrs) record of the metadata carried alongside chunk data.

    All fields are required; none declares a default.
    """

    # presumably the shape of the full array rather than of one chunk -- confirm
    shape: ChunkCoords
    # Shape of a single chunk.
    chunk_shape: ChunkCoords
    # NumPy dtype of the elements.
    dtype: np.dtype
    # Fill value; the type is unconstrained (``Any``).
    fill_value: Any
    # Runtime options (order, concurrency, asyncio loop).
    runtime_configuration: RuntimeConfiguration

    @property
    def ndim(self) -> int:
        """Number of dimensions, i.e. the number of entries in ``shape``."""
        dimensions = self.shape
        return len(dimensions)
|
||
|
||
""" @frozen | ||
class ZArray: | ||
shape: ChunkCoords | ||
chunk_shape: ChunkCoords | ||
dtype: DataType | ||
T: "ZArray" | ||
size: int | ||
ndim: int | ||
attrs: Dict[str, Any] | ||
order: Literal["C", "F"] | ||
metadata: Union[v2.ZArrayMetadata, v3.ZArrayMetadata] | ||
chunk_store: StorePath | ||
metadata_store: StorePath | ||
|
||
@property | ||
def ndim(self) -> int: | ||
return len(self.metadata.shape) | ||
|
||
@property | ||
def dtype(self) -> np.dtype: | ||
return np.dtype(self.metadata.dtype) | ||
|
||
@property | ||
def size(self) -> int | ||
return np.prod(self.metadata.shape) | ||
|
||
@property | ||
def T(self) -> 'ZArray': | ||
... | ||
|
||
def __getitem__(*args): | ||
return _chunk_getitem_sync(*args): | ||
|
||
def __setitem__(*args): | ||
return _chunk_setitem_sync(*args) | ||
""" |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.