Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@

### Enhancements

* A new configuration setting `append_step` can be used to validate
the step sizes between the labels of a coordinate variable associated with
the append dimension. Its value can be a number for numerical labels
or a time delta value of the form `8h` (8 hours) or `2D` (two days) for
date/time labels. The value can also be negative. [#21]

* The configuration setting `append_step` can take the special values
`"+"` and `"-"` which are used to verify that the labels are monotonically
increasing and decreasing, respectively. [#20]

* It is now possible to reference environment variables
in configuration files using the syntax `${ENV_VAR}`. [#36]

Expand Down
24 changes: 21 additions & 3 deletions docs/guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,27 @@ specify its name:
}
```

All other non-variadic dimensions can and should be specified using the
`fixed_dims` setting which is a mapping from dimension name to the
fixed dimension size, e.g.:
The configuration setting `append_step` can be used to validate the step sizes
between the labels of a coordinate variable associated with the append dimension.
Its value can be a number for numerical labels or a timedelta value of the form
`<count><unit>` for date/time labels. In the latter case, `<count>` is an integer
and `<unit>` is one of the possible
[numpy datetime units](https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-units),
for example, `8h` (8 hours) or `2D` (two days). Numerical and timedelta values
may be negative. `append_step` can also take the two special values `"+"` and
`"-"`. In these cases, it is only verified that the append labels are
monotonically increasing or decreasing, respectively.

```json
{
"append_dim": "time",
"append_step": "2D"
}
```

All other non-variadic dimensions, i.e., those besides the append dimension,
can and should be specified using the `fixed_dims` setting, which is a mapping
from dimension name to the fixed dimension size, e.g.:

```json
{
Expand Down
137 changes: 137 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import shutil
import unittest

import numpy as np
import pytest
import xarray as xr

from zappend.api import FileObj
Expand All @@ -14,6 +16,7 @@
from .helpers import make_test_dataset


# noinspection PyMethodMayBeStatic
class ApiTest(unittest.TestCase):
def setUp(self):
clear_memory_fs()
Expand Down Expand Up @@ -73,6 +76,140 @@ def process_slice(ctx, slice_ds: xr.Dataset) -> SliceSource:
self.assertEqual({"chl"}, set(ds.data_vars))
self.assertEqual({"time", "y", "x"}, set(ds.coords))

def test_some_slices_with_inc_append_step(self):
    """Append slices 0, 1, 2 with ``append_step="1D"`` and check the labels.

    The target dataset is expected to hold the three daily time labels
    2024-01-01 .. 2024-01-03 in increasing order.
    """
    target_uri = "memory://target.zarr"
    slice_datasets = [
        make_test_dataset(index=slice_index, shape=(1, 50, 100))
        for slice_index in (0, 1, 2)
    ]
    zappend(slice_datasets, target_dir=target_uri, append_step="1D")

    target_ds = xr.open_zarr(target_uri)
    expected_labels = np.array(
        ["2024-01-01", "2024-01-02", "2024-01-03"], dtype=np.datetime64
    )
    np.testing.assert_array_equal(target_ds.time.values, expected_labels)

def test_some_slices_with_dec_append_step(self):
    """Append slices 2, 1, 0 with ``append_step="-1D"`` and check the labels.

    The target dataset is expected to hold the three daily time labels
    2024-01-03 .. 2024-01-01 in decreasing order.
    """
    target_uri = "memory://target.zarr"
    slice_datasets = [
        make_test_dataset(index=slice_index, shape=(1, 50, 100))
        for slice_index in (2, 1, 0)
    ]
    zappend(slice_datasets, target_dir=target_uri, append_step="-1D")

    target_ds = xr.open_zarr(target_uri)
    expected_labels = np.array(
        ["2024-01-03", "2024-01-02", "2024-01-01"], dtype=np.datetime64
    )
    np.testing.assert_array_equal(target_ds.time.values, expected_labels)

# # See https://github.com/bcdev/zappend/issues/21
#
# def test_some_slices_with_one_missing_append_step(self):
# target_dir = "memory://target.zarr"
# slices = [
# make_test_dataset(index=0, shape=(1, 50, 100)),
# make_test_dataset(index=2, shape=(1, 50, 100)),
# ]
# zappend(slices, target_dir=target_dir, append_step="1D")
# ds = xr.open_zarr(target_dir)
# np.testing.assert_array_equal(
# ds.time.values,
# np.array(
# ["2024-01-01", "2024-01-02", "2024-01-03"], dtype="datetime64[ns]"
# ),
# )

# # See https://github.com/bcdev/zappend/issues/21
#
# def test_some_slices_with_three_missing_append_steps(self):
# target_dir = "memory://target.zarr"
# slices = [
# make_test_dataset(index=0, shape=(1, 50, 100)),
# make_test_dataset(index=4, shape=(1, 50, 100)),
# ]
# zappend(slices, target_dir=target_dir, append_step="1D")
# ds = xr.open_zarr(target_dir)
# np.testing.assert_array_equal(
# ds.time.values,
# np.array(
# [
# "2024-01-01",
# "2024-01-02",
# "2024-01-03",
# "2024-01-04",
# "2024-01-05",
# ],
# dtype="datetime64[ns]",
# ),
# )

def test_it_raises_for_wrong_append_step(self):
    """Appending slices 0 and 1 with ``append_step="2D"`` must fail.

    A ``ValueError`` reporting an invalid step size is expected.
    """
    target_dir = "memory://target.zarr"
    slices = [
        make_test_dataset(index=0, shape=(1, 50, 100)),
        make_test_dataset(index=1, shape=(1, 50, 100)),
    ]
    # ``match`` is interpreted as a regular expression, so the final period
    # is escaped to require a literal "." rather than any character.
    with pytest.raises(
        ValueError,
        match=(
            r"Cannot append slice because this would result in"
            r" an invalid step size\."
        ),
    ):
        zappend(slices, target_dir=target_dir, append_step="2D")

def test_some_slices_with_inc_append_labels(self):
    """Exercise ``append_step="+"`` with ascending and descending slices.

    Slices 0, 1, 2 must be accepted; afterwards slices 1, 0 must be
    rejected with a ``ValueError`` about monotonically increasing labels.
    """
    step_mode = "+"
    target_uri = "memory://target.zarr"

    # OK!
    ascending = [
        make_test_dataset(index=slice_index, shape=(1, 50, 100))
        for slice_index in (0, 1, 2)
    ]
    zappend(ascending, target_dir=target_uri, append_step=step_mode)

    # NOTE(review): the same target URI is reused and nothing visible here
    # clears it between the two calls, so the target written above is
    # presumably still present - confirm this is intended.
    descending = [
        make_test_dataset(index=slice_index, shape=(1, 50, 100))
        for slice_index in (1, 0)
    ]
    expected_message = (
        "Cannot append slice because labels must be monotonically increasing"
    )
    with pytest.raises(ValueError, match=expected_message):
        zappend(descending, target_dir=target_uri, append_step=step_mode)

def test_some_slices_with_dec_append_labels(self):
    """Exercise ``append_step="-"`` with descending and ascending slices.

    Slices 2, 1, 0 must be accepted; afterwards slices 0, 1 must be
    rejected with a ``ValueError`` about monotonically decreasing labels.
    """
    step_mode = "-"
    target_uri = "memory://target.zarr"

    # OK!
    descending = [
        make_test_dataset(index=slice_index, shape=(1, 50, 100))
        for slice_index in (2, 1, 0)
    ]
    zappend(descending, target_dir=target_uri, append_step=step_mode)

    # NOTE(review): the same target URI is reused and nothing visible here
    # clears it between the two calls, so the target written above is
    # presumably still present - confirm this is intended.
    ascending = [
        make_test_dataset(index=slice_index, shape=(1, 50, 100))
        for slice_index in (0, 1)
    ]
    expected_message = (
        "Cannot append slice because labels must be monotonically decreasing"
    )
    with pytest.raises(ValueError, match=expected_message):
        zappend(ascending, target_dir=target_uri, append_step=step_mode)

def test_some_slices_with_profiling(self):
target_dir = "memory://target.zarr"
slices = [
Expand Down
1 change: 1 addition & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ def test_get_config_schema(self):
self.assertEqual(
{
"append_dim",
"append_step",
"disable_rollback",
"dry_run",
"excluded_variables",
Expand Down
23 changes: 18 additions & 5 deletions tests/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
import unittest

import pytest
import numpy as np
import xarray as xr
from zappend.api import zappend

from zappend.context import Context
from zappend.fsutil.fileobj import FileObj
from zappend.metadata import DatasetMetadata
Expand Down Expand Up @@ -37,20 +38,32 @@ def test_with_existing_target(self):
def test_append_dim(self):
    """Check ``append_dim_name`` without and with an ``append_dim`` setting."""
    default_ctx = Context({"target_dir": "memory://target.zarr"})
    self.assertEqual("time", default_ctx.append_dim_name)

    depth_config = {"target_dir": "memory://target.zarr", "append_dim": "depth"}
    depth_ctx = Context(depth_config)
    self.assertEqual("depth", depth_ctx.append_dim_name)

def test_append_step(self):
    """Check ``append_step_size`` without and with an ``append_step`` setting."""
    target_uri = "memory://target.zarr"
    # presumably writes a test dataset to the target URI - verify in helpers
    make_test_dataset(uri=target_uri)

    # (additional settings, expected ``append_step_size``)
    cases = [
        ({}, None),
        ({"append_step": "1D"}, "1D"),
    ]
    for extra_settings, expected_step in cases:
        ctx = Context({"target_dir": target_uri, **extra_settings})
        self.assertEqual(expected_step, ctx.append_step_size)

def test_last_append_label(self):
    """Check ``last_append_label`` for three configurations.

    It is expected to be ``None`` when no ``append_step`` is configured and
    when the target URI differs from the one the dataset was created with;
    with ``append_step`` and the matching URI it is expected to be
    2024-01-03.
    """
    existing_uri = "memory://target.zarr"
    other_uri = "memory://TARGET.zarr"
    # presumably writes a test dataset to ``existing_uri`` - verify in helpers
    make_test_dataset(uri=existing_uri)

    no_step_ctx = Context({"target_dir": existing_uri})
    self.assertEqual(None, no_step_ctx.last_append_label)

    other_target_ctx = Context({"target_dir": other_uri, "append_step": "1D"})
    self.assertEqual(None, other_target_ctx.last_append_label)

    step_ctx = Context({"target_dir": existing_uri, "append_step": "1D"})
    self.assertEqual(np.datetime64("2024-01-03"), step_ctx.last_append_label)

def test_slice_polling(self):
ctx = Context({"target_dir": "memory://target.zarr"})
self.assertEqual((None, None), ctx.slice_polling)

ctx = Context({"target_dir": "memory://target.zarr", "slice_polling": False})
self.assertEqual((None, None), ctx.slice_polling)

ctx = Context({"target_dir": "memory://target.zarr", "slice_polling": True})
self.assertEqual((2, 60), ctx.slice_polling)

ctx = Context(
{
"target_dir": "memory://target.zarr",
Expand Down
88 changes: 87 additions & 1 deletion tests/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,21 @@
# https://opensource.org/licenses/MIT.

import unittest

import numpy as np
import pytest
import xarray as xr

from zappend.fsutil.fileobj import FileObj
from zappend.context import Context
from zappend.processor import Processor
from zappend.processor import to_timedelta
from zappend.processor import verify_append_labels
from .helpers import clear_memory_fs
from .helpers import make_test_dataset


class TestProcessor(unittest.TestCase):
class ProcessorTest(unittest.TestCase):
def setUp(self):
clear_memory_fs()

Expand Down Expand Up @@ -142,3 +148,83 @@ def test_process_two_slices_with_chunk_overlap(self):
self.assertEqual(None, ds.time.chunks)
self.assertEqual(((3, 1), (5, 5), (10, 10)), ds.chl.chunks)
self.assertEqual(((3, 1), (5, 5), (10, 10)), ds.tsm.chunks)


# noinspection PyMethodMayBeStatic
class AppendLabelValidationTest(unittest.TestCase):
    """Tests for ``verify_append_labels()`` with various ``append_step`` values."""

    def setUp(self):
        # Run the shared memory-filesystem cleanup helper before each test.
        clear_memory_fs()

    def test_verify_append_labels_succeeds(self):
        """Cases in which ``verify_append_labels()`` must not raise."""
        ctx = Context({"target_dir": "memory://target.zarr", "append_step": "1D"})

        # Ok, because we have no delta
        slice_ds = make_test_dataset(shape=(1, 50, 100))
        verify_append_labels(ctx, slice_ds)

        # Ok, because we have 4 deltas that are 1D
        slice_ds = make_test_dataset(shape=(5, 50, 100))
        verify_append_labels(ctx, slice_ds)

        # Ok, because after removing "time" coordinate variable,
        # xarray will use numerical labels
        ctx = Context({"target_dir": "memory://target.zarr", "append_step": 1})
        slice_ds = make_test_dataset(shape=(3, 50, 100)).drop_vars(["time"])
        verify_append_labels(ctx, slice_ds)

        # Ok, because "foo" has no labels
        ctx = Context(
            {
                "target_dir": "memory://target.zarr",
                "append_dim": "foo",
                "append_step": "1D",
            }
        )
        slice_ds = make_test_dataset(shape=(3, 50, 100))
        verify_append_labels(ctx, slice_ds)

    def test_verify_append_labels_fails(self):
        """Cases in which ``verify_append_labels()`` must raise ``ValueError``.

        ``match`` is interpreted as a regular expression, so the final period
        of each expected message is escaped to require a literal ".".
        """
        # Step size mismatch: "2D" is configured for the 3-step test dataset.
        ctx = Context({"target_dir": "memory://target.zarr", "append_step": "2D"})
        slice_ds = make_test_dataset(shape=(3, 50, 100))
        with pytest.raises(
            ValueError,
            match=(
                r"Cannot append slice because this would result in"
                r" an invalid step size\."
            ),
        ):
            verify_append_labels(ctx, slice_ds)

        # "-" requires decreasing labels; the unmodified test dataset fails.
        ctx = Context({"target_dir": "memory://target.zarr", "append_step": "-"})
        slice_ds = make_test_dataset(shape=(3, 50, 100))
        with pytest.raises(
            ValueError,
            match=(
                r"Cannot append slice because labels must be"
                r" monotonically decreasing\."
            ),
        ):
            verify_append_labels(ctx, slice_ds)

        # "+" requires increasing labels; reverse the time labels to violate it.
        ctx = Context({"target_dir": "memory://target.zarr", "append_step": "+"})
        slice_ds = make_test_dataset(shape=(3, 50, 100))
        time = slice_ds["time"]
        slice_ds["time"] = xr.DataArray(
            list(reversed(time.values)), dims=time.dims, attrs=time.attrs
        )
        with pytest.raises(
            ValueError,
            match=(
                r"Cannot append slice because labels must be"
                r" monotonically increasing\."
            ),
        ):
            verify_append_labels(ctx, slice_ds)


class ToTimedeltaTest(unittest.TestCase):
    """Compares ``to_timedelta()`` results with ``np.timedelta64`` values."""

    def test_it(self):
        """Check unit-only, count-and-unit, and integer arguments."""
        # (expected value, argument passed to ``to_timedelta``), checked in order
        expectations = [
            (np.timedelta64(1, "s"), "s"),
            (np.timedelta64(1, "m"), "m"),
            (np.timedelta64(1, "h"), "h"),
            (np.timedelta64(1, "h"), "1h"),
            (np.timedelta64(24, "h"), "24h"),
            (np.timedelta64(1, "D"), "24h"),
            (np.timedelta64(1, "D"), "D"),
            (np.timedelta64(1, "D"), "1D"),
            (np.timedelta64(7, "D"), "7D"),
            (np.timedelta64(1, "W"), "7D"),
            (np.timedelta64(12, "D"), "12D"),
            (np.timedelta64(60 * 60 * 24, "s"), 60 * 60 * 24),
            (np.timedelta64(1, "D"), 60 * 60 * 24),
        ]
        for expected, argument in expectations:
            self.assertEqual(expected, to_timedelta(argument))
3 changes: 2 additions & 1 deletion zappend/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
# https://opensource.org/licenses/MIT.

from .config import ConfigItem
from .config import ConfigList
from .config import ConfigLike
from .config import ConfigList
from .config import exclude_from_config
from .config import merge_configs
from .config import normalize_config
from .config import validate_config
from .defaults import DEFAULT_APPEND_DIM
from .defaults import DEFAULT_APPEND_STEP
from .defaults import DEFAULT_SLICE_POLLING_INTERVAL
from .defaults import DEFAULT_SLICE_POLLING_TIMEOUT
from .defaults import DEFAULT_ZARR_VERSION
Expand Down
1 change: 1 addition & 0 deletions zappend/config/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

DEFAULT_ZARR_VERSION = 2
DEFAULT_APPEND_DIM = "time"
DEFAULT_APPEND_STEP = None

DEFAULT_SLICE_POLLING_INTERVAL = 2
DEFAULT_SLICE_POLLING_TIMEOUT = 60
Loading