Skip to content

Commit 5152624

Browse files
authored
Add reader_kwargs argument to open_virtual_dataset (#315)
* add reader_kwargs argument to open_virtual_dataset, and pass it down to every reader
* rename reader_kwargs -> virtual_backend_kwargs
* release note
1 parent 3d7a4be commit 5152624

File tree

11 files changed

+57
-0
lines changed

11 files changed

+57
-0
lines changed

docs/releases.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ v1.1.1 (unreleased)
99
New Features
1010
~~~~~~~~~~~~
1111

12+
- Add a ``virtual_backend_kwargs`` keyword argument to file readers and to ``open_virtual_dataset``, to allow reader-specific options to be passed down.
13+
(:pull:`315`) By `Tom Nicholas <https://github.com/TomNicholas>`_.
14+
1215
Breaking changes
1316
~~~~~~~~~~~~~~~~
1417

virtualizarr/backend.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ def open_virtual_dataset(
113113
cftime_variables: Iterable[str] | None = None,
114114
indexes: Mapping[str, Index] | None = None,
115115
virtual_array_class=ManifestArray,
116+
virtual_backend_kwargs: Optional[dict] = None,
116117
reader_options: Optional[dict] = None,
117118
backend: Optional[VirtualBackend] = None,
118119
) -> Dataset:
@@ -147,6 +148,8 @@ def open_virtual_dataset(
147148
virtual_array_class
148149
Virtual array class to use to represent the references to the chunks in each on-disk array.
149150
Currently can only be ManifestArray, but once VirtualZarrArray is implemented the default should be changed to that.
151+
virtual_backend_kwargs: dict, default is None
152+
Dictionary of keyword arguments passed down to the selected reader. Allows passing arguments specific to certain readers.
150153
reader_options: dict, default {}
151154
Dict passed into Kerchunk file readers, to allow reading from remote filesystems.
152155
Note: Each Kerchunk file reader has distinct arguments, so ensure reader_options match selected Kerchunk reader arguments.
@@ -201,6 +204,7 @@ def open_virtual_dataset(
201204
loadable_variables=loadable_variables,
202205
decode_times=decode_times,
203206
indexes=indexes,
207+
virtual_backend_kwargs=virtual_backend_kwargs,
204208
reader_options=reader_options,
205209
)
206210

virtualizarr/readers/common.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ def open_virtual_dataset(
168168
loadable_variables: Iterable[str] | None = None,
169169
decode_times: bool | None = None,
170170
indexes: Mapping[str, Index] | None = None,
171+
virtual_backend_kwargs: Optional[dict] = None,
171172
reader_options: Optional[dict] = None,
172173
) -> Dataset:
173174
raise NotImplementedError()
@@ -180,6 +181,7 @@ def open_virtual_datatree(
180181
loadable_variables: Iterable[str] | None = None,
181182
decode_times: bool | None = None,
182183
indexes: Mapping[str, Index] | None = None,
184+
virtual_backend_kwargs: Optional[dict] = None,
183185
reader_options: Optional[dict] = None,
184186
) -> DataTree:
185187
raise NotImplementedError()

virtualizarr/readers/dmrpp.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,19 @@ def open_virtual_dataset(
2323
loadable_variables: Iterable[str] | None = None,
2424
decode_times: bool | None = None,
2525
indexes: Mapping[str, Index] | None = None,
26+
virtual_backend_kwargs: Optional[dict] = None,
2627
reader_options: Optional[dict] = None,
2728
) -> Dataset:
2829
loadable_variables, drop_variables = check_for_collisions(
2930
drop_variables=drop_variables,
3031
loadable_variables=loadable_variables,
3132
)
3233

34+
if virtual_backend_kwargs:
35+
raise NotImplementedError(
36+
"DMR++ reader does not understand any virtual_backend_kwargs"
37+
)
38+
3339
if loadable_variables != [] or decode_times or indexes is None:
3440
raise NotImplementedError(
3541
"Specifying `loadable_variables` or auto-creating indexes with `indexes=None` is not supported for dmrpp files."

virtualizarr/readers/fits.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,16 @@ def open_virtual_dataset(
2323
loadable_variables: Iterable[str] | None = None,
2424
decode_times: bool | None = None,
2525
indexes: Mapping[str, Index] | None = None,
26+
virtual_backend_kwargs: Optional[dict] = None,
2627
reader_options: Optional[dict] = None,
2728
) -> Dataset:
2829
from kerchunk.fits import process_file
2930

31+
if virtual_backend_kwargs:
32+
raise NotImplementedError(
33+
"FITS reader does not understand any virtual_backend_kwargs"
34+
)
35+
3036
# handle inconsistency in kerchunk, see GH issue https://github.com/zarr-developers/VirtualiZarr/issues/160
3137
refs = KerchunkStoreRefs({"refs": process_file(filepath, **reader_options)})
3238

virtualizarr/readers/hdf/hdf.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,14 @@ def open_virtual_dataset(
3838
loadable_variables: Iterable[str] | None = None,
3939
decode_times: bool | None = None,
4040
indexes: Mapping[str, Index] | None = None,
41+
virtual_backend_kwargs: Optional[dict] = None,
4142
reader_options: Optional[dict] = None,
4243
) -> xr.Dataset:
44+
if virtual_backend_kwargs:
45+
raise NotImplementedError(
46+
"HDF reader does not understand any virtual_backend_kwargs"
47+
)
48+
4349
drop_variables, loadable_variables = check_for_collisions(
4450
drop_variables,
4551
loadable_variables,

virtualizarr/readers/hdf5.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,16 @@ def open_virtual_dataset(
2323
loadable_variables: Iterable[str] | None = None,
2424
decode_times: bool | None = None,
2525
indexes: Mapping[str, Index] | None = None,
26+
virtual_backend_kwargs: Optional[dict] = None,
2627
reader_options: Optional[dict] = None,
2728
) -> Dataset:
2829
from kerchunk.hdf import SingleHdf5ToZarr
2930

31+
if virtual_backend_kwargs:
32+
raise NotImplementedError(
33+
"HDF5 reader does not understand any virtual_backend_kwargs"
34+
)
35+
3036
drop_variables, loadable_variables = check_for_collisions(
3137
drop_variables,
3238
loadable_variables,

virtualizarr/readers/kerchunk.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,16 @@ def open_virtual_dataset(
2020
loadable_variables: Iterable[str] | None = None,
2121
decode_times: bool | None = None,
2222
indexes: Mapping[str, Index] | None = None,
23+
virtual_backend_kwargs: Optional[dict] = None,
2324
reader_options: Optional[dict] = None,
2425
) -> Dataset:
2526
"""Reads existing kerchunk references (in JSON or parquet) format."""
2627

28+
if virtual_backend_kwargs:
29+
raise NotImplementedError(
30+
"Kerchunk reader does not understand any virtual_backend_kwargs"
31+
)
32+
2733
if group:
2834
raise NotImplementedError()
2935

virtualizarr/readers/netcdf3.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,16 @@ def open_virtual_dataset(
2323
loadable_variables: Iterable[str] | None = None,
2424
decode_times: bool | None = None,
2525
indexes: Mapping[str, Index] | None = None,
26+
virtual_backend_kwargs: Optional[dict] = None,
2627
reader_options: Optional[dict] = None,
2728
) -> Dataset:
2829
from kerchunk.netCDF3 import NetCDF3ToZarr
2930

31+
if virtual_backend_kwargs:
32+
raise NotImplementedError(
33+
"netcdf3 reader does not understand any virtual_backend_kwargs"
34+
)
35+
3036
drop_variables, loadable_variables = check_for_collisions(
3137
drop_variables,
3238
loadable_variables,

virtualizarr/readers/tiff.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,14 @@ def open_virtual_dataset(
2525
loadable_variables: Iterable[str] | None = None,
2626
decode_times: bool | None = None,
2727
indexes: Mapping[str, Index] | None = None,
28+
virtual_backend_kwargs: Optional[dict] = None,
2829
reader_options: Optional[dict] = None,
2930
) -> Dataset:
31+
if virtual_backend_kwargs:
32+
raise NotImplementedError(
33+
"TIFF reader does not understand any virtual_backend_kwargs"
34+
)
35+
3036
from kerchunk.tiff import tiff_to_zarr
3137

3238
drop_variables, loadable_variables = check_for_collisions(

0 commit comments

Comments
 (0)