Skip to content

Commit 2543fd3

Browse files
committed
Merge branch 'issue1293-load-stac-start-end-datetime'
2 parents 51ca476 + d4cf9a5 commit 2543fd3

File tree

5 files changed

+348
-19
lines changed

5 files changed

+348
-19
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ without compromising stable operations.
1818
- Configurable usage of `async_task` ([eu-cdse/openeo-cdse-infra#387](https://github.com/eu-cdse/openeo-cdse-infra/issues/387))
1919
- Add job option "omit-derived-from-links" to omit "derived_from" links in batch job results metadata ([ESA-WEED-project/eo_processing#175](https://github.com/ESA-WEED-project/eo_processing/issues/175))
2020
- Better freeIPA configurability for proxy user lookup ([#1261](https://github.com/Open-EO/openeo-geopyspark-driver/issues/1261), eu-cdse/openeo-cdse-infra#626)
21+
- `load_stac`: temporal filtering of STAC Items: prefer more descriptive `start_datetime`+`end_datetime` over nominal `datetime` if possible ([#1293](https://github.com/Open-EO/openeo-geopyspark-driver/issues/1293))
2122

2223

2324
## 0.66.0

openeogeotrellis/load_stac.py

Lines changed: 101 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import re
22

33
import datetime as dt
4+
import datetime
45
import json
56
import time
67
from functools import partial
@@ -48,6 +49,7 @@
4849
from openeogeotrellis.constants import EVAL_ENV_KEY
4950
from openeogeotrellis.geopysparkcubemetadata import GeopysparkCubeMetadata
5051
from openeogeotrellis.geopysparkdatacube import GeopysparkDataCube
52+
from openeogeotrellis.util.datetime import to_datetime_utc_unless_none
5153
from openeogeotrellis.utils import normalize_temporal_extent, get_jvm, to_projected_polygons, map_optional, unzip
5254
from openeogeotrellis.integrations.stac import ResilientStacIO
5355

@@ -92,22 +94,7 @@ def load_stac(
9294
to_date = (dt.datetime.combine(until_date, dt.time.max, until_date.tzinfo) if from_date == until_date
9395
else until_date - dt.timedelta(milliseconds=1))
9496

95-
def intersects_spatiotemporally(itm: pystac.Item) -> bool:
96-
def intersects_temporally() -> bool:
97-
nominal_date = itm.datetime or dateutil.parser.parse(itm.properties["start_datetime"])
98-
return from_date <= nominal_date <= to_date
99-
100-
def intersects_spatially() -> bool:
101-
if not requested_bbox or itm.bbox is None:
102-
return True
103-
104-
requested_bbox_lonlat = requested_bbox.reproject("EPSG:4326")
105-
return requested_bbox_lonlat.as_polygon().intersects(
106-
Polygon.from_bounds(*itm.bbox)
107-
)
108-
109-
return intersects_temporally() and intersects_spatially()
110-
97+
spatiotemporal_extent = _SpatioTemporalExtent(bbox=requested_bbox, from_date=from_date, to_date=to_date)
11198

11299
def get_pixel_value_offset(itm: pystac.Item, asst: pystac.Asset) -> float:
113100
raster_scale = asst.extra_fields.get("raster:scale", itm.properties.get("raster:scale", 1.0))
@@ -200,7 +187,7 @@ def operator_value(criterion: Dict[str, object]) -> (str, object):
200187
"proj:shape": asset.get("proj:shape"),
201188
}))
202189

203-
if intersects_spatiotemporally(pystac_item) and "data" in asset.get("roles", []):
190+
if spatiotemporal_extent.item_intersects(pystac_item) and "data" in asset.get("roles", []):
204191
pystac_asset = pystac.Asset(
205192
href=asset["href"],
206193
extra_fields={
@@ -229,7 +216,7 @@ def operator_value(criterion: Dict[str, object]) -> (str, object):
229216

230217
item = stac_object
231218
band_names = stac_metadata_parser.bands_from_stac_item(item=item).band_names()
232-
intersecting_items = [item] if intersects_spatiotemporally(item) else []
219+
intersecting_items = [item] if spatiotemporal_extent.item_intersects(item) else []
233220
elif isinstance(stac_object, pystac.Collection) and _supports_item_search(stac_object):
234221
collection = stac_object
235222
netcdf_with_time_dimension = contains_netcdf_with_time_dimension(collection)
@@ -364,7 +351,7 @@ def intersects_temporally(interval) -> bool:
364351
itm
365352
for intersecting_catalog in intersecting_catalogs(root=catalog)
366353
for itm in intersecting_catalog.get_items()
367-
if intersects_spatiotemporally(itm)
354+
if spatiotemporal_extent.item_intersects(itm)
368355
)
369356

370357
jvm = get_jvm()
@@ -723,6 +710,101 @@ def intersects_temporally(interval) -> bool:
723710
return GeopysparkDataCube(pyramid=gps.Pyramid(levels), metadata=metadata)
724711

725712

713+
class _TemporalExtent:
    """
    Helper to represent a load_collection/load_stac-style temporal extent
    with a from_date (inclusive) and to_date (exclusive)
    and calculate intersection with STAC entities
    based on nominal datetime or start_datetime+end_datetime

    Either bound can be ``None``, which leaves the extent unbounded on that side.

    refs:
    - https://github.com/radiantearth/stac-spec/blob/master/item-spec/item-spec.md#datetime
    - https://github.com/radiantearth/stac-spec/blob/master/commons/common-metadata.md#date-and-time-range
    """
    # TODO: move this to a more generic location for better reuse

    __slots__ = ("from_date", "to_date")

    def __init__(
        self,
        from_date: Union[str, datetime.datetime, datetime.date, None],
        to_date: Union[str, datetime.datetime, datetime.date, None],
    ):
        """
        :param from_date: inclusive start of the extent (``None``: no lower bound)
        :param to_date: exclusive end of the extent (``None``: no upper bound)
        """
        # Both bounds are normalized with `to_datetime_utc_unless_none`,
        # so the comparisons in `intersects` work on like-for-like values.
        self.from_date: Union[datetime.datetime, None] = to_datetime_utc_unless_none(from_date)
        self.to_date: Union[datetime.datetime, None] = to_datetime_utc_unless_none(to_date)

    def intersects(
        self,
        nominal: Union[str, datetime.datetime, datetime.date, None] = None,
        start_datetime: Union[str, datetime.datetime, datetime.date, None] = None,
        end_datetime: Union[str, datetime.datetime, datetime.date, None] = None,
    ) -> bool:
        """
        Check if the given datetime/interval intersects with the temporal extent.

        :param nominal: nominal datetime (e.g. typically the "datetime" property of a STAC Item)
        :param start_datetime: start of the interval (e.g. "start_datetime" property of a STAC Item)
        :param end_datetime: end of the interval (e.g. "end_datetime" property of a STAC Item)
        :return: True if the instant/interval overlaps with [from_date, to_date)
        :raises ValueError: if neither a well-ordered start+end interval
            nor a nominal datetime is available
        """
        start_datetime = to_datetime_utc_unless_none(start_datetime)
        end_datetime = to_datetime_utc_unless_none(end_datetime)
        nominal = to_datetime_utc_unless_none(nominal)

        # If available, start+end are preferred (cleanly defined interval)
        # fall back on nominal otherwise
        if start_datetime is not None and end_datetime is not None and start_datetime <= end_datetime:
            pass
        elif nominal is not None:
            # Treat the nominal datetime as a zero-length interval.
            start_datetime = end_datetime = nominal
        else:
            raise ValueError(f"Ill-defined instant/interval {nominal=} {start_datetime=} {end_datetime=}")

        # The item interval is closed ([start, end]),
        # the requested extent is half-open ([from_date, to_date)).
        return (self.from_date is None or self.from_date <= end_datetime) and (
            self.to_date is None or start_datetime < self.to_date
        )
765+
766+
767+
class _SpatialExtent:
768+
"""
769+
Helper to represent a spatial extent with a bounding box
770+
and calculate intersection with STAC entities (e.g. bbox of a STAC Item).
771+
"""
772+
# TODO: move this to a more generic location for better reuse
773+
774+
__slots__ = ("bbox", "_bbox_lonlat_shape")
775+
776+
def __init__(self, *, bbox: Union[BoundingBox, None]):
777+
# TODO: support more bbox representations as input
778+
self.bbox = bbox
779+
self._bbox_lonlat_shape = self.bbox.reproject("EPSG:4326").as_polygon() if self.bbox else None
780+
781+
def intersects(self, bbox: Union[List[float], Tuple[float, float, float, float], None]):
782+
# TODO: this assumes bbox is in lon/lat coordinates, also support other CRSes?
783+
if not self.bbox or bbox is None:
784+
return True
785+
return self._bbox_lonlat_shape.intersects(Polygon.from_bounds(*bbox))
786+
787+
788+
class _SpatioTemporalExtent:
    """
    Pairing of a spatial extent (bounding box) and a temporal extent (date range),
    to check whether STAC Items intersect with both.
    """
    # TODO: move this to a more generic location for better reuse

    def __init__(
        self,
        *,
        bbox: Union[BoundingBox, None],
        from_date: Union[str, datetime.datetime, datetime.date, None],
        to_date: Union[str, datetime.datetime, datetime.date, None],
    ):
        """
        :param bbox: bounding box handed to the spatial extent helper
        :param from_date: start of the date range handed to the temporal extent helper
        :param to_date: end of the date range handed to the temporal extent helper
        """
        self._spatial_extent = _SpatialExtent(bbox=bbox)
        self._temporal_extent = _TemporalExtent(from_date=from_date, to_date=to_date)

    def item_intersects(self, item: pystac.Item) -> bool:
        """
        Check if the given STAC Item intersects both temporally and spatially.

        The temporal check runs first; the spatial check is only done when it passes.
        """
        in_time_range = self._temporal_extent.intersects(
            nominal=item.datetime,
            start_datetime=item.properties.get("start_datetime"),
            end_datetime=item.properties.get("end_datetime"),
        )
        if not in_time_range:
            return in_time_range
        return self._spatial_extent.intersects(item.bbox)
806+
807+
726808
def _is_supported_raster_mime_type(mime_type: str) -> bool:
727809
mime_type = mime_type.lower()
728810
# https://github.com/radiantearth/stac-spec/blob/master/best-practices.md#common-media-types-in-stac

openeogeotrellis/util/datetime.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from typing import Union
2+
import datetime
3+
4+
import dateutil.parser
5+
6+
7+
def to_datetime_utc(d: Union[str, datetime.datetime, datetime.date]) -> datetime.datetime:
    """
    Parse/convert to datetime in UTC.

    - strings are parsed with ``dateutil.parser.parse``
    - ``datetime.date`` values are mapped to midnight of that day
    - naive values (no timezone info) are tagged as UTC
    - timezone-aware values are converted to UTC

    :param d: date/datetime representation to convert
    :return: timezone-aware datetime in UTC
    :raises ValueError: if ``d`` is not a str, datetime or date
    """
    if isinstance(d, str):
        d = dateutil.parser.parse(d)
    elif isinstance(d, datetime.datetime):
        # Must be checked before `datetime.date`: `datetime` is a subclass of `date`.
        pass
    elif isinstance(d, datetime.date):
        d = datetime.datetime.combine(d, datetime.time.min)
    else:
        raise ValueError(f"Expected str/datetime/date, but got {type(d)}")

    if d.tzinfo is None:
        return d.replace(tzinfo=datetime.timezone.utc)
    return d.astimezone(datetime.timezone.utc)
22+
23+
24+
def to_datetime_utc_unless_none(
    d: Union[str, datetime.datetime, datetime.date, None]
) -> Union[datetime.datetime, None]:
    """Variant of `to_datetime_utc` that passes `None` through unchanged."""
    if d is None:
        return None
    return to_datetime_utc(d)

tests/test_load_stac.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import dirty_equals
22
import pystac
33
from contextlib import nullcontext
4+
import datetime
45

56
import mock
67
import pytest
@@ -12,6 +13,7 @@
1213
from openeo_driver.backend import BatchJobMetadata, BatchJobs, LoadParameters
1314
from openeo_driver.errors import OpenEOApiException
1415
from openeo_driver.util.date_math import now_utc
16+
from openeo_driver.util.geometry import BoundingBox
1517
from openeo_driver.utils import EvalEnv
1618

1719
from openeogeotrellis.load_stac import (
@@ -22,6 +24,9 @@
2224
_is_band_asset,
2325
_supports_item_search,
2426
_get_proj_metadata,
27+
_TemporalExtent,
28+
_SpatioTemporalExtent,
29+
_SpatialExtent,
2530
)
2631

2732

@@ -718,3 +723,166 @@ def test_get_proj_metadata_from_asset():
718723
)
719724
item = pystac.Item.from_dict(StacDummyBuilder.item())
720725
assert _get_proj_metadata(asset, item=item) == (32631, (12.0, 34.0, 56.0, 78.0), (12, 34))
726+
727+
728+
class TestTemporalExtent:
729+
def test_empty(self):
730+
extent = _TemporalExtent(None, None)
731+
assert extent.intersects("1789-07-14") == True
732+
assert extent.intersects(nominal="1789-07-14") == True
733+
assert extent.intersects(start_datetime="1914-07-28", end_datetime="1918-11-11") == True
734+
assert extent.intersects(nominal="2025-07-24") == True
735+
736+
def test_nominal_basic(self):
737+
extent = _TemporalExtent("2025-03-04T11:11:11", "2025-05-06T22:22:22")
738+
assert extent.intersects(nominal="2022-10-11") == False
739+
assert extent.intersects(nominal="2025-03-03T12:13:14") == False
740+
assert extent.intersects(nominal="2025-03-05T05:05:05") == True
741+
assert extent.intersects(nominal="2025-07-07T07:07:07") == False
742+
743+
assert extent.intersects(nominal=datetime.date(2025, 4, 10)) == True
744+
assert extent.intersects(nominal=datetime.datetime(2025, 4, 10, 12)) == True
745+
746+
def test_nominal_edges(self):
747+
extent = _TemporalExtent("2025-03-04T11:11:11", "2025-05-06T22:22:22")
748+
assert extent.intersects(nominal="2025-03-04T11:11:10") == False
749+
assert extent.intersects(nominal="2025-03-04T11:11:11") == True
750+
assert extent.intersects(nominal="2025-03-05T05:05:05") == True
751+
assert extent.intersects(nominal="2025-05-06T22:22:21") == True
752+
assert extent.intersects(nominal="2025-05-06T22:22:22") == False
753+
754+
def test_nominal_timezones(self):
755+
extent = _TemporalExtent("2025-03-04T11:11:11Z", "2025-05-06T22:22:22-03")
756+
assert extent.intersects(nominal="2025-03-04T11:11:10") == False
757+
assert extent.intersects(nominal="2025-03-04T11:11:10-02") == True
758+
assert extent.intersects(nominal="2025-03-04T11:11:10-04:00") == True
759+
assert extent.intersects(nominal="2025-03-04T13:11:11") == True
760+
assert extent.intersects(nominal="2025-03-04T13:11:11+02") == True
761+
assert extent.intersects(nominal="2025-03-04T13:11:10+02") == False
762+
763+
assert extent.intersects(nominal="2025-05-06T22:22:22") == True
764+
assert extent.intersects(nominal="2025-05-06T22:22:22-02") == True
765+
assert extent.intersects(nominal="2025-05-06T22:22:22-03") == False
766+
assert extent.intersects(nominal="2025-05-07T01:22:21Z") == True
767+
assert extent.intersects(nominal="2025-05-07T01:22:22Z") == False
768+
769+
def test_nominal_half_open(self):
770+
extent = _TemporalExtent(None, "2025-05-06")
771+
assert extent.intersects(nominal="1789-07-14") == True
772+
assert extent.intersects(nominal="2025-05-05") == True
773+
assert extent.intersects(nominal="2025-05-06") == False
774+
assert extent.intersects(nominal="2025-11-11") == False
775+
776+
extent = _TemporalExtent("2025-05-06", None)
777+
assert extent.intersects(nominal="2025-05-05") == False
778+
assert extent.intersects(nominal="2025-05-06") == True
779+
assert extent.intersects(nominal="2099-11-11") == True
780+
781+
def test_start_end_basic(self):
782+
extent = _TemporalExtent("2025-03-04T11:11:11", "2025-05-06T22:22:22")
783+
assert extent.intersects(start_datetime="2022-02-02", end_datetime="2022-02-03") == False
784+
assert extent.intersects(start_datetime="2025-02-02", end_datetime="2025-04-04") == True
785+
assert extent.intersects(start_datetime="2025-03-10", end_datetime="2025-04-04") == True
786+
assert extent.intersects(start_datetime="2025-03-10", end_datetime="2025-08-08") == True
787+
assert extent.intersects(start_datetime="2025-06-10", end_datetime="2025-08-08") == False
788+
789+
def test_start_end_edges(self):
790+
extent = _TemporalExtent("2025-03-04T11:11:11", "2025-05-06T22:22:22")
791+
assert extent.intersects(start_datetime="2025-02-02", end_datetime="2025-03-04T11:11:10") == False
792+
assert extent.intersects(start_datetime="2025-02-02", end_datetime="2025-03-04T11:11:11") == True
793+
assert extent.intersects(start_datetime="2025-02-02", end_datetime="2025-03-04T11:11:12") == True
794+
795+
assert extent.intersects(start_datetime="2025-05-06T22:22:21", end_datetime="2025-08-08") == True
796+
assert extent.intersects(start_datetime="2025-05-06T22:22:22", end_datetime="2025-08-08") == False
797+
assert extent.intersects(start_datetime="2025-05-06T22:22:23", end_datetime="2025-08-08") == False
798+
799+
def test_start_end_timezones(self):
800+
extent = _TemporalExtent("2025-03-04T11:11:11Z", "2025-05-06T22:22:22-03")
801+
assert extent.intersects(start_datetime="2025-02-02", end_datetime="2025-03-04T12:12:12") == True
802+
assert extent.intersects(start_datetime="2025-02-02", end_datetime="2025-03-04T12:12:12Z") == True
803+
assert extent.intersects(start_datetime="2025-02-02", end_datetime="2025-03-04T12:12:12+06") == False
804+
assert extent.intersects(start_datetime="2025-02-02", end_datetime="2025-03-04T10:10:10") == False
805+
assert extent.intersects(start_datetime="2025-02-02", end_datetime="2025-03-04T10:10:10-03") == True
806+
807+
def test_start_end_half_open(self):
808+
extent = _TemporalExtent(None, "2025-05-06")
809+
assert extent.intersects(start_datetime="2025-02-02", end_datetime="2025-05-05") == True
810+
assert extent.intersects(start_datetime="2025-02-02", end_datetime="2025-08-08") == True
811+
assert extent.intersects(start_datetime="2025-06-06", end_datetime="2025-08-08") == False
812+
813+
extent = _TemporalExtent("2025-05-06", None)
814+
assert extent.intersects(start_datetime="2025-02-02", end_datetime="2025-05-05") == False
815+
assert extent.intersects(start_datetime="2025-02-02", end_datetime="2025-08-08") == True
816+
assert extent.intersects(start_datetime="2025-06-06", end_datetime="2025-08-08") == True
817+
818+
def test_nominal_vs_start_end(self):
819+
"""https://github.com/Open-EO/openeo-geopyspark-driver/issues/1293"""
820+
extent = _TemporalExtent("2024-02-01", "2024-02-10")
821+
assert extent.intersects(nominal="2024-01-01", start_datetime="2024-01-01", end_datetime="2024-12-31") == True
822+
823+
824+
class TestSpatialExtent:
825+
def test_empty(self):
826+
extent = _SpatialExtent(bbox=None)
827+
assert extent.intersects(None) is True
828+
assert extent.intersects((1, 2, 3, 4)) == True
829+
830+
def test_basic(self):
831+
extent = _SpatialExtent(bbox=BoundingBox(west=3, south=51, east=4, north=52, crs=4326))
832+
assert extent.intersects((1, 2, 3, 4)) == False
833+
assert extent.intersects((2, 50, 3.1, 51.1)) == True
834+
assert extent.intersects((3.3, 51.1, 3.5, 51.5)) == True
835+
assert extent.intersects((3.9, 51.9, 4.4, 52.2)) == True
836+
assert extent.intersects((5, 51.1, 6, 52.2)) == False
837+
838+
839+
class TestSpatioTemporalExtent:
840+
@pytest.mark.parametrize(
841+
["bbox", "properties", "expected"],
842+
[
843+
(
844+
[20, 34, 26, 40],
845+
{
846+
"datetime": "2024-01-01T00:00:00Z",
847+
"start_datetime": "2024-01-01T00:00:00Z",
848+
"end_datetime": "2024-12-31T23:59:59Z",
849+
},
850+
True,
851+
),
852+
(
853+
[20, 34, 26, 40],
854+
{"datetime": "2024-01-01T00:00:00Z"},
855+
False,
856+
),
857+
(
858+
[20, 34, 26, 40],
859+
{"datetime": "2024-02-02T00:00:00Z"},
860+
True,
861+
),
862+
(
863+
[60, 34, 66, 40],
864+
{
865+
"datetime": "2024-01-01T00:00:00Z",
866+
"start_datetime": "2024-01-01T00:00:00Z",
867+
"end_datetime": "2024-12-31T23:59:59Z",
868+
},
869+
False,
870+
),
871+
],
872+
)
873+
def test_item_intersects(self, bbox, properties, expected):
874+
extent = _SpatioTemporalExtent(
875+
bbox=BoundingBox(west=21, south=35, east=25, north=38, crs=4326),
876+
from_date="2024-02-01",
877+
to_date="2024-02-10",
878+
)
879+
item = pystac.Item.from_dict(
880+
{
881+
"type": "Feature",
882+
"stac_version": "1.0.0",
883+
"id": "2024_GRC_V00",
884+
"bbox": bbox,
885+
"properties": properties,
886+
}
887+
)
888+
assert extent.item_intersects(item) == expected

0 commit comments

Comments
 (0)