Skip to content

Commit d651445

Browse files
committed
ENH: Add module with extra dependencies to create a generic xarray dataset from a packet data file
1 parent 1baa070 commit d651445

File tree

7 files changed

+233
-3
lines changed

7 files changed

+233
-3
lines changed

.github/workflows/pr_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
- name: Install dependencies
4747
run: |
4848
python -m pip install --upgrade pip
49-
pip install . pytest pytest-randomly
49+
pip install ".[xarray]" pytest pytest-randomly
5050
5151
- name: Testing
5252
run: |

docs/source/changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Release notes for the `space_packet_parser` library
2525
``definition.packet_generator(data, combine_segmented_packets=True, secondary_header_bytes=4)``
2626
- Add a command line interface (spp) to enable quick and easy access to
2727
some common tasks and utilities.
28+
- Add a function to directly create an `xarray.Dataset` from a packet file and XTCE definition.
2829

2930
### v5.0.1 (released)
3031
- BUGFIX: Allow raw_value representation for enums with falsy raw values. Previously these defaulted to the enum label.

pyproject.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ python = ">=3.9"
3939
lxml = ">=4.8.0"
4040
click = "^8.0"
4141
rich = "^13.0"
42+
# Extras dependencies
43+
xarray = { version = ">2024.0.0", optional = true }
44+
numpy = { version = "^2.0.0", optional = true }
4245

4346
[tool.poetry.group.dev.dependencies]
4447
pycodestyle = "*"
@@ -60,6 +63,9 @@ optional = true
6063
matplotlib = ">=3.4"
6164
memory-profiler = "^0.61.0"
6265

66+
[tool.poetry.extras]
67+
xarray = ["xarray", "numpy"]
68+
6369
[tool.poetry.scripts]
6470
spp = "space_packet_parser.cli:spp"
6571

space_packet_parser/extras/__init__.py

Whitespace-only changes.

space_packet_parser/extras/xarr.py

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
"""Extras package that supports generating an `xarray.Dataset` directly"""
2+
# Extras import first since it might fail
3+
try:
4+
import xarray as xr
5+
import numpy as np
6+
except ImportError as ie:
7+
raise ImportError(
8+
"Failed to import dependencies for xarray extra. Did you install the [xarray] extras package?"
9+
) from ie
10+
# Standard
11+
import collections
12+
from typing import Optional, Union, Iterable
13+
from pathlib import Path
14+
# Local
15+
from space_packet_parser import definitions, parameters, encodings
16+
17+
18+
def _get_minimum_numpy_datatype(  # noqa: PLR0912 - Too many branches pylint: disable=too-many-branches
        name: str,
        definition: definitions.XtcePacketDefinition,
        use_raw_value: bool = False
) -> Optional[str]:
    """
    Get the minimum datatype for a given variable.

    Parameters
    ----------
    name : str
        The variable name.
    definition : definitions.XtcePacketDefinition
        The XTCE packet definition. Used to examine data types to infer their minimal numpy representation.
    use_raw_value : bool
        Default False. If True, uses the data type of the raw value for each parameter.

    Returns
    -------
    datatype : Optional[str]
        The minimum numpy dtype for the parameter.
        Returns None to indicate that numpy should use default dtype inference.

    Raises
    ------
    ValueError
        If the parameter's data encoding is not a numeric, binary, or string encoding.
    """
    parameter_type = definition.named_parameters[name].parameter_type
    data_encoding = parameter_type.encoding

    datatype = None

    if not use_raw_value and isinstance(parameter_type, parameters.EnumeratedParameterType):
        # The derived value of an enumerated parameter is its label, not the
        # encoded integer, so the encoding tells us nothing about the dtype.
        # Let numpy infer the datatype. (Raw values fall through and get the
        # dtype of the underlying encoding.)
        return None

    if isinstance(data_encoding, encodings.NumericDataEncoding):
        if not use_raw_value and (
                data_encoding.context_calibrators is not None
                or data_encoding.default_calibrator is not None
        ):
            # If there are calibrators, the derived value no longer matches the
            # encoded representation, so we default to None and
            # let numpy infer the datatype
            return None

        nbits = data_encoding.size_in_bits
        if isinstance(data_encoding, encodings.IntegerDataEncoding):
            datatype = "int"
            if data_encoding.encoding == "unsigned":
                datatype = "uint"
            # Round the bit width up to the next width numpy supports
            if nbits <= 8:
                datatype += "8"
            elif nbits <= 16:
                datatype += "16"
            elif nbits <= 32:
                datatype += "32"
            else:
                datatype += "64"
        elif isinstance(data_encoding, encodings.FloatDataEncoding):
            datatype = "float"
            # Anything that is not exactly 32 bits is stored as float64
            if nbits == 32:
                datatype += "32"
            else:
                datatype += "64"
    elif isinstance(data_encoding, encodings.BinaryDataEncoding):
        # TODO: Binary string representation right now, do we want bytes or
        #       something else like the new StringDType instead?
        datatype = "str"
    elif isinstance(data_encoding, encodings.StringDataEncoding):
        # TODO: Use the new StringDType instead?
        datatype = "str"
    else:
        raise ValueError(f"Unsupported data encoding: {data_encoding}")

    return datatype
92+
93+
94+
def create_dataset(
        packet_files: Union[str, Path, Iterable[Union[str, Path]]],
        xtce_packet_definition: Union[str, Path, definitions.XtcePacketDefinition],
        use_raw_values: bool = False,
        packet_generator_kwargs: Optional[dict] = None
) -> dict[int, xr.Dataset]:
    """Create one xarray dataset per APID from one or more packet files

    # TODO: Filter by APID to handle muxed streams?

    Notes
    -----
    This function only handles packet definitions with the same variable structure
    across all packets with the same ApId. For example, this cannot be used for polymorphic
    packets whose structure changes based on previously parsed values.

    Parameters
    ----------
    packet_files : Union[str, Path, Iterable[Union[str, Path]]]
        Packet files
    xtce_packet_definition : Union[str, Path, XtcePacketDefinition]
        Packet definition for parsing the packet data
    use_raw_values: bool
        Default False. If True, saves parameter raw values to the resulting Dataset.
        e.g. enumerated lookups will be saved as their encoded integer values.
    packet_generator_kwargs : Optional[dict]
        Keyword arguments passed to `XtcePacketDefinition.packet_generator()`

    Returns
    -------
    : dict[int, xarray.Dataset]
        Mapping of APID to the Dataset built from every packet with that APID.
        Each variable has a single "packet" dimension.

    Raises
    ------
    ValueError
        If two packets with the same APID do not contain the same set of fields.
    """
    packet_generator_kwargs = packet_generator_kwargs or {}

    if not isinstance(xtce_packet_definition, definitions.XtcePacketDefinition):
        xtce_packet_definition = definitions.XtcePacketDefinition(xtce_packet_definition)

    if isinstance(packet_files, (str, Path)):
        packet_files = [packet_files]

    # Set up containers to store our data
    # We are getting a packet file that may contain multiple apids
    # Each apid has consistent data fields, so we want to create a
    # dataset per apid.
    # {apid1: {field: [values...]}, apid2: {...}, ...}
    data_dict: dict[int, dict] = {}
    # Also keep track of the datatype mapping for each field
    datatype_mapping: dict[int, dict] = {}
    # Keep track of which variables (keys) are in the dataset.
    # Stores the keys view of the first packet seen per apid; keys views
    # compare set-wise, so field order does not matter for the check below.
    variable_mapping: dict = {}

    for packet_file in packet_files:
        with open(packet_file, "rb") as f:
            # Consume the generator while the file is still open rather than
            # materializing every packet of the file into a list first.
            for packet in xtce_packet_definition.packet_generator(f, **packet_generator_kwargs):
                apid = list(packet.values())[3]  # Allows for unrestricted naming of header fields
                if apid not in data_dict:
                    # This is the first packet for this APID
                    data_dict[apid] = collections.defaultdict(list)
                    datatype_mapping[apid] = {}
                    variable_mapping[apid] = packet.keys()

                if variable_mapping[apid] != packet.keys():
                    raise ValueError(
                        f"Packet fields do not match for APID {apid}. This could be "
                        f"due to a conditional (polymorphic) packet definition in the XTCE, while this "
                        f"function currently only supports flat packet definitions."
                        f"\nExpected: {variable_mapping[apid]},\ngot: {list(packet.keys())}"
                    )

                # TODO: Do we want to give an option to remove the header content?
                #       Headers are generally useful, so what is the use case?

                for key, value in packet.items():
                    # Store the raw value when requested,
                    # otherwise store the parsed value itself
                    val = value.raw_value if use_raw_values else value

                    data_dict[apid][key].append(val)
                    if key not in datatype_mapping[apid]:
                        # Add this datatype to the mapping
                        datatype_mapping[apid][key] = _get_minimum_numpy_datatype(
                            key, xtce_packet_definition, use_raw_value=use_raw_values
                        )

    # Turn each per-apid dict of lists into an xarray dataset
    dataset_by_apid = {}

    for apid, data in data_dict.items():
        ds = xr.Dataset(
            data_vars={
                key: (["packet"], np.asarray(list_of_values, dtype=datatype_mapping[apid][key]))
                for key, list_of_values in data.items()
            }
        )

        dataset_by_apid[apid] = ds

    return dataset_by_apid

space_packet_parser/packets.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,8 @@ def create_ccsds_packet(data=b"\x00",
217217
secondary_header_flag=0,
218218
apid=2047, # 2047 is defined as a fill packet in the CCSDS spec
219219
sequence_flags=SequenceFlags.UNSEGMENTED,
220-
sequence_count=0):
221-
"""Create a binary CCSDS packet.
220+
sequence_count=0) -> RawPacketData:
221+
"""Create a binary CCSDS packet from input values.
222222
223223
Pack the header fields into the proper bit locations and append the data bytes.
224224
@@ -238,6 +238,11 @@ def create_ccsds_packet(data=b"\x00",
238238
CCSDS Packet Sequence Flags (2 bits)
239239
sequence_count : int
240240
CCSDS Packet Sequence Count (14 bits)
241+
242+
Returns
243+
-------
244+
: RawPacketData
245+
Resulting binary packet
241246
"""
242247
if version_number < 0 or version_number > 7: # 3 bits
243248
raise ValueError("version_number must be between 0 and 7")

tests/integration/test_xarr.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
"""Test creating an xarray dataset from CCSDS packets"""
2+
from space_packet_parser.extras.xarr import create_dataset
3+
4+
5+
def test_create_xarray_dataset(jpss_test_data_dir):
    """Build datasets from a single JPSS geolocation packet file and check their shape"""
    datasets = create_dataset(
        jpss_test_data_dir / "J01_G011_LZ_2021-04-09T00-00-00Z_V01.DAT1",
        jpss_test_data_dir / "jpss1_geolocation_xtce_v1.xml",
    )
    # Only APID 11 is expected in this file
    assert list(datasets.keys()) == [11]
    apid_dataset = datasets[11]
    assert len(apid_dataset) == 27
    assert len(apid_dataset["VERSION"]) == 7200
13+
14+
15+
def test_create_xarray_dataset_multiple_files(jpss_test_data_dir):
    """Parse the same packet file twice and check that the packets are concatenated"""
    packet_path = jpss_test_data_dir / "J01_G011_LZ_2021-04-09T00-00-00Z_V01.DAT1"
    datasets = create_dataset(
        [packet_path, packet_path],
        jpss_test_data_dir / "jpss1_geolocation_xtce_v1.xml",
    )
    # Still a single APID, with twice as many entries per variable
    assert list(datasets.keys()) == [11]
    apid_dataset = datasets[11]
    assert len(apid_dataset) == 27
    assert len(apid_dataset["VERSION"]) == 14400

0 commit comments

Comments
 (0)