Skip to content

Commit 1d4866e

Browse files
doc: Adds documentation to the methods and attributes of the Metadata class (#1744)
The Metadata class is lacking documentation, and is quite complex. --------- Co-authored-by: Patricio Del Boca <[email protected]>
1 parent c48cc79 commit 1d4866e

File tree

1 file changed

+192
-9
lines changed

1 file changed

+192
-9
lines changed

frictionless/metadata/metadata.py

Lines changed: 192 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,33 @@
3737
class Metadata:
3838
"""Metadata representation
3939
40+
This class provides functionality for serialization / deserialization of
41+
python child classes to descriptors.
42+
43+
A **descriptor** is a JSON serializable `dict`.
44+
A **profile** is a JSON Schema dict that sets expectations on the format
45+
of the descriptor.
46+
4047
For proper functioning a child class must be decorated by
4148
"@attrs.define(kw_only=True, repr=False)" and ensure that
42-
"Metadata.__attrs_post_init__" is called
49+
"Metadata.__attrs_post_init__" is called:
50+
51+
- `kw_only=True` is required because this class will need explicit
52+
keywords to be able to track which properties have been set at
53+
initialization (see implementation of `__new__`, which uses the keyword
54+
arguments `kwargs`)
55+
- `repr=False` prevents `attrs` from overwriting the inherited `__repr__`
56+
function defined in this class.
4357
4458
"""
4559

4660
custom: dict[str, Any] = {}
4761
"""
48-
List of custom parameters. Any extra properties will be added
62+
List of custom parameters. Any extra property will be added
4963
to the custom property.
64+
65+
A "custom" property is an additional property to the ones expected by the
66+
class's "profile" (See the "metadata_profile_*" properties)
5067
"""
5168

5269
def __new__(cls, *args: Any, **kwargs: Any):
@@ -61,6 +78,14 @@ def __attrs_post_init__(self):
6178
self.metadata_initiated = True
6279

6380
def __setattr__(self, name: str, value: Any):
81+
"""Side effects when setting a property
82+
83+
Properties starting with `_` or `metadata_` have no side effects.
84+
85+
For all other properties, the "metadata_assigned" and
86+
"metadata_defaults" are updated, depending on whether the value has been
87+
set explicitly or implicitly as the default respectively.
88+
"""
6489
if not name.startswith(("_", "metadata_")):
6590
if self.metadata_initiated:
6691
if value is not None:
@@ -74,6 +99,7 @@ def __setattr__(self, name: str, value: Any):
7499
super().__setattr__(name, value)
75100

76101
def __repr__(self) -> str:
102+
"""Returns the pretty-printed descriptor of the object"""
77103
return pprint.pformat(self.to_descriptor(), sort_dicts=False)
78104

79105
@property
@@ -105,7 +131,15 @@ def handle_data(self, data: str):
105131
# Defined
106132

107133
def list_defined(self) -> List[str]:
134+
"""Returns a list of all properties that have been defined.
135+
136+
TODO : the difference with metadata_assigned is that it lists values
137+
that are set in the class that are different from `metadata_defaults`.
138+
How is that possible, I thought metadata_defaults can only be set to
139+
the defaults ?
140+
"""
108141
defined = list(self.metadata_assigned)
142+
109143
for name, default in self.metadata_defaults.items():
110144
value = getattr(self, name, None)
111145
if isinstance(value, type):
@@ -118,15 +152,27 @@ def add_defined(self, name: str) -> None:
118152
self.metadata_assigned.add(name)
119153

120154
def has_defined(self, name: str) -> bool:
155+
"""Whether a property has been defined explicitly"""
121156
return name in self.list_defined()
122157

123158
def get_defined(self, name: str, *, default: Any = None) -> Any:
159+
"""Retrieve the value of a property if it has been explicitly
160+
assigned, or return a default value otherwise"""
124161
if self.has_defined(name):
125162
return getattr(self, name)
126-
if default is not None:
127-
return default
163+
164+
return default
128165

129166
def set_not_defined(self, name: str, value: Any, *, distinct: bool = False) -> None:
167+
"""If no property with "name" has already been assigned, then assign
168+
"value" to this property, but without the side effects of setting an
169+
attribute (see
170+
`__setattr__`, in particular, "has_defined(name)" will still return
171+
False after definition).
172+
173+
Setting `distinct=True` will prevent overwriting an already set value
174+
(including default values or values set with this method already)
175+
"""
130176
if not self.has_defined(name) and value is not None:
131177
if distinct and getattr(self, name, None) == value:
132178
return
@@ -141,6 +187,11 @@ def validate_descriptor(
141187
*,
142188
basepath: Optional[str] = None,
143189
) -> Report:
190+
"""Validate a descriptor
191+
192+
To do so, it tries to convert a descriptor into a class instance, and
193+
reports the errors it has encountered (if any)
194+
"""
144195
errors = []
145196
timer = helpers.Timer()
146197
try:
@@ -165,36 +216,63 @@ def from_descriptor(
165216
allow_invalid: bool = False,
166217
**options: Any,
167218
) -> Self:
219+
"""Constructs an instance from a descriptor.
220+
221+
This method will identify the most specialized Class and instantiate
222+
it given information provided in the descriptor.
223+
224+
"descriptor" can be provided as a path to a descriptor file. The path
225+
can be relative to a base path provided as an option with the name
226+
"basepath".
227+
228+
If `allow_invalid = True`, the class creation will try to continue
229+
despite the descriptor having errors.
230+
"""
168231
descriptor_path = None
232+
169233
if isinstance(descriptor, str):
170234
descriptor_path = descriptor
171235
basepath = options.pop("basepath", None)
172236
descriptor = helpers.join_basepath(descriptor, basepath)
173237
if "basepath" in inspect.signature(cls.__init__).parameters:
174238
options["basepath"] = helpers.parse_basepath(descriptor)
239+
175240
descriptor = cls.metadata_retrieve(descriptor)
176-
# TODO: remove in next version
241+
242+
# TODO: remove in v6
177243
# Transform with a base class in case the type is not available
178244
cls.metadata_transform(descriptor)
179-
type = descriptor.get("type")
245+
246+
expected_type = descriptor.get("type")
247+
248+
# python class "type" property, if present, has precedence over descriptor type
180249
class_type = vars(cls).get("type")
181250
if isinstance(class_type, str):
182-
type = class_type
183-
Class = cls.metadata_select_class(type)
251+
expected_type = class_type
252+
253+
# Get the most specialized class associated with the expected_type
254+
# (defaults to the current class if `expected_type` is `None`)
255+
Class = cls.metadata_select_class(expected_type)
184256
Error = Class.metadata_Error or platform.frictionless_errors.MetadataError
257+
185258
Class.metadata_transform(descriptor)
186259
errors = list(Class.metadata_validate(descriptor))
260+
187261
if not allow_invalid:
188262
if errors:
189263
error = Error(note="descriptor is not valid")
190264
raise FrictionlessException(error, reasons=errors)
265+
191266
metadata = Class.metadata_import(descriptor, **helpers.remove_non_values(options))
192267
if descriptor_path:
193268
metadata.metadata_descriptor_path = descriptor_path
194269
metadata.metadata_descriptor_initial = metadata.to_descriptor()
195270
return metadata # type: ignore
196271

197272
def to_descriptor(self, *, validate: bool = False) -> types.IDescriptor:
273+
"""Return a descriptor associated to the class instance.
274+
If `validate = True`, the descriptor will additionally be validated.
275+
"""
198276
descriptor = self.metadata_export()
199277
if validate:
200278
Error = self.metadata_Error or platform.frictionless_errors.MetadataError
@@ -277,16 +355,57 @@ def to_markdown(self, path: Optional[str] = None, table: bool = False) -> str:
277355
metadata_type: ClassVar[str]
278356
metadata_Error: ClassVar[Optional[Type[Error]]] = None
279357
metadata_profile: ClassVar[Dict[str, Any]] = {}
358+
"""A JSON Schema-like dictionary that defines the expected format of the descriptor"""
359+
280360
metadata_profile_patch: ClassVar[Dict[str, Any]] = {}
361+
"""Change to the expected format of the descriptor
362+
363+
This will usually be used by child classes to amend and build upon the
364+
descriptor of their parent.
365+
"""
366+
281367
metadata_profile_merged: ClassVar[Dict[str, Any]] = {}
368+
"""Provides a consolidated definition of the descriptor, taking into
369+
account a `metadata_profile` and all `metadata_profile_patch`es that
370+
apply.
371+
"""
372+
282373
metadata_initiated: bool = False
374+
"""Is set to true when the class initialization is finished"""
375+
283376
metadata_assigned: Set[str] = set()
377+
"""Set of all names of properties to which a value (different from None)
378+
has been _explicitly_ assigned (including with explicit arguments at
379+
object initialization)"""
380+
284381
metadata_defaults: Dict[str, Any] = {}
382+
"""Names and values of properties that have not been
383+
explicitly set, and that have been set to a default value instead"""
384+
285385
metadata_descriptor_path: Optional[str] = None
386+
"""Descriptor file path
387+
If applicable, i.e. if a class has been instantiated with
388+
a descriptor read from a file
389+
"""
390+
286391
metadata_descriptor_initial: Optional[types.IDescriptor] = None
392+
"""Descriptor used for class instantiation
393+
If applicable, i.e. if a class has been instantiated with
394+
a descriptor
395+
"""
287396

288397
@classmethod
289398
def metadata_select_class(cls, type: Optional[str]) -> Type[Metadata]:
399+
"""Allows specifying a more specialized class for the "type" given as
400+
input
401+
402+
When a class can be dispatched into several different more
403+
specialized classes, this function makes the link between the type and
404+
the class.
405+
406+
Otherwise, "type" is expected to be None, and the current class is
407+
returned.
408+
"""
290409
if type:
291410
note = f'unsupported type for "{cls.metadata_type}": {type}'
292411
Error = cls.metadata_Error or platform.frictionless_errors.MetadataError
@@ -295,10 +414,21 @@ def metadata_select_class(cls, type: Optional[str]) -> Type[Metadata]:
295414

296415
@classmethod
297416
def metadata_select_property_class(cls, name: str) -> Optional[Type[Metadata]]:
417+
"""Defines the class to use with a given property's metadata
418+
419+
Complex properties are likely to have their own python class,
420+
inheriting from Metadata. If this is the case, this method should
421+
return this class when called with the property name as "name".
422+
"""
298423
pass
299424

300425
@classmethod
301426
def metadata_ensure_profile(cls):
427+
"""Consolidates `metadata_profile` and `metadata_profile_patch`es
428+
429+
All patches are applied, in order from parent to child, in case of
430+
multiple successive inheritance.
431+
"""
302432
if not cls.__dict__.get("metadata_profile_merged", None):
303433
cls.metadata_profile_merged = cls.metadata_profile
304434
for subcls in reversed(cls.mro()):
@@ -310,14 +440,32 @@ def metadata_ensure_profile(cls):
310440

311441
@classmethod
312442
def metadata_retrieve(
313-
cls, descriptor: Union[types.IDescriptor, str], *, size: Optional[int] = None
443+
cls,
444+
descriptor: Union[types.IDescriptor, str, Path],
445+
*,
446+
size: Optional[int] = None,
314447
) -> types.IDescriptor:
448+
"""Copy or fetch the "descriptor" as a dictionary.
449+
450+
If "descriptor" is a string or Path, then it is interpreted as a
451+
(possibly remote) path to a descriptor file.
452+
453+
The content of the file is expected to be in JSON format, except if
454+
the filename has an explicit `.yaml` extension.
455+
456+
"""
315457
try:
316458
if isinstance(descriptor, Mapping):
317459
return deepcopy(descriptor)
460+
461+
# Types are tested explicitly,
462+
# for providing feedback to users that do not comply with
463+
# the function signature and provide a wrong type
318464
if isinstance(descriptor, (str, Path)): # type: ignore
465+
# descriptor is read from (possibly remote) file
319466
if isinstance(descriptor, Path):
320467
descriptor = str(descriptor)
468+
321469
if helpers.is_remote_path(descriptor):
322470
session = platform.frictionless.system.http_session
323471
response = session.get(descriptor, stream=True)
@@ -328,20 +476,36 @@ def metadata_retrieve(
328476
else:
329477
with open(descriptor, encoding="utf-8") as file:
330478
content = file.read(size)
479+
331480
if descriptor.endswith(".yaml"):
332481
metadata = platform.yaml.safe_load(io.StringIO(content))
333482
else:
334483
metadata = json.loads(content)
484+
335485
assert isinstance(metadata, dict)
336486
return metadata # type: ignore
487+
337488
raise TypeError("descriptor type is not supported")
489+
338490
except Exception as exception:
339491
Error = cls.metadata_Error or platform.frictionless_errors.MetadataError
340492
note = f'cannot retrieve metadata "{descriptor}" because "{exception}"'
341493
raise FrictionlessException(Error(note=note)) from exception
342494

343495
@classmethod
344496
def metadata_transform(cls, descriptor: types.IDescriptor):
497+
"""Transform the descriptor in place before deserializing it into a python class
498+
instance.
499+
500+
The transformation applies recursively to any property handled with
501+
`metadata_select_property_class(name)`.
502+
503+
The actual transformation steps are defined by child classes, which must call
504+
`super().metadata_transform` to ensure recursive transformation.
505+
506+
This can be used, for instance, for backward compatibility, converting
507+
former descriptors into new ones.
508+
"""
345509
profile = cls.metadata_ensure_profile()
346510
for name in profile.get("properties", {}):
347511
value = descriptor.get(name)
@@ -364,12 +528,22 @@ def metadata_validate(
364528
profile: Optional[Union[types.IDescriptor, str]] = None,
365529
error_class: Optional[Type[Error]] = None,
366530
) -> Generator[Error, None, None]:
531+
"""Validates a descriptor according to a profile
532+
533+
A **profile** is a JSON Schema dict that sets expectations on the format
534+
of the descriptor.
535+
536+
The profile to validate can be set explicitly ("profile" parameter),
537+
otherwise it defaults to the class profile.
538+
"""
367539
Error = error_class
368540
if not Error:
369541
Error = cls.metadata_Error or platform.frictionless_errors.MetadataError
542+
370543
profile = profile or cls.metadata_ensure_profile()
371544
if isinstance(profile, str):
372545
profile = cls.metadata_retrieve(profile)
546+
373547
validator_class = platform.jsonschema.validators.validator_for(profile) # type: ignore
374548
validator = validator_class(profile) # type: ignore
375549
for error in validator.iter_errors(descriptor): # type: ignore
@@ -379,6 +553,7 @@ def metadata_validate(
379553
if metadata_path:
380554
note = f"{note} at property '{metadata_path}'"
381555
yield Error(note=note)
556+
382557
for name in profile.get("properties", {}):
383558
value = descriptor.get(name)
384559
Class = cls.metadata_select_property_class(name)
@@ -400,6 +575,10 @@ def metadata_import(
400575
with_basepath: bool = False,
401576
**options: Any,
402577
) -> Self:
578+
"""Deserialization of a descriptor to a class instance
579+
580+
The deserialization and serialization must be lossless.
581+
"""
403582
merged_options = {}
404583
profile = cls.metadata_ensure_profile()
405584
basepath = options.pop("basepath", None)
@@ -431,6 +610,10 @@ def metadata_import(
431610
return metadata
432611

433612
def metadata_export(self, *, exclude: List[str] = []) -> types.IDescriptor:
613+
"""Serialize class instance to descriptor
614+
615+
The deserialization and serialization must be lossless
616+
"""
434617
descriptor = {}
435618
profile = self.metadata_ensure_profile()
436619
for name in profile.get("properties", {}):

0 commit comments

Comments
 (0)