37
37
class Metadata :
38
38
"""Metadata representation
39
39
40
+ This class provides functionality for serialization / deserialization of
41
+ python child classes to descriptors.
42
+
43
+ A **descriptor** is a JSON serializable `dict`.
44
+ A **profile** is a JSON Schema dict that sets expectations on the format
45
+ of the descriptor.
46
+
40
47
For proper functioning a child class must be decorated by
41
48
"@attrs.define(kw_only=True, repr=False)" and ensure that
42
- "Metadata.__attrs_post_init__" is called
49
+ "Metadata.__attrs_post_init__" is called :
50
+
51
+ - `kw_only=True` is required because this class will need explicit
52
+ keywords to be able to track which properties have been set at
53
+ initialization (see implementation of `__new__`, which uses the keyword
54
+ arguments `kwargs`)
55
+ - `repr=False` prevents `attrs` from overwriting the inherited `__repr__`
56
+ function defined in this class.
43
57
44
58
"""
45
59
46
60
custom : dict [str , Any ] = {}
47
61
"""
48
- List of custom parameters. Any extra properties will be added
62
+ List of custom parameters. Any extra property will be added
49
63
to the custom property.
64
+
65
+ A "custom" property is an additional property to the ones expected by the
66
+ class's "profile" (see the "metadata_profile_*" properties)
50
67
"""
51
68
52
69
def __new__ (cls , * args : Any , ** kwargs : Any ):
@@ -61,6 +78,14 @@ def __attrs_post_init__(self):
61
78
self .metadata_initiated = True
62
79
63
80
def __setattr__ (self , name : str , value : Any ):
81
+ """Side effects when setting a property
82
+
83
+ Properties starting with `_` or `metadata_` have no side effects.
84
+
85
+ For all other properties, the "metadata_assigned" and
86
+ "metadata_defaults" are updated, depending on whether the value has been
87
+ set explicitly or implicitly as the default, respectively.
88
+ """
64
89
if not name .startswith (("_" , "metadata_" )):
65
90
if self .metadata_initiated :
66
91
if value is not None :
@@ -74,6 +99,7 @@ def __setattr__(self, name: str, value: Any):
74
99
super ().__setattr__ (name , value )
75
100
76
101
def __repr__ (self ) -> str :
102
+ """Returns the pretty-formatted descriptor of the object"""
77
103
return pprint .pformat (self .to_descriptor (), sort_dicts = False )
78
104
79
105
@property
@@ -105,7 +131,15 @@ def handle_data(self, data: str):
105
131
# Defined
106
132
107
133
def list_defined (self ) -> List [str ]:
134
+ """Returns a list of all properties that have been defined.
135
+
136
+ TODO : the difference with metadata_assigned is that it lists values
137
+ that are set in the class that are different from `metadata_defaults`.
138
+ How is that possible, I thought metadata_defaults can only be set to
139
+ the defaults ?
140
+ """
108
141
defined = list (self .metadata_assigned )
142
+
109
143
for name , default in self .metadata_defaults .items ():
110
144
value = getattr (self , name , None )
111
145
if isinstance (value , type ):
@@ -118,15 +152,27 @@ def add_defined(self, name: str) -> None:
118
152
self .metadata_assigned .add (name )
119
153
120
154
def has_defined (self , name : str ) -> bool :
155
+ """Whether a property has been defined explicitly"""
121
156
return name in self .list_defined ()
122
157
123
158
def get_defined (self , name : str , * , default : Any = None ) -> Any :
159
+ """Retrieve the value of a property if it has been explicitly
160
+ assigned, or return a default value otherwise"""
124
161
if self .has_defined (name ):
125
162
return getattr (self , name )
126
- if default is not None :
127
- return default
163
+
164
+ return default
128
165
129
166
def set_not_defined (self , name : str , value : Any , * , distinct : bool = False ) -> None :
167
+ """If no property with "name" has already been assigned, then assign
168
+ "value" to this property, but without the side effects of setting an
169
+ attribute (see
170
+ `__setattr__`, in particular, "has_defined(name)" will still return
171
+ False after definition).
172
+
173
+ Setting `distinct=True` will prevent overwriting a value that is already set
174
+ (including default values or values set with this method already)
175
+ """
130
176
if not self .has_defined (name ) and value is not None :
131
177
if distinct and getattr (self , name , None ) == value :
132
178
return
@@ -141,6 +187,11 @@ def validate_descriptor(
141
187
* ,
142
188
basepath : Optional [str ] = None ,
143
189
) -> Report :
190
+ """Validate a descriptor
191
+
192
+ To do so, it tries to convert a descriptor into a class instance, and
193
+ report errors it has encountered (if any)
194
+ """
144
195
errors = []
145
196
timer = helpers .Timer ()
146
197
try :
@@ -165,36 +216,63 @@ def from_descriptor(
165
216
allow_invalid : bool = False ,
166
217
** options : Any ,
167
218
) -> Self :
219
+ """Constructs an instance from a descriptor.
220
+
221
+ This method will identify the most specialized Class and instantiate
222
+ it given information provided in the descriptor.
223
+
224
+ "descriptor" can be provided as a path to a descriptor file. The path
225
+ can be relative to a base path provided as an option with the name
226
+ "basepath".
227
+
228
+ If `allow_invalid = True`, the class creation will try to continue
229
+ despite the descriptor having errors.
230
+ """
168
231
descriptor_path = None
232
+
169
233
if isinstance (descriptor , str ):
170
234
descriptor_path = descriptor
171
235
basepath = options .pop ("basepath" , None )
172
236
descriptor = helpers .join_basepath (descriptor , basepath )
173
237
if "basepath" in inspect .signature (cls .__init__ ).parameters :
174
238
options ["basepath" ] = helpers .parse_basepath (descriptor )
239
+
175
240
descriptor = cls .metadata_retrieve (descriptor )
176
- # TODO: remove in next version
241
+
242
+ # TODO: remove in v6
177
243
# Transform with a base class in case the type is not available
178
244
cls .metadata_transform (descriptor )
179
- type = descriptor .get ("type" )
245
+
246
+ expected_type = descriptor .get ("type" )
247
+
248
+ # python class "type" property, if present, has precedence over descriptor type
180
249
class_type = vars (cls ).get ("type" )
181
250
if isinstance (class_type , str ):
182
- type = class_type
183
- Class = cls .metadata_select_class (type )
251
+ expected_type = class_type
252
+
253
+ # Get the most specialized class associated with the expected_type
254
+ # (defaults to the current class if `expected_type` is `None`)
255
+ Class = cls .metadata_select_class (expected_type )
184
256
Error = Class .metadata_Error or platform .frictionless_errors .MetadataError
257
+
185
258
Class .metadata_transform (descriptor )
186
259
errors = list (Class .metadata_validate (descriptor ))
260
+
187
261
if not allow_invalid :
188
262
if errors :
189
263
error = Error (note = "descriptor is not valid" )
190
264
raise FrictionlessException (error , reasons = errors )
265
+
191
266
metadata = Class .metadata_import (descriptor , ** helpers .remove_non_values (options ))
192
267
if descriptor_path :
193
268
metadata .metadata_descriptor_path = descriptor_path
194
269
metadata .metadata_descriptor_initial = metadata .to_descriptor ()
195
270
return metadata # type: ignore
196
271
197
272
def to_descriptor (self , * , validate : bool = False ) -> types .IDescriptor :
273
+ """Return a descriptor associated to the class instance.
274
+ If `validate = True`, the descriptor will additionally be validated.
275
+ """
198
276
descriptor = self .metadata_export ()
199
277
if validate :
200
278
Error = self .metadata_Error or platform .frictionless_errors .MetadataError
@@ -277,16 +355,57 @@ def to_markdown(self, path: Optional[str] = None, table: bool = False) -> str:
277
355
metadata_type : ClassVar [str ]
278
356
metadata_Error : ClassVar [Optional [Type [Error ]]] = None
279
357
metadata_profile : ClassVar [Dict [str , Any ]] = {}
358
+ """A JSON Schema like dictionary that defines the expected format of the descriptor"""
359
+
280
360
metadata_profile_patch : ClassVar [Dict [str , Any ]] = {}
361
+ """Change to the expected format of the descriptor
362
+
363
+ This will usually be used by child classes to amend and build upon the
364
+ descriptor of their parent.
365
+ """
366
+
281
367
metadata_profile_merged : ClassVar [Dict [str , Any ]] = {}
368
+ """Provides a consolidated definition of the descriptor, taking into
369
+ account a `metadata_profile` and all `metadata_profile_patch`es that
370
+ apply.
371
+ """
372
+
282
373
metadata_initiated : bool = False
374
+ """Is set to true when the class initialization is finished"""
375
+
283
376
metadata_assigned : Set [str ] = set ()
377
+ """Set of all names of properties to which a value (different from None)
378
+ has been _explicitly_ assigned (including with explicit arguments at
379
+ object initialization)"""
380
+
284
381
metadata_defaults : Dict [str , Any ] = {}
382
+ """Names and values of properties that have not been
383
+ explicitly set, and that have been set to a default value instead"""
384
+
285
385
metadata_descriptor_path : Optional [str ] = None
386
+ """Descriptor file path
387
+ If applicable, i.e. if a class has been instantiated with
388
+ a descriptor read from a file
389
+ """
390
+
286
391
metadata_descriptor_initial : Optional [types .IDescriptor ] = None
392
+ """Descriptor used for class instantiation
393
+ If applicable, i.e. if a class has been instantiated with
394
+ a descriptor
395
+ """
287
396
288
397
@classmethod
289
398
def metadata_select_class (cls , type : Optional [str ]) -> Type [Metadata ]:
399
+ """Allows specifying a more specialized class for the "type" given as
400
+ input
401
+
402
+ When a class can be dispatched into several different more
403
+ specialized classes, this function makes the link between the type and
404
+ the class.
405
+
406
+ Otherwise, "type" is expected to be None, and the current class is
407
+ returned.
408
+ """
290
409
if type :
291
410
note = f'unsupported type for "{ cls .metadata_type } ": { type } '
292
411
Error = cls .metadata_Error or platform .frictionless_errors .MetadataError
@@ -295,10 +414,21 @@ def metadata_select_class(cls, type: Optional[str]) -> Type[Metadata]:
295
414
296
415
@classmethod
297
416
def metadata_select_property_class (cls , name : str ) -> Optional [Type [Metadata ]]:
417
+ """Defines the class to use with a given property's metadata
418
+
419
+ Complex properties are likely to have their own python class,
420
+ inheriting from Metadata. If this is the case, this method should
421
+ return this class when called with the property name as "name".
422
+ """
298
423
pass
299
424
300
425
@classmethod
301
426
def metadata_ensure_profile (cls ):
427
+ """Consolidates `metadata_profile` and `metadata_profile_patch`es
428
+
429
+ All patches are applied, in order from parent to child, in case of
430
+ multiple successive inheritance.
431
+ """
302
432
if not cls .__dict__ .get ("metadata_profile_merged" , None ):
303
433
cls .metadata_profile_merged = cls .metadata_profile
304
434
for subcls in reversed (cls .mro ()):
@@ -310,14 +440,32 @@ def metadata_ensure_profile(cls):
310
440
311
441
@classmethod
312
442
def metadata_retrieve (
313
- cls , descriptor : Union [types .IDescriptor , str ], * , size : Optional [int ] = None
443
+ cls ,
444
+ descriptor : Union [types .IDescriptor , str , Path ],
445
+ * ,
446
+ size : Optional [int ] = None ,
314
447
) -> types .IDescriptor :
448
+ """Copy or fetch the "descriptor" as a dictionary.
449
+
450
+ If "descriptor" is a string or Path, then it is interpreted as a
451
+ (possibly remote) path to a descriptor file.
452
+
453
+ The content of the file is expected to be in JSON format, except if
454
+ the filename has an explicit `.yaml` extension.
455
+
456
+ """
315
457
try :
316
458
if isinstance (descriptor , Mapping ):
317
459
return deepcopy (descriptor )
460
+
461
+ # Types are tested explicitly,
462
+ # for providing feedback to users that do not comply with
463
+ # the function signature and provide a wrong type
318
464
if isinstance (descriptor , (str , Path )): # type: ignore
465
+ # descriptor is read from (possibly remote) file
319
466
if isinstance (descriptor , Path ):
320
467
descriptor = str (descriptor )
468
+
321
469
if helpers .is_remote_path (descriptor ):
322
470
session = platform .frictionless .system .http_session
323
471
response = session .get (descriptor , stream = True )
@@ -328,20 +476,36 @@ def metadata_retrieve(
328
476
else :
329
477
with open (descriptor , encoding = "utf-8" ) as file :
330
478
content = file .read (size )
479
+
331
480
if descriptor .endswith (".yaml" ):
332
481
metadata = platform .yaml .safe_load (io .StringIO (content ))
333
482
else :
334
483
metadata = json .loads (content )
484
+
335
485
assert isinstance (metadata , dict )
336
486
return metadata # type: ignore
487
+
337
488
raise TypeError ("descriptor type is not supported" )
489
+
338
490
except Exception as exception :
339
491
Error = cls .metadata_Error or platform .frictionless_errors .MetadataError
340
492
note = f'cannot retrieve metadata "{ descriptor } " because "{ exception } "'
341
493
raise FrictionlessException (Error (note = note )) from exception
342
494
343
495
@classmethod
344
496
def metadata_transform (cls , descriptor : types .IDescriptor ):
497
+ """Transform the descriptor in place before deserializing it into a python class
498
+ instance.
499
+
500
+ The transformation applies recursively to any property handled with
501
+ `metadata_select_property_class(name)`.
502
+
503
+ The actual transformation steps are defined by child classes, which must call
504
+ `super().metadata_transform` to ensure recursive transformation.
505
+
506
+ This can be used for instance for backward compatibility, converting
507
+ former descriptors into new ones.
508
+ """
345
509
profile = cls .metadata_ensure_profile ()
346
510
for name in profile .get ("properties" , {}):
347
511
value = descriptor .get (name )
@@ -364,12 +528,22 @@ def metadata_validate(
364
528
profile : Optional [Union [types .IDescriptor , str ]] = None ,
365
529
error_class : Optional [Type [Error ]] = None ,
366
530
) -> Generator [Error , None , None ]:
531
+ """Validates a descriptor according to a profile
532
+
533
+ A **profile** is a JSON Schema dict that sets expectations on the format
534
+ of the descriptor.
535
+
536
+ The profile to validate against can be set explicitly ("profile" parameter),
537
+ otherwise it defaults to the class profile.
538
+ """
367
539
Error = error_class
368
540
if not Error :
369
541
Error = cls .metadata_Error or platform .frictionless_errors .MetadataError
542
+
370
543
profile = profile or cls .metadata_ensure_profile ()
371
544
if isinstance (profile , str ):
372
545
profile = cls .metadata_retrieve (profile )
546
+
373
547
validator_class = platform .jsonschema .validators .validator_for (profile ) # type: ignore
374
548
validator = validator_class (profile ) # type: ignore
375
549
for error in validator .iter_errors (descriptor ): # type: ignore
@@ -379,6 +553,7 @@ def metadata_validate(
379
553
if metadata_path :
380
554
note = f"{ note } at property '{ metadata_path } '"
381
555
yield Error (note = note )
556
+
382
557
for name in profile .get ("properties" , {}):
383
558
value = descriptor .get (name )
384
559
Class = cls .metadata_select_property_class (name )
@@ -400,6 +575,10 @@ def metadata_import(
400
575
with_basepath : bool = False ,
401
576
** options : Any ,
402
577
) -> Self :
578
+ """Deserialization of a descriptor to a class instance
579
+
580
+ The deserialization and serialization must be lossless.
581
+ """
403
582
merged_options = {}
404
583
profile = cls .metadata_ensure_profile ()
405
584
basepath = options .pop ("basepath" , None )
@@ -431,6 +610,10 @@ def metadata_import(
431
610
return metadata
432
611
433
612
def metadata_export (self , * , exclude : List [str ] = []) -> types .IDescriptor :
613
+ """Serialize class instance to descriptor
614
+
615
+ The deserialization and serialization must be lossless
616
+ """
434
617
descriptor = {}
435
618
profile = self .metadata_ensure_profile ()
436
619
for name in profile .get ("properties" , {}):
0 commit comments