5
5
import json
6
6
import os
7
7
import pprint
8
+ import sys
9
+ import warnings
10
+ from copy import deepcopy
8
11
9
12
from traitlets .log import get_logger
10
13
11
14
from ._imports import import_item
12
15
from .corpus .words import generate_corpus_id
13
16
from .json_compat import ValidationError , _validator_for_name , get_current_validator
14
17
from .reader import get_version
18
+ from .warnings import DuplicateCellId , MissingIDFieldWarning
15
19
16
20
# Module-level registry, initially empty.
# NOTE(review): presumably a cache of schema validators filled in lazily by
# code elsewhere in this module (not visible in this excerpt) — TODO confirm.
validators = {}
17
21
@@ -246,6 +250,82 @@ def better_validation_error(error, version, version_minor):
246
250
return NotebookValidationError (error , ref )
247
251
248
252
253
def normalize(nbdict, version, version_minor):
    """
    EXPERIMENTAL

    Normalise a notebook prior to validation.

    This tries to implement a couple of normalisation steps to standardise
    notebooks and make validation easier.

    You should in general not rely on this function and make sure the notebooks
    that reach nbformat are already in a normal form.

    Parameters
    ----------
    nbdict : dict
        notebook document
    version : int
    version_minor : int

    Returns
    -------
    changes : int
        number of changes in the notebooks
    notebook : dict
        deep-copy of the original object with relevant changes.

    """
    # Work on a private copy so the caller's notebook is never mutated.
    nbdict = deepcopy(nbdict)
    # ``_normalize`` takes four required arguments; forwarding only ``nbdict``
    # raised TypeError on every call.  Normalisation is meant to *fix* the
    # document, so repairing missing/duplicate cell ids is always enabled.
    return _normalize(
        nbdict,
        version,
        version_minor,
        repair_duplicate_cell_ids=True,
    )
282
+
283
+
284
+ def _normalize (nbdict , version , version_minor , repair_duplicate_cell_ids ):
285
+ changes = 0
286
+
287
+ if version >= 4 and version_minor >= 5 :
288
+ # if we support cell ids ensure default ids are provided
289
+ for cell in nbdict ["cells" ]:
290
+ if "id" not in cell :
291
+ warnings .warn (
292
+ "Code cell is missing an id field, this will become"
293
+ " a hard error in future nbformat versions. You may want"
294
+ " to use `normalize()` on your notebooks before validations"
295
+ " (available since nbformat 5.1.4). Previous of nbformat"
296
+ " are also mutating their arguments, and will stop to do so"
297
+ " in the future." ,
298
+ MissingIDFieldWarning ,
299
+ stacklevel = 3 ,
300
+ )
301
+ # Generate cell ids if any are missing
302
+ if repair_duplicate_cell_ids :
303
+ cell ["id" ] = generate_corpus_id ()
304
+ changes += 1
305
+
306
+ # if we support cell ids check for uniqueness when validating the whole notebook
307
+ seen_ids = set ()
308
+ for cell in nbdict ["cells" ]:
309
+ if "id" not in cell :
310
+ continue
311
+ cell_id = cell ["id" ]
312
+ if cell_id in seen_ids :
313
+ # Best effort to repair if we find a duplicate id
314
+ if repair_duplicate_cell_ids :
315
+ new_id = generate_corpus_id ()
316
+ cell ["id" ] = new_id
317
+ changes += 1
318
+ warnings .warn (
319
+ f"Non-unique cell id { cell_id !r} detected. Corrected to { new_id !r} ." ,
320
+ DuplicateCellId ,
321
+ stacklevel = 3 ,
322
+ )
323
+ else :
324
+ raise ValidationError (f"Non-unique cell id '{ cell_id } ' detected." )
325
+ seen_ids .add (cell_id )
326
+ return changes , nbdict
327
+
328
+
249
329
def validate (
250
330
nbdict = None ,
251
331
ref = None ,
@@ -256,13 +336,18 @@ def validate(
256
336
repair_duplicate_cell_ids = True ,
257
337
strip_invalid_metadata = False ,
258
338
):
339
+
259
340
"""Checks whether the given notebook dict-like object
260
341
conforms to the relevant notebook format schema.
261
342
262
-
343
+ Parameters
344
+ ----------
345
+ ref : optional, str
346
+ reference to the subset of the schema we want to validate against.
347
+ for example ``"markdown_cell"``, `"code_cell"` ....
263
348
Raises ValidationError if not valid.
264
349
"""
265
-
350
+ assert isinstance ( ref , str ) or ref is None
266
351
# backwards compatibility for nbjson argument
267
352
if nbdict is not None :
268
353
pass
@@ -283,13 +368,8 @@ def validate(
283
368
if version is None :
284
369
version , version_minor = 1 , 0
285
370
286
- notebook_supports_cell_ids = ref is None and version >= 4 and version_minor >= 5
287
- if notebook_supports_cell_ids and repair_duplicate_cell_ids :
288
- # Auto-generate cell ids for cells that are missing them.
289
- for cell in nbdict ["cells" ]:
290
- if "id" not in cell :
291
- # Generate cell ids if any are missing
292
- cell ["id" ] = generate_corpus_id ()
371
+ if ref is None :
372
+ _normalize (nbdict , version , version_minor , repair_duplicate_cell_ids )
293
373
294
374
for error in iter_validate (
295
375
nbdict ,
@@ -299,25 +379,8 @@ def validate(
299
379
relax_add_props = relax_add_props ,
300
380
strip_invalid_metadata = strip_invalid_metadata ,
301
381
):
302
- raise error
303
382
304
- if notebook_supports_cell_ids :
305
- # if we support cell ids check for uniqueness when validating the whole notebook
306
- seen_ids = set ()
307
- for cell in nbdict ["cells" ]:
308
- cell_id = cell ["id" ]
309
- if cell_id in seen_ids :
310
- if repair_duplicate_cell_ids :
311
- # Best effort to repair if we find a duplicate id
312
- cell ["id" ] = generate_corpus_id ()
313
- get_logger ().warning (
314
- "Non-unique cell id '{}' detected. Corrected to '{}'." .format (
315
- cell_id , cell ["id" ]
316
- )
317
- )
318
- else :
319
- raise ValidationError (f"Non-unique cell id '{ cell_id } ' detected." )
320
- seen_ids .add (cell_id )
383
+ raise error
321
384
322
385
323
386
def iter_validate (
0 commit comments