Skip to content

Commit 7b9b215

Browse files
committed
Start working on mutation issues in validate.
We change the validation logic and separate the normalisation step from the validation step. We make sure that if a notebook is normalized during validation, a warning is emitted. In the future we will turn the warning into an error. We add a test for the current behavior and an xfail test for the future behavior.
1 parent 9e9a7b7 commit 7b9b215

File tree

5 files changed

+342
-30
lines changed

5 files changed

+342
-30
lines changed

nbformat/json_compat.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ def _validator_for_name(validator_name):
8888
for (name, module, validator_cls) in _VALIDATOR_MAP:
8989
if module and validator_name == name:
9090
return validator_cls
91+
# we always return something.
92+
raise ValueError(f"Missing validator for {repr(validator_name)}")
9193

9294

9395
def get_current_validator():

nbformat/validator.py

Lines changed: 90 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,17 @@
55
import json
66
import os
77
import pprint
8+
import sys
9+
import warnings
10+
from copy import deepcopy
811

912
from traitlets.log import get_logger
1013

1114
from ._imports import import_item
1215
from .corpus.words import generate_corpus_id
1316
from .json_compat import ValidationError, _validator_for_name, get_current_validator
1417
from .reader import get_version
18+
from .warnings import DuplicateCellId, MissingIDFieldWarning
1519

1620
validators = {}
1721

@@ -246,6 +250,82 @@ def better_validation_error(error, version, version_minor):
246250
return NotebookValidationError(error, ref)
247251

248252

253+
def normalize(nbdict, version, version_minor):
    """
    EXPERIMENTAL

    Normalise a notebook prior to validation.

    This tries to implement a couple of normalisation steps to standardise
    notebooks and make validation easier.

    You should in general not rely on this function and make sure the notebooks
    that reach nbformat are already in a normal form.

    Parameters
    ----------
    nbdict : dict
        notebook document
    version : int
        major version of the notebook format ``nbdict`` is written in
    version_minor : int
        minor version of the notebook format ``nbdict`` is written in

    Returns
    -------
    changes : int
        number of changes in the notebooks
    notebook : dict
        deep-copy of the original object with relevant changes.

    """
    # Work on a copy so the caller's notebook is never mutated.
    nbdict = deepcopy(nbdict)
    # `_normalize` requires the format version and the repair flag; the
    # purpose of this function is to repair, so repairing is always enabled.
    return _normalize(nbdict, version, version_minor, True)
282+
283+
284+
def _normalize(nbdict, version, version_minor, repair_duplicate_cell_ids):
    """
    Normalise ``nbdict`` **in place** and report how many changes were made.

    Only notebooks with ``version >= 4 and version_minor >= 5`` are touched;
    for those, every entry of ``nbdict["cells"]`` is inspected.

    Parameters
    ----------
    nbdict : dict
        notebook document; modified in place when repairs are applied
    version : int
    version_minor : int
    repair_duplicate_cell_ids : bool
        when true, missing ids are generated and duplicate ids are replaced;
        when false, a duplicate id raises instead of being repaired

    Returns
    -------
    changes : int
        number of cell ids that were generated or replaced
    notebook : dict
        the same ``nbdict`` object that was passed in

    Raises
    ------
    ValidationError
        if a duplicate cell id is found and ``repair_duplicate_cell_ids``
        is false.
    """
    changes = 0

    if version >= 4 and version_minor >= 5:
        # if we support cell ids ensure default ids are provided
        for cell in nbdict["cells"]:
            if "id" not in cell:
                # The warning is emitted whether or not we repair, so callers
                # learn that their notebook is not in normal form.
                warnings.warn(
                    "Code cell is missing an id field, this will become"
                    " a hard error in future nbformat versions. You may want"
                    " to use `normalize()` on your notebooks before validations"
                    " (available since nbformat 5.1.4). Previous of nbformat"
                    " are also mutating their arguments, and will stop to do so"
                    " in the future.",
                    MissingIDFieldWarning,
                    stacklevel=3,
                )
                # Generate cell ids if any are missing
                if repair_duplicate_cell_ids:
                    cell["id"] = generate_corpus_id()
                    changes += 1

        # if we support cell ids check for uniqueness when validating the whole notebook
        seen_ids = set()
        for cell in nbdict["cells"]:
            if "id" not in cell:
                # Only reachable when repair is disabled and the id is missing.
                continue
            cell_id = cell["id"]
            if cell_id in seen_ids:
                # Best effort to repair if we find a duplicate id
                if repair_duplicate_cell_ids:
                    new_id = generate_corpus_id()
                    cell["id"] = new_id
                    changes += 1
                    warnings.warn(
                        f"Non-unique cell id {cell_id!r} detected. Corrected to {new_id!r}.",
                        DuplicateCellId,
                        stacklevel=3,
                    )
                else:
                    raise ValidationError(f"Non-unique cell id '{cell_id}' detected.")
            # Record the id the cell actually ends up with, so a freshly
            # generated replacement id is also checked against later cells.
            seen_ids.add(cell["id"])
    return changes, nbdict
327+
328+
249329
def validate(
250330
nbdict=None,
251331
ref=None,
@@ -256,13 +336,18 @@ def validate(
256336
repair_duplicate_cell_ids=True,
257337
strip_invalid_metadata=False,
258338
):
339+
259340
"""Checks whether the given notebook dict-like object
260341
conforms to the relevant notebook format schema.
261342
262-
343+
Parameters
344+
----------
345+
ref : optional, str
346+
reference to the subset of the schema we want to validate against.
347+
for example ``"markdown_cell"``, `"code_cell"` ....
263348
Raises ValidationError if not valid.
264349
"""
265-
350+
assert isinstance(ref, str) or ref is None
266351
# backwards compatibility for nbjson argument
267352
if nbdict is not None:
268353
pass
@@ -283,13 +368,8 @@ def validate(
283368
if version is None:
284369
version, version_minor = 1, 0
285370

286-
notebook_supports_cell_ids = ref is None and version >= 4 and version_minor >= 5
287-
if notebook_supports_cell_ids and repair_duplicate_cell_ids:
288-
# Auto-generate cell ids for cells that are missing them.
289-
for cell in nbdict["cells"]:
290-
if "id" not in cell:
291-
# Generate cell ids if any are missing
292-
cell["id"] = generate_corpus_id()
371+
if ref is None:
372+
_normalize(nbdict, version, version_minor, repair_duplicate_cell_ids)
293373

294374
for error in iter_validate(
295375
nbdict,
@@ -299,25 +379,8 @@ def validate(
299379
relax_add_props=relax_add_props,
300380
strip_invalid_metadata=strip_invalid_metadata,
301381
):
302-
raise error
303382

304-
if notebook_supports_cell_ids:
305-
# if we support cell ids check for uniqueness when validating the whole notebook
306-
seen_ids = set()
307-
for cell in nbdict["cells"]:
308-
cell_id = cell["id"]
309-
if cell_id in seen_ids:
310-
if repair_duplicate_cell_ids:
311-
# Best effort to repair if we find a duplicate id
312-
cell["id"] = generate_corpus_id()
313-
get_logger().warning(
314-
"Non-unique cell id '{}' detected. Corrected to '{}'.".format(
315-
cell_id, cell["id"]
316-
)
317-
)
318-
else:
319-
raise ValidationError(f"Non-unique cell id '{cell_id}' detected.")
320-
seen_ids.add(cell_id)
383+
raise error
321384

322385

323386
def iter_validate(

nbformat/warnings.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""
2+
Warnings that can be emitted by nbformat.
3+
"""
4+
5+
6+
class MissingIDFieldWarning(FutureWarning):
    """
    Warning emitted when a cell is missing its ``id`` field.

    This warning is emitted in the validation step of nbformat, as we used to
    mutate the notebook structure there, which causes signature issues.

    This will be turned into an error at a later point.

    We subclass FutureWarning as we will change the behavior in the future.
    """
19+
20+
21+
class DuplicateCellId(FutureWarning):
    """
    Warning emitted when a non-unique cell id is detected and corrected.

    This warning is emitted in the validation step of nbformat, as we used to
    mutate the notebook structure there, which causes signature issues.

    This will be turned into an error at a later point.

    We subclass FutureWarning as we will change the behavior in the future.
    """

0 commit comments

Comments
 (0)