Skip to content

Commit 29d61ea

Browse files
authored
ENH harden Card by adding a flag to allow/disallow insecure pickle loading (#485)
* ENH harden Card by adding a flag to allow/disallow insecure pickle loading * improved typing for `trusted` arg in `Card` * TST updated test after `Card` hardening * ENH better exception handling when trying to load a pickle with allow_pickle=False * TST added tests for Card when pickle is passed and allow_pickle=False * changelog
1 parent 5c45771 commit 29d61ea

File tree

3 files changed

+74
-11
lines changed

3 files changed

+74
-11
lines changed

docs/changes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ skops Changelog
1111

1212
v0.13
1313
-----
14+
- `Card` now requires a new parameter, `allow_pickle`, to call `get_model` with
15+
models that are not `.skops` files. This change is to mitigate security risks
16+
associated with pickles. :pr:`485` by `Io_no`_.
1417

1518
v0.12
1619
-----

skops/card/_model_card.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,9 @@ def __repr__(self) -> str:
227227
return f"{self.__class__.__name__}({nrows}x{ncols})"
228228

229229

230-
def _load_model(model: Any, trusted=False) -> Any:
230+
def _load_model(
231+
model: Any, trusted: Optional[Sequence[str]] = None, allow_pickle: bool = False
232+
) -> Any:
231233
"""Return a model instance.
232234
233235
Loads the model if provided a file path, if already a model instance return
@@ -238,10 +240,14 @@ def _load_model(model: Any, trusted=False) -> Any:
238240
model : pathlib.Path, str, or sklearn estimator
239241
Path/str or the actual model instance. if a Path or str, loads the model.
240242
241-
trusted : bool, default=False
243+
trusted: list of str, default=None
242244
Passed to :func:`skops.io.load` if the model is a file path and it's
243245
a `skops` file.
244246
247+
allow_pickle : bool, default=False
248+
If `True`, allows loading models using `joblib.load`. This may lead to
249+
security issues if the model file is not trustworthy.
250+
245251
Returns
246252
-------
247253
model : object
@@ -255,13 +261,28 @@ def _load_model(model: Any, trusted=False) -> Any:
255261
if not model_path.exists():
256262
raise FileNotFoundError(f"File is not present: {model_path}")
257263

264+
if trusted and allow_pickle:
265+
raise ValueError(
266+
"`allow_pickle` cannot be `True` if `trusted` is not empty. "
267+
"Pickles cannot be trusted or checked for security issues."
268+
)
269+
270+
msg = ""
258271
try:
259272
if zipfile.is_zipfile(model_path):
260273
model = load(model_path, trusted=trusted)
261-
else:
274+
elif allow_pickle:
262275
model = joblib.load(model_path)
276+
else:
277+
msg = (
278+
"Model file is not a skops file, and allow_pickle is set to False. "
279+
"Please set allow_pickle=True to load the model."
280+
"This may lead to security issues if the model file is not trustworthy."
281+
)
282+
raise RuntimeError(msg)
263283
except Exception as ex:
264-
msg = f'An "{type(ex).__name__}" occurred during model loading.'
284+
if not msg:
285+
msg = f'"{type(ex).__name__}" occurred during model loading.'
265286
raise RuntimeError(msg) from ex
266287

267288
return model
@@ -310,10 +331,14 @@ class Card:
310331
not work, e.g. :meth:`Card.add_metrics`, since it's not clear where to
311332
put the metrics when there is no template or a custom template.
312333
313-
trusted: bool, default=False
334+
trusted: list of str, default=None
314335
Passed to :func:`skops.io.load` if the model is a file path and it's
315336
a `skops` file.
316337
338+
allow_pickle: bool, default=False
339+
If `True`, allows loading models using `joblib.load`. This may lead to
340+
security issues if the model file is not trustworthy.
341+
317342
Attributes
318343
----------
319344
model: estimator object
@@ -379,11 +404,13 @@ def __init__(
379404
model_diagram: bool | Literal["auto"] | str = "auto",
380405
template: Literal["skops"] | dict[str, str] | None = "skops",
381406
trusted: Optional[List[str]] = None,
407+
allow_pickle: bool = False,
382408
) -> None:
383409
self.model = model
384410
self.model_format = model_format
385411
self.template = template
386412
self.trusted = trusted
413+
self.allow_pickle = allow_pickle
387414

388415
self._data: dict[str, Section] = {}
389416
self._metrics: dict[str, str | float | int] = {}
@@ -465,7 +492,7 @@ def get_model(self) -> Any:
465492

466493
@cached_property
467494
def _model(self):
468-
model = _load_model(self.model, self.trusted)
495+
model = _load_model(self.model, self.trusted, self.allow_pickle)
469496
return model
470497

471498
def add(self, folded: bool = False, **kwargs: str) -> Self:

skops/card/tests/test_card.py

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,18 +82,49 @@ def test_load_model(suffix):
8282
_, save_file = save_model_to_file(model0, suffix)
8383
if suffix == ".skops":
8484
untrusted_types = get_untrusted_types(file=save_file)
85+
allow_pickle = False
8586
else:
8687
untrusted_types = None
87-
loaded_model_str = _load_model(save_file, trusted=untrusted_types)
88+
allow_pickle = True
89+
loaded_model_str = _load_model(
90+
save_file, trusted=untrusted_types, allow_pickle=allow_pickle
91+
)
8892
save_file_path = Path(save_file)
89-
loaded_model_path = _load_model(save_file_path, trusted=untrusted_types)
93+
loaded_model_path = _load_model(
94+
save_file_path, trusted=untrusted_types, allow_pickle=allow_pickle
95+
)
9096
loaded_model_instance = _load_model(model0, trusted=untrusted_types)
9197

9298
assert loaded_model_str.param_1 == 10
9399
assert loaded_model_path.param_1 == 10
94100
assert loaded_model_instance.param_1 == 10
95101

96102

103+
@pytest.mark.parametrize("suffix", [".pkl", ".pickle"])
104+
def test_load_model_exception_allow_pickle(suffix):
105+
model0 = MyRegressor(param_1=10)
106+
_, save_file = save_model_to_file(model0, suffix)
107+
108+
with pytest.raises(
109+
RuntimeError,
110+
match=(
111+
"Model file is not a skops file, and allow_pickle is set to False. "
112+
"Please set allow_pickle=True to load the model."
113+
"This may lead to security issues if the model file is not trustworthy."
114+
),
115+
):
116+
_load_model(save_file, trusted=None, allow_pickle=False)
117+
118+
with pytest.raises(
119+
ValueError,
120+
match=(
121+
"`allow_pickle` cannot be `True` if `trusted` is not empty. "
122+
"Pickles cannot be trusted or checked for security issues."
123+
),
124+
):
125+
_load_model(save_file, trusted=[""], allow_pickle=True)
126+
127+
97128
@pytest.fixture
98129
def model_card(model_diagram=True):
99130
model = fit_model()
@@ -166,7 +197,9 @@ def test_model_caching(skops_model_card, iris_skops_file, destination_path):
166197

167198
new_model = MyClassifier(param_1=10)
168199
# mock _load_model, it still loads the model but we can track call count
169-
mock_load_model = mock.Mock(side_effect=load)
200+
mock_load_model = mock.Mock(
201+
side_effect=lambda path, trusted, _: load(path, trusted=trusted)
202+
)
170203
card = Card(iris_skops_file, trusted=[MyClassifier])
171204
with mock.patch("skops.card._model_card._load_model", mock_load_model):
172205
model1 = card.get_model()
@@ -1133,7 +1166,7 @@ def path_to_card(self, path, suffix):
11331166
if suffix == ".skops":
11341167
card = Card(model=path, trusted=get_untrusted_types(file=path))
11351168
else:
1136-
card = Card(model=path)
1169+
card = Card(model=path, allow_pickle=True)
11371170
return card
11381171

11391172
@pytest.mark.parametrize("meth", [repr, str])
@@ -1178,7 +1211,7 @@ def test_load_model_exception(self, meth, suffix):
11781211
os.close(file_handle)
11791212

11801213
with pytest.raises(Exception, match="occurred during model loading."):
1181-
card = Card(file_name)
1214+
card = Card(file_name, allow_pickle=True)
11821215
meth(card)
11831216

11841217
@pytest.mark.parametrize("meth", [repr, str])

0 commit comments

Comments
 (0)