Skip to content

Commit 670efbb

Browse files
Handle .npz Numpy files (#32)
Co-authored-by: Matthieu Maitre <[email protected]>
1 parent b1d7c63 commit 670efbb

File tree

11 files changed

+145
-26
lines changed

11 files changed

+145
-26
lines changed

.gitattributes

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
1+
*.pkl binary
2+
*.pickle binary
3+
*.bin binary
4+
*.pt binary
5+
*.zip binary
6+
*.npy binary
7+
*.npz binary

.vscode/settings.json

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,5 +5,6 @@
55
"tests"
66
],
77
"python.testing.unittestEnabled": false,
8-
"python.testing.pytestEnabled": true
8+
"python.testing.pytestEnabled": true,
9+
"sarif-viewer.connectToGithubCodeScanning": "off"
910
}

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = picklescan
3-
version = 0.0.17
3+
version = 0.0.18
44
author = Matthieu Maitre
55
author_email = [email protected]
66
description = Security scanner detecting Python Pickle files performing suspicious actions

src/picklescan/scanner.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,13 @@ def __str__(self) -> str:
8282
"IntStorage",
8383
"ByteStorage",
8484
},
85+
"numpy": {
86+
"dtype",
87+
"ndarray",
88+
},
89+
"numpy.core.multiarray": {
90+
"_reconstruct",
91+
},
8592
"torch._utils": {"_rebuild_tensor_v2"},
8693
}
8794

@@ -141,8 +148,7 @@ def __str__(self) -> str:
141148
# https://www.tensorflow.org/api_docs/python/tf/keras/models/load_model
142149
#
143150

144-
# TODO: support .npz files
145-
_numpy_file_extensions = {".npy"}
151+
_numpy_file_extensions = {".npy"} # Note: .npz is handled as zip files
146152
_pytorch_file_extensions = {".bin", ".pt", ".pth", ".ckpt"}
147153
_pickle_file_extensions = {".pkl", ".pickle", ".joblib", ".dat", ".data"}
148154
_zip_file_extensions = {".zip", ".npz"}
@@ -301,10 +307,15 @@ def scan_zip_bytes(data: IO[bytes], file_id) -> ScanResult:
301307
file_names = zip.namelist()
302308
_log.debug("Files in archive %s: %s", file_id, file_names)
303309
for file_name in file_names:
304-
if os.path.splitext(file_name)[1] in _pickle_file_extensions:
310+
file_ext = os.path.splitext(file_name)[1]
311+
if file_ext in _pickle_file_extensions:
305312
_log.debug("Scanning file %s in zip archive %s", file_name, file_id)
306313
with zip.open(file_name, "r") as file:
307314
result.merge(scan_pickle_bytes(file, f"{file_id}:{file_name}"))
315+
elif file_ext in _numpy_file_extensions:
316+
_log.debug("Scanning file %s in zip archive %s", file_name, file_id)
317+
with zip.open(file_name, "r") as file:
318+
result.merge(scan_numpy(file, f"{file_id}:{file_name}"))
308319

309320
return result
310321

@@ -323,7 +334,7 @@ def scan_numpy(data: IO[bytes], file_id) -> ScanResult:
323334
data.seek(-min(N, len(magic)), 1) # back-up
324335
if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX):
325336
# .npz file
326-
raise NotImplementedError("Scanning of .npz files is not implemented yet")
337+
raise ValueError(f".npz file not handled as zip file: {file_id}")
327338
elif magic == np.lib.format.MAGIC_PREFIX:
328339
# .npy file
329340

tests/data2/int_array.npy

144 Bytes
Binary file not shown.

tests/data2/int_arrays.npz

514 Bytes
Binary file not shown.
387 Bytes
Binary file not shown.
File renamed without changes.

tests/data2/object_arrays.npz

810 Bytes
Binary file not shown.
648 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)