@@ -116,7 +116,7 @@ def __str__(self) -> str:
116
116
"open" ,
117
117
"breakpoint" ,
118
118
}, # Pickle versions 3, 4 have those function under 'builtins'
119
- "aiohttp.client " : "*" ,
119
+ "aiohttp" : "*" ,
120
120
"asyncio" : "*" ,
121
121
"bdb" : "*" ,
122
122
"commands" : "*" , # Python 2 precursor to subprocess
@@ -134,7 +134,6 @@ def __str__(self) -> str:
134
134
"ssl" : "*" , # DNS exfiltration via ssl.get_server_certificate()
135
135
"subprocess" : "*" ,
136
136
"sys" : "*" ,
137
- "asyncio.unix_events" : {"_UnixSubprocessTransport._start" },
138
137
"code" : {"InteractiveInterpreter.runcode" },
139
138
"cProfile" : {"runctx" , "run" },
140
139
"doctest" : {"debug_script" },
@@ -257,6 +256,7 @@ def _list_globals(data: IO[bytes], multiple_pickles=True) -> Set[Tuple[str, str]
257
256
for op in pickletools .genops (data ):
258
257
ops .append (op )
259
258
except Exception as e :
259
+ _log .debug (f"Error parsing pickle: { e } " , exc_info = True )
260
260
parsing_pkl_error = str (e )
261
261
last_byte = data .read (1 )
262
262
data .seek (- 1 , 1 )
@@ -329,6 +329,11 @@ def _build_scan_result_from_raw_globals(
329
329
g = Global (rg [0 ], rg [1 ], SafetyLevel .Dangerous )
330
330
safe_filter = _safe_globals .get (g .module )
331
331
unsafe_filter = _unsafe_globals .get (g .module )
332
+
333
+ # If the module as a whole is marked as dangerous, submodules are also dangerous
334
+ if unsafe_filter is None and "." in g .module and _unsafe_globals .get (g .module .split ("." )[0 ]) == "*" :
335
+ unsafe_filter = "*"
336
+
332
337
if "unknown" in g .module or "unknown" in g .name :
333
338
g .safety = SafetyLevel .Dangerous
334
339
_log .warning ("%s: %s import '%s %s' FOUND" , file_id , g .safety .value , g .module , g .name )
@@ -348,11 +353,12 @@ def _build_scan_result_from_raw_globals(
348
353
349
354
def scan_pickle_bytes (data : IO [bytes ], file_id , multiple_pickles = True ) -> ScanResult :
350
355
"""Disassemble a Pickle stream and report issues"""
356
+ _log .debug (f"scan_pickle_bytes({ file_id } )" )
351
357
352
358
try :
353
359
raw_globals = _list_globals (data , multiple_pickles )
354
360
except GenOpsError as e :
355
- _log .error (f"ERROR: parsing pickle in { file_id } : { e } " )
361
+ _log .error (f"ERROR: parsing pickle in { file_id } : { e } " , exc_info = _log . isEnabledFor ( logging . DEBUG ) )
356
362
if e .globals is not None :
357
363
return _build_scan_result_from_raw_globals (e .globals , file_id , scan_err = True )
358
364
else :
@@ -365,6 +371,8 @@ def scan_pickle_bytes(data: IO[bytes], file_id, multiple_pickles=True) -> ScanRe
365
371
366
372
# XXX: it appears there is not way to get the byte stream for a given file within the 7z archive and thus forcing us to unzip to disk before scanning
367
373
def scan_7z_bytes (data : IO [bytes ], file_id ) -> ScanResult :
374
+ _log .debug (f"scan_7z_bytes({ file_id } )" )
375
+
368
376
try :
369
377
import py7zr
370
378
except ImportError :
@@ -387,6 +395,8 @@ def scan_7z_bytes(data: IO[bytes], file_id) -> ScanResult:
387
395
388
396
389
397
def scan_zip_bytes (data : IO [bytes ], file_id ) -> ScanResult :
398
+ _log .debug (f"scan_zip_bytes({ file_id } )" )
399
+
390
400
result = ScanResult ([])
391
401
392
402
with RelaxedZipFile (data , "r" ) as zip :
@@ -415,6 +425,8 @@ def scan_zip_bytes(data: IO[bytes], file_id) -> ScanResult:
415
425
416
426
417
427
def scan_numpy (data : IO [bytes ], file_id ) -> ScanResult :
428
+ _log .debug (f"scan_numpy({ file_id } )" )
429
+
418
430
# Delay import to avoid dependency on NumPy
419
431
import numpy as np
420
432
@@ -445,6 +457,8 @@ def scan_numpy(data: IO[bytes], file_id) -> ScanResult:
445
457
446
458
447
459
def scan_pytorch (data : IO [bytes ], file_id ) -> ScanResult :
460
+ _log .debug (f"scan_pytorch({ file_id } )" )
461
+
448
462
# new pytorch format
449
463
if _is_zipfile (data ):
450
464
return scan_zip_bytes (data , file_id )
@@ -473,26 +487,34 @@ def scan_pytorch(data: IO[bytes], file_id) -> ScanResult:
473
487
474
488
475
489
def scan_bytes (data : IO [bytes ], file_id , file_ext : Optional [str ] = None ) -> ScanResult :
490
+ _log .debug (f"scan_bytes({ file_id } )" )
491
+
476
492
if file_ext is not None and file_ext in _pytorch_file_extensions :
477
493
try :
478
494
return scan_pytorch (data , file_id )
479
495
except InvalidMagicError as e :
480
- _log .error (f"ERROR: Invalid magic number for file { e } " )
481
- return ScanResult ([], scan_err = True )
482
- elif file_ext is not None and file_ext in _numpy_file_extensions :
496
+ _log .warning (
497
+ f"WARNING: Invalid PyTorch magic number for file { e } . Trying to scan as non-PyTorch file." ,
498
+ exc_info = _log .isEnabledFor (logging .DEBUG ),
499
+ )
500
+ data .seek (0 )
501
+
502
+ if file_ext is not None and file_ext in _numpy_file_extensions :
483
503
return scan_numpy (data , file_id )
504
+
505
+ is_zip = zipfile .is_zipfile (data )
506
+ data .seek (0 )
507
+ if is_zip :
508
+ return scan_zip_bytes (data , file_id )
509
+ elif _is_7z_file (data ):
510
+ return scan_7z_bytes (data , file_id )
484
511
else :
485
- is_zip = zipfile .is_zipfile (data )
486
- data .seek (0 )
487
- if is_zip :
488
- return scan_zip_bytes (data , file_id )
489
- elif _is_7z_file (data ):
490
- return scan_7z_bytes (data , file_id )
491
- else :
492
- return scan_pickle_bytes (data , file_id )
512
+ return scan_pickle_bytes (data , file_id )
493
513
494
514
495
515
def scan_huggingface_model (repo_id ):
516
+ _log .debug (f"scan_huggingface_model({ repo_id } )" )
517
+
496
518
# List model files
497
519
model = json .loads (_http_get (f"https://huggingface.co/api/models/{ repo_id } " ).decode ("utf-8" ))
498
520
file_names = [file_name for file_name in (sibling .get ("rfilename" ) for sibling in model ["siblings" ]) if file_name is not None ]
@@ -512,6 +534,8 @@ def scan_huggingface_model(repo_id):
512
534
513
535
514
536
def scan_directory_path (path ) -> ScanResult :
537
+ _log .debug (f"scan_directory_path({ path } )" )
538
+
515
539
scan_result = ScanResult ([])
516
540
517
541
for base_path , _ , file_names in os .walk (path ):
@@ -532,10 +556,14 @@ def scan_directory_path(path) -> ScanResult:
532
556
533
557
534
558
def scan_file_path (path ) -> ScanResult :
559
+ _log .debug (f"scan_file_path({ path } )" )
560
+
535
561
file_ext = os .path .splitext (path )[1 ]
536
562
with open (path , "rb" ) as file :
537
563
return scan_bytes (file , path , file_ext )
538
564
539
565
540
566
def scan_url (url ) -> ScanResult :
567
+ _log .debug (f"scan_url({ url } )" )
568
+
541
569
return scan_bytes (io .BytesIO (_http_get (url )), url )
0 commit comments