@@ -10,6 +10,7 @@ import contextlib
10
10
import datetime
11
11
import gzip
12
12
import hashlib
13
+ import io
13
14
import json
14
15
import lzma
15
16
import multiprocessing
@@ -332,21 +333,41 @@ def gpg_verify_path(path: pathlib.Path, public_key_data: bytes, signature_data:
332
333
subprocess .run (["gpgconf" , "--kill" , "gpg-agent" ], env = env )
333
334
334
335
335
def open_tar_stream(path: pathlib.Path):
    """Open a tar file for binary reading, decompressing it on the fly.

    The decompressor is selected purely from the file suffix: ``.bz2``,
    ``.gz``/``.tgz``, ``.xz`` and ``.zst`` are decompressed transparently,
    while ``.tar`` is opened as-is.

    Raises:
        ValueError: if the suffix is none of the above.
    """
    suffix = path.suffix

    # Stdlib codecs all share the same ``open(filename, mode)`` signature.
    stdlib_openers = {
        ".bz2": bz2.open,
        ".gz": gzip.open,
        ".tgz": gzip.open,
        ".xz": lzma.open,
    }
    if suffix in stdlib_openers:
        return stdlib_openers[suffix](str(path), "rb")

    if suffix == ".zst":
        decompressor = ZstdDecompressor()
        return decompressor.stream_reader(path.open("rb"))

    if suffix == ".tar":
        return path.open("rb")

    raise ValueError("unknown archive format for tar file: %s" % path)
336
class ArchiveTypeNotSupported(Exception):
    """Raised when a file's content is not a recognised archive format."""

    def __init__(self, path: pathlib.Path):
        # Zero-argument super() resolves to the next class after this one in
        # the MRO. The previous super(Exception, self) form started the lookup
        # *after* Exception, skipping it.
        super().__init__("Archive type not supported for %s" % path)
339
+
340
+
341
def open_stream(path: pathlib.Path):
    """Identify a file as an extractable archive by looking at its content.

    The leading bytes are sniffed for zip, gzip, bzip2, xz and zstd magic
    numbers. Compressed (or plain) streams are then checked for a tar header.

    Returns:
        A ``(typ, fh)`` tuple. ``typ`` is ``"zip"`` (``fh`` is the raw binary
        file handle, rewound to the start) or ``"tar"`` (``fh`` is a buffered
        reader over the, possibly decompressed, tar stream with nothing
        consumed). The caller owns ``fh`` and must close it.

    Raises:
        ArchiveTypeNotSupported: if the content is neither a zip file nor a
            (compressed) tar stream. Every handle opened here is closed
            before raising.
    """
    raw = path.open(mode="rb")
    stream = raw
    handed_off = False
    try:
        magic = raw.read(6)
        raw.seek(0)
        if magic[:2] == b"PK":
            handed_off = True
            return "zip", raw
        if magic[:2] == b"\x1f\x8b":
            stream = gzip.GzipFile(fileobj=raw)
        elif magic[:3] == b"BZh":
            stream = bz2.BZ2File(raw)
        elif magic == b"\xfd7zXZ\x00":
            stream = lzma.LZMAFile(raw)
        elif magic[:4] == b"\x28\xb5\x2f\xfd":
            stream = ZstdDecompressor().stream_reader(raw)
        # Buffer the stream so the tar header can be peeked at without
        # consuming it.
        stream = io.BufferedReader(stream)
        try:
            # A full tar info header is 512 bytes.
            headers = stream.peek(512)
            # 257 is the offset of the ustar magic.
            tar_magic = headers[257 : 257 + 8]
            # For older unix tar, rely on TarInfo.frombuf's checksum check.
            is_tar = tar_magic in (b"ustar\x0000", b"ustar  \x00") or bool(
                tarfile.TarInfo.frombuf(
                    headers[:512], tarfile.ENCODING, "surrogateescape"
                )
            )
        except Exception:
            # Deliberate best-effort: a corrupt compressed stream or an
            # invalid/truncated tar header just means "not an archive we can
            # handle", which is reported uniformly below.
            is_tar = False
        if is_tar:
            handed_off = True
            return "tar", stream
        raise ArchiveTypeNotSupported(path)
    finally:
        if not handed_off:
            # The stdlib decompressor wrappers do not close a file object
            # that was passed in, so close the raw handle explicitly as well.
            try:
                stream.close()
            finally:
                raw.close()
350
371
351
372
352
373
def archive_type (path : pathlib .Path ):
@@ -359,7 +380,7 @@ def archive_type(path: pathlib.Path):
359
380
return None
360
381
361
382
362
- def extract_archive (path , dest_dir , typ ):
383
+ def extract_archive (path , dest_dir ):
363
384
"""Extract an archive to a destination directory."""
364
385
365
386
# Resolve paths to absolute variants.
@@ -371,8 +392,8 @@ def extract_archive(path, dest_dir, typ):
371
392
372
393
# We pipe input to the decompressor program so that we can apply
373
394
# custom decompressors that the program may not know about.
395
+ typ , ifh = open_stream (path )
374
396
if typ == "tar" :
375
- ifh = open_tar_stream (path )
376
397
# On Windows, the tar program doesn't support things like symbolic
377
398
# links, while Windows actually support them. The tarfile module in
378
399
# python does. So use that. But since it's significantly slower than
@@ -419,10 +440,8 @@ def repack_archive(
419
440
):
420
441
assert orig != dest
421
442
log ("Repacking as %s" % dest )
422
- orig_typ = archive_type (orig )
443
+ orig_typ , ifh = open_stream (orig )
423
444
typ = archive_type (dest )
424
- if not orig_typ :
425
- raise Exception ("Archive type not supported for %s" % orig .name )
426
445
if not typ :
427
446
raise Exception ("Archive type not supported for %s" % dest .name )
428
447
@@ -448,7 +467,7 @@ def repack_archive(
448
467
ctx = ZstdCompressor ()
449
468
if orig_typ == "zip" :
450
469
assert typ == "tar"
451
- zip = zipfile .ZipFile (orig )
470
+ zip = zipfile .ZipFile (ifh )
452
471
# Convert the zip stream to a tar on the fly.
453
472
with ctx .stream_writer (fh ) as compressor , tarfile .open (
454
473
fileobj = compressor , mode = "w:"
@@ -490,7 +509,6 @@ def repack_archive(
490
509
raise Exception ("Repacking a tar to zip is not supported" )
491
510
assert typ == "tar"
492
511
493
- ifh = open_tar_stream (orig )
494
512
if filter :
495
513
# To apply the filter, we need to open the tar stream and
496
514
# tweak it.
@@ -533,11 +551,12 @@ def fetch_and_extract(url, dest_dir, extract=True, sha256=None, size=None):
533
551
if not extract :
534
552
return
535
553
536
- typ = archive_type (dest_path )
537
- if typ :
538
- extract_archive (dest_path , dest_dir , typ )
554
+ try :
555
+ extract_archive (dest_path , dest_dir )
539
556
log ("Removing %s" % dest_path )
540
557
dest_path .unlink ()
558
+ except ArchiveTypeNotSupported :
559
+ pass
541
560
542
561
543
562
def fetch_urls (downloads ):
0 commit comments