Skip to content

Commit 98f3091

Browse files
Detect UStar tar archives (#308)
* Detect UStar tar archives

UStar tar archives have a `magic` header field at byte offset 257 in each entry whose value begins with the string `ustar`. Identify them with the MIME type `application/x-tar`.

Also add test cases for a number of UStar-compatible formats, created by GNU tar 1.29 (with `--format=<format-name>`):

* `tar.gnu.tar`
* `tar.oldgnu.tar`
* `tar.posix.tar`
* `tar.ustar.tar`

as well as `tar.star.tar` (created by star 1.6) and, for completeness, `tar.v7-gnu.tar` (a v7 tar archive created by GNU tar 1.29).

Fixes #307.
1 parent 0966e1d commit 98f3091

File tree

8 files changed

+24
-8
lines changed

8 files changed

+24
-8
lines changed

internal/magic/archive.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,21 @@ func CRX(raw []byte, limit uint32) bool {
7474
}
7575

7676
// Tar matches a (t)ape (ar)chive file.
77-
//
78-
// Signature source: https://www.nationalarchives.gov.uk/PRONOM/Format/proFormatSearch.aspx?status=detailReport&id=385&strPageToDisplay=signatures
7977
func Tar(raw []byte, _ uint32) bool {
78+
// The "magic" header field for files in UStar (POSIX IEEE P1003.1) archives
79+
// has the prefix "ustar". The values of the remaining bytes in this field vary
80+
// by archiver implementation.
81+
if len(raw) >= 512 && bytes.HasPrefix(raw[257:], []byte{0x75, 0x73, 0x74, 0x61, 0x72}) {
82+
return true
83+
}
84+
8085
if len(raw) < 256 {
8186
return false
8287
}
8388

89+
// The older v7 format has no "magic" field, and therefore must be identified
90+
// with heuristics based on legal ranges of values for other header fields:
91+
// https://www.nationalarchives.gov.uk/PRONOM/Format/proFormatSearch.aspx?status=detailReport&id=385&strPageToDisplay=signatures
8492
rules := []struct {
8593
min, max uint8
8694
i int

mimetype_test.go

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -180,12 +180,20 @@ var files = map[string]string{
180180
// the timestamps.
181181
"not.srt.txt": "text/plain; charset=utf-8",
182182
// not.srt.2.txt does not specify milliseconds.
183-
"not.srt.2.txt": "text/plain; charset=utf-8",
184-
"svg.1.svg": "image/svg+xml",
185-
"svg.svg": "image/svg+xml",
186-
"swf.swf": "application/x-shockwave-flash",
187-
"tar.tar": "application/x-tar",
188-
"tar.v7.tar": "application/x-tar",
183+
"not.srt.2.txt": "text/plain; charset=utf-8",
184+
"svg.1.svg": "image/svg+xml",
185+
"svg.svg": "image/svg+xml",
186+
"swf.swf": "application/x-shockwave-flash",
187+
"tar.tar": "application/x-tar",
188+
"tar.gnu.tar": "application/x-tar",
189+
"tar.oldgnu.tar": "application/x-tar",
190+
"tar.posix.tar": "application/x-tar",
191+
// tar.star.tar was generated with star 1.6.
192+
"tar.star.tar": "application/x-tar",
193+
"tar.ustar.tar": "application/x-tar",
194+
"tar.v7.tar": "application/x-tar",
195+
// tar.v7-gnu.tar is a v7 tar archive generated with GNU tar 1.29.
196+
"tar.v7-gnu.tar": "application/x-tar",
189197
"tcl.tcl": "text/x-tcl",
190198
"tcx.tcx": "application/vnd.garmin.tcx+xml",
191199
"tiff.tiff": "image/tiff",

testdata/tar.gnu.tar

10 KB
Binary file not shown.

testdata/tar.oldgnu.tar

10 KB
Binary file not shown.

testdata/tar.posix.tar

10 KB
Binary file not shown.

testdata/tar.star.tar

10 KB
Binary file not shown.

testdata/tar.ustar.tar

10 KB
Binary file not shown.

testdata/tar.v7-gnu.tar

10 KB
Binary file not shown.

0 commit comments

Comments
 (0)