66// Reader and Writer support the classic LZMA format. Reader2 and
77// Writer2 support the decoding and encoding of LZMA2 streams.
88//
9- // The package is written completely in Go and doesn't rely on any external
9+ // The package is written completely in Go and does not rely on any external
1010// library.
1111package lzma
1212
1313import (
1414 "errors"
15+ "fmt"
1516 "io"
1617)
1718
// ReaderConfig stores the parameters for the reader of the classic LZMA
// format.
type ReaderConfig struct {
	// DictCap is the upper limit for the dictionary size announced in the
	// header of a .lzma file (since v0.5.14). Limiting it helps to mitigate
	// problems with mangled headers. A zero value selects the default
	// limit set by fill.
	DictCap int
}

// fill converts the zero values of the configuration to the default values.
//
// A zero DictCap becomes 2 GiB (1<<31), the upper limit chosen to address
// the zero prefix security issue. On platforms where int cannot represent
// that value the limit is clamped to the largest int.
func (c *ReaderConfig) fill() {
	if c.DictCap != 0 {
		return
	}
	// The untyped constant 1<<31 overflows a 32-bit int, so assigning it
	// directly to DictCap would not compile on 32-bit targets. Compute the
	// limit in int64 and clamp it to the platform's int range instead.
	// (Before the security fix the default was 8 * 1024 * 1024.)
	const maxInt = int64(^uint(0) >> 1)
	limit := int64(1) << 31
	if limit > maxInt {
		limit = maxInt
	}
	c.DictCap = int(limit)
}
3037
@@ -39,10 +46,33 @@ func (c *ReaderConfig) Verify() error {
3946}
4047
4148// Reader provides a reader for LZMA files or streams.
49+ //
50+ // # Security concerns
51+ //
52+ // Note that LZMA format doesn't support a magic marker in the header. So
53+ // [NewReader] cannot determine whether it reads the actual header. For instance
54+ // the LZMA stream might have a zero byte in front of the reader, leading to
55+ // larger dictionary sizes and file sizes. The code will detect later that there
56+ // are problems with the stream, but the dictionary has already been allocated
57+ // and this might consume a lot of memory.
58+ //
59+ // Version 0.5.14 introduces built-in mitigations:
60+ //
61+ // - The [ReaderConfig] DictCap field is now interpreted as a limit for the
62+ // dictionary size.
63+ // - The default is 2 Gigabytes (2^31 bytes).
64+ // - Users can check with the [Reader.Header] method what the actual values are in
65+ // their LZMA files and set a smaller limit using [ReaderConfig].
66+ // - The dictionary size doesn't exceed the larger of the file size and
67+ // the minimum dictionary size. This is another measure to prevent huge
68+ // memory allocations for the dictionary.
69+ // - The code supports stream sizes only up to a pebibyte (1024^5).
4270type Reader struct {
43- lzma io.Reader
44- h header
45- d * decoder
71+ lzma io.Reader
72+ header Header
73+ // headerOrig stores the original header read from the stream.
74+ headerOrig Header
75+ d * decoder
4676}
4777
4878// NewReader creates a new reader for an LZMA stream using the classic
@@ -51,8 +81,37 @@ func NewReader(lzma io.Reader) (r *Reader, err error) {
5181 return ReaderConfig {}.NewReader (lzma )
5282}
5383
// ErrDictSize reports about an error of the dictionary size.
type ErrDictSize struct {
	// ConfigDictCap is the dictionary capacity limit taken from the
	// reader configuration.
	ConfigDictCap int
	// HeaderDictSize is the dictionary size found in the stream header.
	HeaderDictSize uint32
	// Message is the formatted, human-readable error text.
	Message string
}

// Error returns the error message.
func (e *ErrDictSize) Error() string {
	return e.Message
}

// newErrDictSize builds an ErrDictSize whose Message is produced by
// formatting messageformat with fmt.Sprintf.
//
// The format arguments are, in order: configDictCap, headerDictSize and then
// args. A format string can therefore always address the configured capacity
// as %[1]d and the header dictionary size as %[2]d, regardless of how many
// extra arguments follow.
func newErrDictSize(messageformat string,
	configDictCap int, headerDictSize uint32,
	args ...interface{}) *ErrDictSize {
	// Put the two fixed values in front of the caller-supplied arguments
	// so that their argument indexes are stable.
	formatArgs := make([]interface{}, 0, len(args)+2)
	formatArgs = append(formatArgs, configDictCap, headerDictSize)
	formatArgs = append(formatArgs, args...)
	return &ErrDictSize{
		ConfigDictCap:  configDictCap,
		HeaderDictSize: headerDictSize,
		Message:        fmt.Sprintf(messageformat, formatArgs...),
	}
}
109+
// maxStreamSize is the largest uncompressed stream size accepted from an
// LZMA header: 1 << 50 bytes (a pebibyte, 1024^5). Larger sizes are not
// supported.
const maxStreamSize = 1 << 50
112+
54113// NewReader creates a new reader for an LZMA stream in the classic
55- // format. The function reads and verifies the the header of the LZMA
114+ // format. The function reads and verifies the header of the LZMA
56115// stream.
57116func (c ReaderConfig ) NewReader (lzma io.Reader ) (r * Reader , err error ) {
58117 if err = c .Verify (); err != nil {
@@ -66,29 +125,63 @@ func (c ReaderConfig) NewReader(lzma io.Reader) (r *Reader, err error) {
66125 return nil , err
67126 }
68127 r = & Reader {lzma : lzma }
69- if err = r .h .unmarshalBinary (data ); err != nil {
128+ if err = r .header .unmarshalBinary (data ); err != nil {
70129 return nil , err
71130 }
72- if r .h .dictCap < MinDictCap {
73- r .h .dictCap = MinDictCap
131+ r .headerOrig = r .header
132+ dictSize := int64 (r .header .DictSize )
133+ if int64 (c .DictCap ) < dictSize {
134+ return nil , newErrDictSize (
135+ "lzma: header dictionary size %[2]d exceeds configured dictionary capacity %[1]d" ,
136+ c .DictCap , uint32 (dictSize ),
137+ )
138+ }
139+ if dictSize < MinDictCap {
140+ dictSize = MinDictCap
141+ }
142+ // original code: disabled this because there is no point in increasing
143+ // the dictionary above what is stated in the file.
144+ /*
145+ if int64(c.DictCap) > int64(dictSize) {
146+ dictSize = int64(c.DictCap)
147+ }
148+ */
149+ size := r .header .Size
150+ if size >= 0 && size < dictSize {
151+ dictSize = size
74152 }
75- dictCap := r .h .dictCap
76- if c .DictCap > dictCap {
77- dictCap = c .DictCap
153+ // Protect against modified or malicious headers.
154+ if size > maxStreamSize {
155+ return nil , fmt .Errorf (
156+ "lzma: stream size %d exceeds a pebibyte (1024^5)" ,
157+ size )
78158 }
159+ if dictSize < MinDictCap {
160+ dictSize = MinDictCap
161+ }
162+
163+ r .header .DictSize = uint32 (dictSize )
79164
80- state := newState (r .h . properties )
81- dict , err := newDecoderDict (dictCap )
165+ state := newState (r .header . Properties )
166+ dict , err := newDecoderDict (int ( dictSize ) )
82167 if err != nil {
83168 return nil , err
84169 }
85- r .d , err = newDecoder (ByteReader (lzma ), state , dict , r .h . size )
170+ r .d , err = newDecoder (ByteReader (lzma ), state , dict , r .header . Size )
86171 if err != nil {
87172 return nil , err
88173 }
89174 return r , nil
90175}
91176
177+ // Header returns the header as read from the LZMA stream. It is intended to
178+ // allow the user to understand what parameters are typically provided in the
179+ // headers of the LZMA files and set the DictCap field in [ReaderConfig]
180+ // accordingly.
181+ func (r * Reader ) Header () (h Header , ok bool ) {
182+ return r .headerOrig , r .d != nil
183+ }
184+
92185// EOSMarker indicates that an EOS marker has been encountered.
93186func (r * Reader ) EOSMarker () bool {
94187 return r .d .eosMarker
0 commit comments