// Copyright 2014-2021 Ulrich Kunitz. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package lzma import ( "errors" "fmt" "io" ) const ( // maximum size of compressed data in a chunk maxCompressed = 1 << 16 // maximum size of uncompressed data in a chunk maxUncompressed = 1 << 21 ) // chunkType represents the type of an LZMA2 chunk. Note that this // value is an internal representation and no actual encoding of a LZMA2 // chunk header. type chunkType byte // Possible values for the chunk type. const ( // end of stream cEOS chunkType = iota // uncompressed; reset dictionary cUD // uncompressed; no reset of dictionary cU // LZMA compressed; no reset cL // LZMA compressed; reset state cLR // LZMA compressed; reset state; new property value cLRN // LZMA compressed; reset state; new property value; reset dictionary cLRND ) // chunkTypeStrings provide a string representation for the chunk types. var chunkTypeStrings = [...]string{ cEOS: "EOS", cU: "U", cUD: "UD", cL: "L", cLR: "LR", cLRN: "LRN", cLRND: "LRND", } // String returns a string representation of the chunk type. func (c chunkType) String() string { if !(cEOS <= c && c <= cLRND) { return "unknown" } return chunkTypeStrings[c] } // Actual encodings for the chunk types in the value. Note that the high // uncompressed size bits are stored in the header byte additionally. const ( hEOS = 0 hUD = 1 hU = 2 hL = 1 << 7 hLR = 1<<7 | 1<<5 hLRN = 1<<7 | 1<<6 hLRND = 1<<7 | 1<<6 | 1<<5 ) // errHeaderByte indicates an unsupported value for the chunk header // byte. These bytes starts the variable-length chunk header. var errHeaderByte = errors.New("lzma: unsupported chunk header byte") // headerChunkType converts the header byte into a chunk type. It // ignores the uncompressed size bits in the chunk header byte. func headerChunkType(h byte) (c chunkType, err error) { if h&hL == 0 { // no compression switch h { case hEOS: c = cEOS case hUD: c = cUD case hU: c = cU default: return 0, errHeaderByte } return } switch h & hLRND { case hL: c = cL case hLR: c = cLR case hLRN: c = cLRN case hLRND: c = cLRND default: return 0, errHeaderByte } return } // uncompressedHeaderLen provides the length of an uncompressed header const uncompressedHeaderLen = 3 // headerLen returns the length of the LZMA2 header for a given chunk // type. func headerLen(c chunkType) int { switch c { case cEOS: return 1 case cU, cUD: return uncompressedHeaderLen case cL, cLR: return 5 case cLRN, cLRND: return 6 } panic(fmt.Errorf("unsupported chunk type %d", c)) } // chunkHeader represents the contents of a chunk header. type chunkHeader struct { ctype chunkType uncompressed uint32 compressed uint16 props Properties } // String returns a string representation of the chunk header. func (h *chunkHeader) String() string { return fmt.Sprintf("%s %d %d %s", h.ctype, h.uncompressed, h.compressed, &h.props) } // UnmarshalBinary reads the content of the chunk header from the data // slice. The slice must have the correct length. func (h *chunkHeader) UnmarshalBinary(data []byte) error { if len(data) == 0 { return errors.New("no data") } c, err := headerChunkType(data[0]) if err != nil { return err } n := headerLen(c) if len(data) < n { return errors.New("incomplete data") } if len(data) > n { return errors.New("invalid data length") } *h = chunkHeader{ctype: c} if c == cEOS { return nil } h.uncompressed = uint32(uint16BE(data[1:3])) if c <= cU { return nil } h.uncompressed |= uint32(data[0]&^hLRND) << 16 h.compressed = uint16BE(data[3:5]) if c <= cLR { return nil } h.props, err = PropertiesForCode(data[5]) return err } // MarshalBinary encodes the chunk header value. The function checks // whether the content of the chunk header is correct. func (h *chunkHeader) MarshalBinary() (data []byte, err error) { if h.ctype > cLRND { return nil, errors.New("invalid chunk type") } if err = h.props.verify(); err != nil { return nil, err } data = make([]byte, headerLen(h.ctype)) switch h.ctype { case cEOS: return data, nil case cUD: data[0] = hUD case cU: data[0] = hU case cL: data[0] = hL case cLR: data[0] = hLR case cLRN: data[0] = hLRN case cLRND: data[0] = hLRND } putUint16BE(data[1:3], uint16(h.uncompressed)) if h.ctype <= cU { return data, nil } data[0] |= byte(h.uncompressed>>16) &^ hLRND putUint16BE(data[3:5], h.compressed) if h.ctype <= cLR { return data, nil } data[5] = h.props.Code() return data, nil } // readChunkHeader reads the chunk header from the IO reader. func readChunkHeader(r io.Reader) (h *chunkHeader, err error) { p := make([]byte, 1, 6) if _, err = io.ReadFull(r, p); err != nil { return } c, err := headerChunkType(p[0]) if err != nil { return } p = p[:headerLen(c)] if _, err = io.ReadFull(r, p[1:]); err != nil { return } h = new(chunkHeader) if err = h.UnmarshalBinary(p); err != nil { return nil, err } return h, nil } // uint16BE converts a big-endian uint16 representation to an uint16 // value. func uint16BE(p []byte) uint16 { return uint16(p[0])<<8 | uint16(p[1]) } // putUint16BE puts the big-endian uint16 presentation into the given // slice. func putUint16BE(p []byte, x uint16) { p[0] = byte(x >> 8) p[1] = byte(x) } // chunkState is used to manage the state of the chunks type chunkState byte // start and stop define the initial and terminating state of the chunk // state const ( start chunkState = 'S' stop chunkState = 'T' ) // errors for the chunk state handling var ( errChunkType = errors.New("lzma: unexpected chunk type") errState = errors.New("lzma: wrong chunk state") ) // next transitions state based on chunk type input func (c *chunkState) next(ctype chunkType) error { switch *c { // start state case 'S': switch ctype { case cEOS: *c = 'T' case cUD: *c = 'R' case cLRND: *c = 'L' default: return errChunkType } // normal LZMA mode case 'L': switch ctype { case cEOS: *c = 'T' case cUD: *c = 'R' case cU: *c = 'U' case cL, cLR, cLRN, cLRND: break default: return errChunkType } // reset required case 'R': switch ctype { case cEOS: *c = 'T' case cUD, cU: break case cLRN, cLRND: *c = 'L' default: return errChunkType } // uncompressed case 'U': switch ctype { case cEOS: *c = 'T' case cUD: *c = 'R' case cU: break case cL, cLR, cLRN, cLRND: *c = 'L' default: return errChunkType } // terminal state case 'T': return errChunkType default: return errState } return nil } // defaultChunkType returns the default chunk type for each chunk state. func (c chunkState) defaultChunkType() chunkType { switch c { case 'S': return cLRND case 'L', 'U': return cL case 'R': return cLRN default: // no error return cEOS } } // maxDictCap defines the maximum dictionary capacity supported by the // LZMA2 dictionary capacity encoding. const maxDictCap = 1<<32 - 1 // maxDictCapCode defines the maximum dictionary capacity code. const maxDictCapCode = 40 // The function decodes the dictionary capacity byte, but doesn't change // for the correct range of the given byte. func decodeDictCap(c byte) int64 { return (2 | int64(c)&1) << (11 + (c>>1)&0x1f) } // DecodeDictCap decodes the encoded dictionary capacity. The function // returns an error if the code is out of range. func DecodeDictCap(c byte) (n int64, err error) { if c >= maxDictCapCode { if c == maxDictCapCode { return maxDictCap, nil } return 0, errors.New("lzma: invalid dictionary size code") } return decodeDictCap(c), nil } // EncodeDictCap encodes a dictionary capacity. The function returns the // code for the capacity that is greater or equal n. If n exceeds the // maximum support dictionary capacity, the maximum value is returned. func EncodeDictCap(n int64) byte { a, b := byte(0), byte(40) for a < b { c := a + (b-a)>>1 m := decodeDictCap(c) if n <= m { if n == m { return c } b = c } else { a = c + 1 } } return a }