- update
continuous-integration/drone/push Build is passing Details

master
李光春 2 years ago
parent b56472cb82
commit d31a69a439

1
.gitignore vendored

@ -5,5 +5,4 @@
.vscode
*.log
gomod.sh
/vendor/
download_test.go

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 Arran Walker
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,68 @@
# go7z
A native Go 7z archive reader.
Features:
- Development in early stages.
- Very few tests.
- Medium probability of crashes.
- Medium probability of using all memory.
- Decompresses:
- [LZMA](https://github.com/ulikunitz/xz)
- [LZMA2](https://github.com/ulikunitz/xz)
- Delta
- BCJ2
- bzip2
- deflate
## Usage
Extracting an archive:
```
package main
import (
"io"
"os"
"github.com/saracen/go7z"
)
// main extracts every entry of hello.7z into the current directory.
func main() {
	sz, err := go7z.OpenReader("hello.7z")
	if err != nil {
		panic(err)
	}
	defer sz.Close()

	for {
		hdr, err := sz.Next()
		if err == io.EOF {
			break // End of archive
		}
		if err != nil {
			panic(err)
		}

		// If empty stream (no contents) and isn't specifically an empty file...
		// then it's a directory.
		if hdr.IsEmptyStream && !hdr.IsEmptyFile {
			if err := os.MkdirAll(hdr.Name, os.ModePerm); err != nil {
				panic(err)
			}
			continue
		}

		// Create file
		f, err := os.Create(hdr.Name)
		if err != nil {
			panic(err)
		}

		// Close the file as soon as its contents are written. A defer here
		// would only run when main returns, keeping every extracted file
		// open for the whole run.
		_, err = io.Copy(f, sz)
		if cerr := f.Close(); err == nil {
			err = cerr
		}
		if err != nil {
			panic(err)
		}
	}
}
```

@ -0,0 +1,132 @@
package filters
import (
"bytes"
"crypto/aes"
"crypto/cipher"
"crypto/sha256"
"encoding/binary"
"hash"
"io"
"strings"
"unicode/utf16"
)
// km is the package-wide key manager used by NewAESDecrypter. It is created
// with an empty key cache and a SHA-256 hasher.
var km = keyManager{
	hasher: sha256.New(),
	cache:  make(map[string][]byte),
}
// AESDecrypter is an AES-256 decryptor. It implements io.Reader by pulling
// ciphertext from r one AES block at a time and decrypting it in CBC mode.
type AESDecrypter struct {
	// r is the source of ciphertext.
	r io.Reader
	// rbuf holds decrypted bytes that have not yet been handed to the caller.
	rbuf bytes.Buffer
	// cbc is the CBC block mode used to decrypt each block.
	cbc cipher.BlockMode
	// buf is scratch space for exactly one cipher block.
	buf [aes.BlockSize]byte
}
// keyManager derives AES keys from passwords and caches the results so the
// key stretching is only performed once per (power, salt, password).
type keyManager struct {
	// hasher is the hash used by sha256Stretch; it is reset after every use.
	hasher hash.Hash
	// cache maps an encoding of (power, salt, password) to the derived key.
	cache map[string][]byte
}

// Key returns the AES key for the given cycle power, salt and password,
// deriving and caching it on first use.
//
// A power of 0x3f selects the non-hashed derivation (see stretch); every
// other value runs 1<<power rounds of SHA-256 (see sha256Stretch). The
// password is encoded as UTF-16LE before derivation.
func (km *keyManager) Key(power int, salt []byte, password string) []byte {
	// Put the fixed-size fields and the salt length first so that different
	// (salt, password) pairs can never serialise to the same cache key.
	var id strings.Builder
	n := len(salt)
	id.Write([]byte{byte(power), byte(n), byte(n >> 8), byte(n >> 16), byte(n >> 24)})
	id.Write(salt)
	id.WriteString(password)
	cacheKey := id.String()

	if key, ok := km.cache[cacheKey]; ok {
		return key
	}

	units := utf16.Encode([]rune(password))
	pw := make([]byte, 2*len(units))
	for i, u := range units {
		binary.LittleEndian.PutUint16(pw[2*i:], u)
	}

	var key []byte
	if power == 0x3f {
		key = km.stretch(salt, pw)
	} else {
		key = km.sha256Stretch(power, salt, pw)
	}

	km.cache[cacheKey] = key
	return key
}

// stretch builds the key used when no hashing is requested: the salt followed
// by the password bytes, truncated or zero-padded to the AES-256 key size.
func (km *keyManager) stretch(salt, password []byte) []byte {
	// AES-256 needs a 32-byte key; aes.BlockSize (16) would silently select
	// AES-128 instead.
	const keySize = 32

	key := make([]byte, keySize)
	n := copy(key, salt) // copy truncates, so an oversized salt cannot panic
	copy(key[n:], password)
	return key
}

// sha256Stretch hashes salt, password and an 8-byte little-endian round
// counter 1<<power times and returns the resulting SHA-256 digest.
func (km *keyManager) sha256Stretch(power int, salt, password []byte) []byte {
	var counter [8]byte
	for round := 0; round < 1<<power; round++ {
		binary.LittleEndian.PutUint64(counter[:], uint64(round))
		// hash.Hash.Write is documented to never return an error.
		km.hasher.Write(salt)
		km.hasher.Write(password)
		km.hasher.Write(counter[:])
	}

	sum := km.hasher.Sum(nil)
	km.hasher.Reset() // leave the shared hasher clean for the next derivation
	return sum
}
// NewAESDecrypter returns a new AES-256 decryptor reading ciphertext from r.
//
// The key is obtained from the package key manager using power, salt and
// password. iv is copied into a zero-filled buffer of one AES block, so a
// shorter iv is zero-padded. An error is returned if the derived key is not
// accepted by aes.NewCipher.
func NewAESDecrypter(r io.Reader, power int, salt, iv []byte, password string) (*AESDecrypter, error) {
	block, err := aes.NewCipher(km.Key(power, salt, password))
	if err != nil {
		return nil, err
	}

	paddedIV := make([]byte, aes.BlockSize)
	copy(paddedIV, iv)

	dec := &AESDecrypter{r: r}
	dec.cbc = cipher.NewCBCDecrypter(block, paddedIV)
	return dec, nil
}
// Read decrypts data from the underlying reader into p.
//
// Ciphertext is consumed in whole cipher blocks until at least len(p)
// decrypted bytes are buffered or the source is exhausted. Bytes that were
// already decrypted are returned before io.EOF is reported. A source that
// ends in the middle of a block yields io.ErrUnexpectedEOF.
func (d *AESDecrypter) Read(p []byte) (int, error) {
	for d.rbuf.Len() < len(p) {
		// io.ReadFull guarantees a complete block; a bare Read may legally
		// return fewer bytes and would leave stale data in d.buf.
		if _, err := io.ReadFull(d.r, d.buf[:]); err != nil {
			if err == io.EOF && d.rbuf.Len() > 0 {
				// Clean end of input: hand out what is buffered first. The
				// next call finds the buffer empty and reports io.EOF.
				break
			}
			return 0, err
		}

		d.cbc.CryptBlocks(d.buf[:], d.buf[:])
		if _, err := d.rbuf.Write(d.buf[:]); err != nil {
			return 0, err
		}
	}

	return d.rbuf.Read(p)
}

@ -0,0 +1,210 @@
package filters
import (
"bufio"
"bytes"
"encoding/binary"
"io"
)
// rangeDecoder holds the state of the range coder that drives the BCJ2
// status (bit) decoders.
type rangeDecoder struct {
	// r supplies the range-coded byte stream.
	r io.Reader
	// nrange is the current width of the coding interval.
	nrange uint
	// code is the current position inside the interval.
	code uint
}

// newRangeDecoder creates a range decoder over r and primes it by shifting
// the first five bytes of the stream into code. It fails if those bytes
// cannot be read.
func newRangeDecoder(r io.Reader) (*rangeDecoder, error) {
	rd := &rangeDecoder{
		r:      r,
		nrange: 0xffffffff,
	}

	for i := 0; i < 5; i++ {
		b, err := rd.ReadByte()
		if err != nil {
			return nil, err
		}
		rd.code = (rd.code << 8) | uint(b)
	}

	return rd, nil
}

// ReadByte reads the next byte of the range-coded stream.
//
// io.ReadFull is used so that a reader returning (0, nil) is retried and a
// reader returning the final byte together with io.EOF is not mistaken for a
// failure. io.EOF is returned only when no byte was available.
func (rd *rangeDecoder) ReadByte() (byte, error) {
	var b [1]byte
	_, err := io.ReadFull(rd.r, b[:])
	return b[0], err
}
// Range-coder parameters used by statusDecoder.
const (
	// numMoveBits is the shift used when adapting a probability after a
	// decoded bit.
	numMoveBits = 5
	// numbitModelTotalBits is the number of bits of probability precision.
	numbitModelTotalBits = 11
	// bitModelTotal is the probability scale (1 << numbitModelTotalBits).
	bitModelTotal = uint(1) << numbitModelTotalBits
	// numTopBits defines the threshold below which the range is renormalised.
	numTopBits = 24
	// topValue is the renormalisation threshold (1 << numTopBits).
	topValue = uint(1 << numTopBits)
)
// statusDecoder is a single adaptive bit model for the range decoder.
type statusDecoder struct {
	// prob is the current probability estimate, scaled to bitModelTotal.
	prob uint
}

// newStatusDecoder returns a bit model starting at half of bitModelTotal.
func newStatusDecoder() *statusDecoder {
	sd := new(statusDecoder)
	sd.prob = bitModelTotal / 2
	return sd
}

// Decode decodes one bit from decoder and adapts the model's probability.
//
// It returns 0 or 1. When the range drops below topValue one more byte is
// pulled from the decoder; if that read fails, 0 and the error are returned.
func (sd *statusDecoder) Decode(decoder *rangeDecoder) (uint, error) {
	bound := (decoder.nrange >> numbitModelTotalBits) * sd.prob

	var bit uint
	if decoder.code < bound {
		decoder.nrange = bound
		sd.prob += (bitModelTotal - sd.prob) >> numMoveBits
	} else {
		bit = 1
		decoder.nrange -= bound
		decoder.code -= bound
		sd.prob -= sd.prob >> numMoveBits
	}

	// Renormalise: both outcomes refill the range in the same way.
	if decoder.nrange >= topValue {
		return bit, nil
	}
	next, err := decoder.ReadByte()
	if err != nil {
		return 0, err
	}
	decoder.code = (decoder.code << 8) | uint(next)
	decoder.nrange <<= 8

	return bit, nil
}
// BCJ2Decoder is a BCJ2 decoder. It merges the main byte stream with the
// call and jump address streams under control of a range-coded stream.
type BCJ2Decoder struct {
	// main is the buffered main byte stream.
	main *bufio.Reader
	// call supplies 4-byte addresses used after a 0xe8 byte.
	call io.Reader
	// jump supplies 4-byte addresses used after any other matched byte.
	jump io.Reader
	// rangeDecoder decodes the bit that says whether an address follows.
	rangeDecoder *rangeDecoder
	// statusDecoder holds the 256+2 bit models, selected by index().
	statusDecoder []*statusDecoder
	// written counts the bytes produced so far; it is used to rebase
	// addresses read from call/jump.
	written int64
	// finished is not referenced by the code visible here.
	finished bool
	// prevByte is the last byte produced, used for opcode matching.
	prevByte byte
	// buf accumulates decoded output until the caller reads it.
	buf *bytes.Buffer
}
// NewBCJ2Decoder returns a new BCJ2 decoder that combines the main, call and
// jump streams, using rangedecoder as the range-coded control stream.
//
// The range decoder is primed immediately, so an error is returned if its
// initial bytes cannot be read. The limit argument is currently unused.
func NewBCJ2Decoder(main, call, jump, rangedecoder io.Reader, limit int64) (*BCJ2Decoder, error) {
	rd, err := newRangeDecoder(rangedecoder)
	if err != nil {
		return nil, err
	}

	// One bit model per possible previous byte, plus two more (see index).
	models := make([]*statusDecoder, 256+2)
	for i := range models {
		models[i] = newStatusDecoder()
	}

	out := new(bytes.Buffer)
	out.Grow(1 << 16)

	return &BCJ2Decoder{
		main:          bufio.NewReader(main),
		call:          call,
		jump:          jump,
		rangeDecoder:  rd,
		statusDecoder: models,
		buf:           out,
	}, nil
}
// isJcc reports whether b0 is 0x0f and b1 lies in the range 0x80-0x8f.
func (d *BCJ2Decoder) isJcc(b0, b1 byte) bool {
	if b0 != 0x0f {
		return false
	}
	return b1>>4 == 0x8
}
// isJ reports whether b1 is 0xe8 or 0xe9, or whether (b0, b1) satisfies
// isJcc.
func (d *BCJ2Decoder) isJ(b0, b1 byte) bool {
	if b1 == 0xe8 || b1 == 0xe9 {
		return true
	}
	return d.isJcc(b0, b1)
}
// index selects the bit model for the byte pair (b0, b1): the value of b0
// when b1 is 0xe8, 256 when b1 is 0xe9, and 257 otherwise.
func (d *BCJ2Decoder) index(b0, b1 byte) int {
	if b1 == 0xe8 {
		return int(b0)
	}
	if b1 == 0xe9 {
		return 256
	}
	return 257
}
// Read runs one decoding step and then copies buffered output into p.
//
// io.EOF from the decoding step is not returned directly: whatever is still
// buffered is handed out first and the buffer's own Read decides the result.
// Any other decoding error is returned with zero bytes.
func (d *BCJ2Decoder) Read(p []byte) (int, error) {
	switch err := d.read(); err {
	case nil, io.EOF:
		return d.buf.Read(p)
	default:
		return 0, err
	}
}
// read performs one decoding step, appending output to d.buf.
//
// It copies bytes from the main stream until a byte pair matches isJ, then
// asks the range-coded stream whether a 4-byte address follows. If so, the
// address is taken from the call or jump stream, rebased and appended.
// Errors from any of the underlying streams are returned unchanged.
func (d *BCJ2Decoder) read() error {
	b := byte(0)
	var err error

	// Copy plain bytes from the main stream. The loop runs at most
	// d.buf.Cap() iterations and stops early after a matching byte pair.
	for i := 0; i < d.buf.Cap(); i++ {
		b, err = d.main.ReadByte()
		if err != nil {
			return err
		}

		d.written++
		if err = d.buf.WriteByte(b); err != nil {
			return err
		}

		if d.isJ(d.prevByte, b) {
			break
		}
		d.prevByte = b
	}

	// The buffer is exactly full: return so the caller can drain it.
	// NOTE(review): if the loop above ends because its iteration count ran
	// out and Len() != Cap(), execution still continues to the bit decode
	// below even though no match was seen - confirm this is intended.
	if d.buf.Len() == d.buf.Cap() {
		return nil
	}

	// Decode the bit that tells whether an address follows.
	bit, err := d.statusDecoder[d.index(d.prevByte, b)].Decode(d.rangeDecoder)
	if err != nil {
		return err
	}

	if bit == 1 {
		// 0xe8 takes its address from the call stream, everything else from
		// the jump stream.
		var r io.Reader
		if b == 0xe8 {
			r = d.call
		} else {
			r = d.jump
		}

		// The address is read big-endian.
		var dest uint32
		if err = binary.Read(r, binary.BigEndian, &dest); err != nil {
			return err
		}

		// Rebase it against the output position just past the 4 address
		// bytes, then emit it little-endian.
		dest -= uint32(d.written + 4)
		if err = binary.Write(d.buf, binary.LittleEndian, dest); err != nil {
			return err
		}

		// The most significant address byte is the last byte written.
		d.prevByte = byte(dest >> 24)
		d.written += 4
	} else {
		d.prevByte = b
	}

	return nil
}

@ -0,0 +1,45 @@
package filters
import "io"
// deltaStateSize is the size of the history kept by DeltaDecoder and
// therefore the largest supported delta distance.
const deltaStateSize = 256

// DeltaDecoder is a Delta decoder. Each output byte is the input byte plus
// the output byte produced delta positions earlier.
type DeltaDecoder struct {
	// state holds the last delta output bytes, oldest first.
	state [deltaStateSize]byte
	// r is the source of delta-encoded data.
	r io.Reader
	// delta is the distance between a byte and the byte it is relative to.
	delta uint
}

// NewDeltaDecoder returns a new Delta decoder reading from r.
//
// delta must be in the range 1..deltaStateSize; other values are not
// validated here and make Read panic. The limit argument is currently unused.
func NewDeltaDecoder(r io.Reader, delta uint, limit int64) (*DeltaDecoder, error) {
	return &DeltaDecoder{r: r, delta: delta}, nil
}

// Read reads delta-encoded bytes from the underlying reader and decodes them
// in place in p.
//
// Every byte the underlying reader delivers is decoded, including bytes that
// arrive together with an error such as io.EOF, as the io.Reader contract
// allows. The error is returned unchanged alongside the byte count.
func (d *DeltaDecoder) Read(p []byte) (int, error) {
	n, err := d.r.Read(p)
	if n <= 0 {
		return n, err
	}

	dist := d.delta

	var hist [deltaStateSize]byte
	copy(hist[:], d.state[:dist])

	// j cycles through the history; hist[j] is the byte written dist
	// positions before the one being decoded.
	var j uint
	for i := 0; i < n; i++ {
		hist[j] += p[i]
		p[i] = hist[j]
		if j++; j == dist {
			j = 0
		}
	}

	// Rotate the history so the oldest byte is at index 0 for the next call.
	copy(d.state[:], hist[j:dist])
	copy(d.state[dist-j:], hist[:j])

	return n, err
}

@ -0,0 +1,32 @@
// +build gofuzz
package go7z
import (
"bytes"
"io"
"io/ioutil"
)
// Fuzz is the go-fuzz entry point. It treats data as a complete 7z archive
// and walks every entry, discarding the contents.
//
// It returns 1 when the whole archive was read through to io.EOF, so that
// the fuzzer prioritises inputs that parse, and 0 when initialisation,
// header iteration or decompression fails.
func Fuzz(data []byte) int {
	sz := new(Reader)
	if err := sz.init(bytes.NewReader(data), int64(len(data)), true); err != nil {
		return 0
	}

	for {
		if _, err := sz.Next(); err != nil {
			if err == io.EOF {
				// Reached the end of a well-formed archive.
				return 1
			}
			return 0
		}

		if _, err := io.Copy(ioutil.Discard, sz); err != nil {
			return 0
		}
	}
}

@ -0,0 +1,25 @@
package headers
import (
"encoding/binary"
"io"
)
// ReadDigests reads an array of uint32 CRCs.
//
// An optional bool vector is read first to learn which of the length entries
// are present; each present entry is then read as a little-endian uint32.
// Entries that are not present are left as zero.
func ReadDigests(r io.Reader, length int) ([]uint32, error) {
	present, _, err := ReadOptionalBoolVector(r, length)
	if err != nil {
		return nil, err
	}

	digests := make([]uint32, length)
	var raw [4]byte
	for i, ok := range present {
		if !ok {
			continue
		}
		if _, err := io.ReadFull(r, raw[:]); err != nil {
			return nil, err
		}
		digests[i] = binary.LittleEndian.Uint32(raw[:])
	}

	return digests, nil
}

@ -0,0 +1,159 @@
package headers
import (
"encoding/binary"
"errors"
"io"
"time"
"unicode/utf16"
)
// ErrInvalidFileCount is returned when the file count read from the stream
// exceeds the caller supplied maxFileCount.
var ErrInvalidFileCount = errors.New("invalid file count")

// FileInfo is a structure containing the information of an archived file.
type FileInfo struct {
	// Name is the entry name, decoded from UTF-16.
	Name string
	// Attrib holds the entry's attribute bits as stored in the archive.
	Attrib uint32
	// IsEmptyStream is set for entries flagged as having no data stream.
	IsEmptyStream bool
	// IsEmptyFile is set for empty-stream entries flagged as empty files.
	IsEmptyFile bool

	// Flag indicating a file should be removed upon extraction.
	IsAntiFile bool

	// CreatedAt, AccessedAt and ModifiedAt are the entry's timestamps; a
	// timestamp that is absent from the archive is left as the zero Time.
	CreatedAt  time.Time
	AccessedAt time.Time
	ModifiedAt time.Time
}
// ReadFilesInfo reads the files info structure.
//
// The file count is read first and rejected with ErrInvalidFileCount if it
// exceeds maxFileCount. Properties are then read one by one, each made of an
// ID byte, a size and a payload, until the end marker is reached. Properties
// that are not handled here yield ErrUnexpectedPropertyID.
func ReadFilesInfo(r io.Reader, maxFileCount int) ([]*FileInfo, error) {
	count, err := ReadNumberInt(r)
	if err != nil {
		return nil, err
	}
	if count > maxFileCount {
		return nil, ErrInvalidFileCount
	}

	files := make([]*FileInfo, count)
	for i := range files {
		files[i] = new(FileInfo)
	}

	// Number of entries flagged as empty streams; it sizes the vectors of
	// the empty-file and anti-file properties.
	emptyStreamCount := 0

	for {
		propID, err := ReadByte(r)
		if err != nil {
			return nil, err
		}
		if propID == k7zEnd {
			return files, nil
		}

		// Every property carries its size, though only the dummy property
		// needs it to skip its payload.
		propSize, err := ReadNumber(r)
		if err != nil {
			return nil, err
		}

		switch propID {
		case k7zEmptyStream:
			flags, set, err := ReadBoolVector(r, count)
			if err != nil {
				return nil, err
			}
			emptyStreamCount = set
			for i := range files {
				files[i].IsEmptyStream = flags[i]
			}

		case k7zEmptyFile, k7zAnti:
			// One flag per empty-stream entry, in file order.
			flags, _, err := ReadBoolVector(r, emptyStreamCount)
			if err != nil {
				return nil, err
			}
			next := 0
			for _, fi := range files {
				if !fi.IsEmptyStream {
					continue
				}
				if propID == k7zEmptyFile {
					fi.IsEmptyFile = flags[next]
				} else {
					fi.IsAntiFile = flags[next]
				}
				next++
			}

		case k7zStartPos:
			return nil, ErrUnexpectedPropertyID

		case k7zCTime, k7zATime, k7zMTime:
			stamps, err := ReadDateTimeVector(r, count)
			if err != nil {
				return nil, err
			}
			for i, fi := range files {
				dst := &fi.ModifiedAt
				if propID == k7zCTime {
					dst = &fi.CreatedAt
				} else if propID == k7zATime {
					dst = &fi.AccessedAt
				}
				*dst = stamps[i]
			}

		case k7zName:
			external, err := ReadByte(r)
			if err != nil {
				return nil, err
			}
			if external != 0 {
				return nil, ErrAdditionalStreamsNotImplemented
			}
			// Names are NUL-terminated sequences of UTF-16LE code units.
			for _, fi := range files {
				var units []uint16
				for {
					var unit uint16
					if err = binary.Read(r, binary.LittleEndian, &unit); err != nil {
						return nil, err
					}
					if unit == 0 {
						break
					}
					units = append(units, unit)
				}
				fi.Name = string(utf16.Decode(units))
			}

		case k7zWinAttributes:
			attrs, err := ReadAttributeVector(r, count)
			if err != nil {
				return nil, err
			}
			for i := range files {
				files[i].Attrib = attrs[i]
			}

		case k7zDummy:
			// Padding: consume and discard the payload.
			for left := propSize; left > 0; left-- {
				if _, err = ReadByte(r); err != nil {
					return nil, err
				}
			}

		default:
			return nil, ErrUnexpectedPropertyID
		}
	}
}

@ -0,0 +1,240 @@
package headers
import (
"errors"
"io"
)
const (
	// MaxInOutStreams is the maximum allowed stream inputs/outputs into/out
	// of a coder.
	MaxInOutStreams = 4

	// MaxPropertyDataSize is the size in bytes supported for coder property data.
	MaxPropertyDataSize = 128

	// MaxCodersInFolder is the maximum number of coders allowed to be
	// specified in a folder.
	MaxCodersInFolder = 4

	// MaxPackedStreamsInFolder is the maximum number of packed streams allowed
	// to be in a folder.
	MaxPackedStreamsInFolder = 4
)

var (
	// ErrInvalidStreamCount is the error returned when the input/output stream
	// count for a coder is <= 0 || > MaxInOutStreams.
	ErrInvalidStreamCount = errors.New("invalid in/out stream count")

	// ErrInvalidPropertyDataSize is the error returned when the property data
	// size is <= 0 || > MaxPropertyDataSize.
	ErrInvalidPropertyDataSize = errors.New("invalid property data size")

	// ErrInvalidCoderInFolderCount is the error returned when the number of
	// coders in a folder is <= 0 || > MaxCodersInFolder.
	ErrInvalidCoderInFolderCount = errors.New("invalid coder in folder count")

	// ErrInvalidPackedStreamsCount is the error returned when the number of
	// packed streams exceeds MaxPackedStreamsInFolder.
	ErrInvalidPackedStreamsCount = errors.New("invalid packed streams count")
)
// Folder is a structure containing information on how a solid block was
// constructed.
type Folder struct {
	// CoderInfo lists the coders of the folder.
	CoderInfo []*CoderInfo
	// BindPairsInfo links coder input streams to coder output streams.
	BindPairsInfo []*BindPairsInfo
	// PackedIndices lists the input stream indices fed by packed streams.
	PackedIndices []int
	// UnpackSizes holds one unpacked size per coder output stream.
	UnpackSizes []uint64
	// UnpackCRC is the CRC of the folder's unpacked data.
	UnpackCRC uint32
}

// NumInStreamsTotal is the sum of inputs required by all codecs.
func (f *Folder) NumInStreamsTotal() int {
	total := 0
	for _, coder := range f.CoderInfo {
		total += coder.NumInStreams
	}
	return total
}

// NumOutStreamsTotal is the sum of outputs required by all codecs.
func (f *Folder) NumOutStreamsTotal() int {
	total := 0
	for _, coder := range f.CoderInfo {
		total += coder.NumOutStreams
	}
	return total
}

// FindBindPairForInStream returns the index of the first bind pair whose in
// index equals inStreamIndex, or -1 if there is none.
func (f *Folder) FindBindPairForInStream(inStreamIndex int) int {
	for i, pair := range f.BindPairsInfo {
		if pair.InIndex == inStreamIndex {
			return i
		}
	}
	return -1
}

// FindBindPairForOutStream returns the index of the first bind pair whose
// out index equals outStreamIndex, or -1 if there is none.
func (f *Folder) FindBindPairForOutStream(outStreamIndex int) int {
	for i, pair := range f.BindPairsInfo {
		if pair.OutIndex == outStreamIndex {
			return i
		}
	}
	return -1
}

// UnpackSize returns the final unpacked size of the folder: the size of the
// first output stream that is not bound to another coder's input, or 0 when
// every output is bound.
func (f *Folder) UnpackSize() uint64 {
	for out, size := range f.UnpackSizes {
		if f.FindBindPairForOutStream(out) < 0 {
			return size
		}
	}
	return 0
}
// ReadFolder reads a folder structure: the coder count, that many coder
// infos, one bind pair fewer than coders, and finally the packed stream
// indices.
//
// When exactly one packed stream remains its index is not stored in the
// stream; it is taken to be the first input that no bind pair refers to.
func ReadFolder(r io.Reader) (*Folder, error) {
	coderCount, err := ReadNumberInt(r)
	if err != nil {
		return nil, err
	}
	if coderCount == 0 || coderCount > MaxCodersInFolder {
		return nil, ErrInvalidCoderInFolderCount
	}

	folder := new(Folder)

	folder.CoderInfo = make([]*CoderInfo, coderCount)
	for i := range folder.CoderInfo {
		coder, err := ReadCoderInfo(r)
		if err != nil {
			return nil, err
		}
		folder.CoderInfo[i] = coder
	}

	folder.BindPairsInfo = make([]*BindPairsInfo, coderCount-1)
	for i := range folder.BindPairsInfo {
		pair, err := ReadBindPairsInfo(r)
		if err != nil {
			return nil, err
		}
		folder.BindPairsInfo[i] = pair
	}

	// Inputs that are not fed by another coder come from packed streams.
	inputs := folder.NumInStreamsTotal()
	packed := inputs - len(folder.BindPairsInfo)

	switch {
	case packed > MaxPackedStreamsInFolder:
		return nil, ErrInvalidPackedStreamsCount

	case packed > 1:
		folder.PackedIndices = make([]int, packed)
		for i := range folder.PackedIndices {
			if folder.PackedIndices[i], err = ReadNumberInt(r); err != nil {
				return nil, err
			}
		}

	case packed == 1:
		for in := 0; in < inputs; in++ {
			if folder.FindBindPairForInStream(in) < 0 {
				folder.PackedIndices = []int{in}
				break
			}
		}
	}

	return folder, nil
}
// CoderInfo is a structure holding information about a codec.
type CoderInfo struct {
	// CodecID is the codec identifier, assembled big-endian from the ID
	// bytes stored in the archive.
	CodecID uint32
	// Properties is the codec's raw property data; nil when none is stored.
	Properties []byte
	// NumInStreams and NumOutStreams are the stream counts of the coder;
	// both are 1 unless the coder is flagged as complex.
	NumInStreams  int
	NumOutStreams int
}
// ReadCoderInfo reads a coder info structure.
//
// The first byte packs the codec ID size (low 4 bits), the complex-coder
// flag (0x10) and the has-properties flag (0x20). A complex coder stores
// explicit input/output stream counts; otherwise both are 1.
//
// It returns ErrInvalidStreamCount or ErrInvalidPropertyDataSize when a
// count or size is out of range, and io.ErrUnexpectedEOF when the codec ID
// or property data is cut short.
func ReadCoderInfo(r io.Reader) (*CoderInfo, error) {
	attributes, err := ReadByte(r)
	if err != nil {
		return nil, err
	}

	codecIDSize := int(attributes & 0x0f)
	isComplexCoder := attributes&0x10 != 0
	hasAttributes := attributes&0x20 != 0

	coderInfo := &CoderInfo{NumInStreams: 1, NumOutStreams: 1}

	if codecIDSize > 0 {
		id := make([]byte, codecIDSize)
		// io.ReadFull: a bare Read may return fewer bytes than requested.
		if _, err = io.ReadFull(r, id); err != nil {
			return nil, err
		}
		// The ID is big-endian; only the low four bytes fit into CodecID.
		for _, b := range id {
			coderInfo.CodecID = coderInfo.CodecID<<8 | uint32(b)
		}
	}

	if isComplexCoder {
		if coderInfo.NumInStreams, err = ReadNumberInt(r); err != nil {
			return nil, err
		}
		if coderInfo.NumInStreams == 0 || coderInfo.NumInStreams > MaxInOutStreams {
			return nil, ErrInvalidStreamCount
		}

		if coderInfo.NumOutStreams, err = ReadNumberInt(r); err != nil {
			return nil, err
		}
		if coderInfo.NumOutStreams == 0 || coderInfo.NumOutStreams > MaxInOutStreams {
			return nil, ErrInvalidStreamCount
		}
	}

	if hasAttributes {
		size, err := ReadNumberInt(r)
		if err != nil {
			return nil, err
		}
		if size <= 0 || size > MaxPropertyDataSize {
			return nil, ErrInvalidPropertyDataSize
		}

		coderInfo.Properties = make([]byte, size)
		if _, err = io.ReadFull(r, coderInfo.Properties); err != nil {
			return nil, err
		}
	}

	return coderInfo, nil
}
// BindPairsInfo is a structure that binds the in and out indexes of a codec.
type BindPairsInfo struct {
	// InIndex is the index of the input stream being fed.
	InIndex int
	// OutIndex is the index of the output stream feeding it.
	OutIndex int
}
// ReadBindPairsInfo reads a bindpairs info structure: the in index followed
// by the out index, both as 7z encoded numbers.
func ReadBindPairsInfo(r io.Reader) (*BindPairsInfo, error) {
	in, err := ReadNumberInt(r)
	if err != nil {
		return nil, err
	}

	out, err := ReadNumberInt(r)
	if err != nil {
		return nil, err
	}

	return &BindPairsInfo{InIndex: in, OutIndex: out}, nil
}

@ -0,0 +1,152 @@
package headers
import (
"bytes"
"encoding/binary"
"errors"
"hash/crc32"
"io"
)
const (
	// SignatureHeaderSize is the size in bytes of the signature header.
	SignatureHeaderSize = 32

	// MaxHeaderSize is the maximum header size.
	MaxHeaderSize = int64(1 << 62) // 4 exbibyte
)

var (
	// MagicBytes is the magic bytes used in the 7z signature.
	MagicBytes = [6]byte{0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C}

	// ErrInvalidSignatureHeader is returned when signature header is invalid.
	ErrInvalidSignatureHeader = errors.New("invalid signature header")
)
// SignatureHeader is the structure found at the top of 7z files.
type SignatureHeader struct {
	// Signature holds the six magic bytes.
	Signature [6]byte

	// ArchiveVersion is the format version stored after the signature.
	ArchiveVersion struct {
		Major byte
		Minor byte
	}

	// StartHeaderCRC is the CRC-32 of the 20 start header bytes that follow.
	StartHeaderCRC uint32

	// StartHeader holds the offset, size and CRC of the next header.
	StartHeader struct {
		NextHeaderOffset int64
		NextHeaderSize   int64
		NextHeaderCRC    uint32
	}
}
// ReadSignatureHeader reads the signature header.
//
// Exactly SignatureHeaderSize bytes are consumed from r. Errors:
//   - a read error (io.EOF or io.ErrUnexpectedEOF for a short file), with a
//     nil header;
//   - ErrInvalidSignatureHeader with a nil header if the magic bytes do not
//     match;
//   - ErrInvalidSignatureHeader with the parsed header if the next header
//     size is negative or exceeds MaxHeaderSize;
//   - ErrChecksumMismatch with the parsed header if the start header CRC
//     does not match.
func ReadSignatureHeader(r io.Reader) (*SignatureHeader, error) {
	var raw [SignatureHeaderSize]byte
	// io.ReadFull: a bare Read may return fewer than 32 bytes without error.
	if _, err := io.ReadFull(r, raw[:]); err != nil {
		return nil, err
	}

	var header SignatureHeader
	copy(header.Signature[:], raw[:6])
	if !bytes.Equal(header.Signature[:], MagicBytes[:]) {
		return nil, ErrInvalidSignatureHeader
	}

	header.ArchiveVersion.Major = raw[6]
	header.ArchiveVersion.Minor = raw[7]

	header.StartHeaderCRC = binary.LittleEndian.Uint32(raw[8:])
	header.StartHeader.NextHeaderOffset = int64(binary.LittleEndian.Uint64(raw[12:]))
	header.StartHeader.NextHeaderSize = int64(binary.LittleEndian.Uint64(raw[20:]))
	header.StartHeader.NextHeaderCRC = binary.LittleEndian.Uint32(raw[28:])

	if header.StartHeader.NextHeaderSize < 0 || header.StartHeader.NextHeaderSize > MaxHeaderSize {
		return &header, ErrInvalidSignatureHeader
	}

	// The CRC covers the start header: offset, size and CRC of the next header.
	if crc32.ChecksumIEEE(raw[12:]) != header.StartHeaderCRC {
		return &header, ErrChecksumMismatch
	}

	return &header, nil
}
// Header is a structure containing file and stream information.
type Header struct {
	// MainStreamsInfo describes the packed streams; ReadHeader requires it
	// to be present before the end marker.
	MainStreamsInfo *StreamsInfo
	// FilesInfo lists the archived files.
	FilesInfo []*FileInfo
}
// ReadPackedStreamsForHeaders reads either a header or encoded header structure.
//
// The first byte selects which one follows. Exactly one of the two returned
// pointers is set on success, with one exception: if ReadHeader stops with
// io.EOF the error is swallowed and whatever ReadHeader returned is passed
// on. Any other leading byte, including the end marker, yields
// ErrUnexpectedPropertyID.
func ReadPackedStreamsForHeaders(r *io.LimitedReader) (header *Header, encodedHeader *StreamsInfo, err error) {
	id, err := ReadByte(r)
	if err != nil {
		return nil, nil, err
	}

	if id == k7zHeader {
		hdr, hdrErr := ReadHeader(r)
		if hdrErr != nil && hdrErr != io.EOF {
			return nil, nil, hdrErr
		}
		return hdr, nil, nil
	}

	if id == k7zEncodedHeader {
		streams, streamsErr := ReadStreamsInfo(r)
		if streamsErr != nil {
			return nil, nil, streamsErr
		}
		return nil, streams, nil
	}

	// Nothing has been read when an end marker shows up here, so it is as
	// unexpected as any unknown ID.
	return nil, nil, ErrUnexpectedPropertyID
}
// ReadHeader reads a header structure.
//
// Sections are read until the end marker. The main streams info must have
// been seen by then, otherwise ErrUnexpectedPropertyID is returned. Archive
// properties and additional streams are reported as not implemented.
func ReadHeader(r *io.LimitedReader) (*Header, error) {
	var hdr Header

	for {
		id, err := ReadByte(r)
		if err != nil {
			return nil, err
		}

		switch id {
		case k7zEnd:
			if hdr.MainStreamsInfo == nil {
				return nil, ErrUnexpectedPropertyID
			}
			return &hdr, nil

		case k7zMainStreamsInfo:
			hdr.MainStreamsInfo, err = ReadStreamsInfo(r)

		case k7zFilesInfo:
			// Limit the maximum amount of FileInfos that get allocated to size
			// of the remaining header / 3
			hdr.FilesInfo, err = ReadFilesInfo(r, int(r.N)/3)

		case k7zArchiveProperties:
			return nil, ErrArchivePropertiesNotImplemented

		case k7zAdditionalStreamsInfo:
			return nil, ErrAdditionalStreamsNotImplemented

		default:
			return nil, ErrUnexpectedPropertyID
		}

		if err != nil {
			return nil, err
		}
	}
}

@ -0,0 +1,51 @@
package headers
import "io"
// PackInfo contains the pack stream sizes of the folders.
type PackInfo struct {
	// PackPos is the first number read from the pack info structure.
	PackPos uint64
	// PackSizes holds the sizes of the packed streams.
	PackSizes []uint64
}
// ReadPackInfo reads a pack info structure: the pack position, the number of
// pack streams and then properties until the end marker.
//
// Only the size property is supported; a CRC property yields
// ErrPackInfoCRCsNotImplemented and anything else ErrUnexpectedPropertyID.
func ReadPackInfo(r io.Reader) (*PackInfo, error) {
	pos, err := ReadNumber(r)
	if err != nil {
		return nil, err
	}

	streamCount, err := ReadNumberInt(r)
	if err != nil {
		return nil, err
	}

	info := &PackInfo{PackPos: pos}
	for {
		id, err := ReadByte(r)
		if err != nil {
			return nil, err
		}

		if id == k7zEnd {
			return info, nil
		}
		if id == k7zCRC {
			return nil, ErrPackInfoCRCsNotImplemented
		}
		if id != k7zSize {
			return nil, ErrUnexpectedPropertyID
		}

		// NOTE(review): the slice has one more element than there are sizes
		// in the stream; the extra entry stays zero. Confirm whether callers
		// rely on it.
		sizes := make([]uint64, streamCount+1)
		for i := 0; i < streamCount; i++ {
			if sizes[i], err = ReadNumber(r); err != nil {
				return nil, err
			}
		}
		info.PackSizes = sizes
	}
}

@ -0,0 +1,262 @@
package headers
import (
"encoding/binary"
"errors"
"io"
"time"
)
// Identifier bytes that introduce the sections and properties of a 7z
// header. The values are consecutive, starting at zero.
const (
	k7zEnd = iota
	k7zHeader
	k7zArchiveProperties
	k7zAdditionalStreamsInfo
	k7zMainStreamsInfo
	k7zFilesInfo
	k7zPackInfo
	k7zUnpackInfo
	k7zSubStreamsInfo
	k7zSize
	k7zCRC
	k7zFolder
	k7zCodersUnpackSize
	k7zNumUnpackStream
	k7zEmptyStream
	k7zEmptyFile
	k7zAnti
	k7zName
	k7zCTime
	k7zATime
	k7zMTime
	k7zWinAttributes
	k7zComment
	k7zEncodedHeader
	k7zStartPos
	k7zDummy
)

// MaxNumber is the largest value ReadNumberInt accepts.
const MaxNumber = 0x7FFFFFFF

var (
	// ErrUnexpectedPropertyID is returned when we read a property id that was
	// either unexpected, or we don't support.
	ErrUnexpectedPropertyID = errors.New("unexpected property id")

	// ErrAdditionalStreamsNotImplemented is returned for archives using
	// additional streams. These were apparently used in older versions of 7zip.
	ErrAdditionalStreamsNotImplemented = errors.New("additional streams are not implemented")

	// ErrArchivePropertiesNotImplemented is returned if archive properties
	// structure is found. So far, this hasn't been used in any version of 7zip.
	ErrArchivePropertiesNotImplemented = errors.New("archive properties are not implemented")

	// ErrChecksumMismatch is returned when a CRC check fails.
	ErrChecksumMismatch = errors.New("checksum mismatch")

	// ErrPackInfoCRCsNotImplemented is returned if a CRC property id is
	// encountered whilst reading packinfo.
	ErrPackInfoCRCsNotImplemented = errors.New("packinfo crcs are not implemented")

	// ErrInvalidNumber is returned when a number read exceeds 0x7FFFFFFF.
	ErrInvalidNumber = errors.New("invalid number")
)
// ReadByte reads a single byte.
//
// io.ReadFull is used so that a reader returning (0, nil) is retried and a
// reader returning the byte together with io.EOF is not reported as a
// failure. io.EOF is returned only when no byte could be read, in which case
// the returned byte is 0.
func ReadByte(r io.Reader) (byte, error) {
	var val [1]byte
	_, err := io.ReadFull(r, val[:])
	return val[0], err
}
// ReadByteExpect reads one byte and checks that it equals val. It returns
// the read error if the byte cannot be read and ErrUnexpectedPropertyID if
// the byte differs from val.
func ReadByteExpect(r io.Reader, val byte) error {
	got, err := ReadByte(r)
	switch {
	case err != nil:
		return err
	case got != val:
		return ErrUnexpectedPropertyID
	}
	return nil
}
// ReadNumber reads a 7z encoded uint64.
//
// The leading byte is scanned from its top bit down. Every set bit announces
// one more payload byte, stored least significant first. The first clear bit
// ends the scan, and the lead byte's bits below it become the most
// significant part of the value. A lead byte of 0xff is followed by a full
// 8-byte little-endian value.
func ReadNumber(r io.Reader) (uint64, error) {
	lead, err := ReadByte(r)
	if err != nil {
		return 0, err
	}

	var n uint64
	for shift, mask := uint(0), byte(0x80); shift < 64; shift, mask = shift+8, mask>>1 {
		if lead&mask == 0 {
			high := uint64(lead & (mask - 1))
			return n + high<<shift, nil
		}

		next, err := ReadByte(r)
		if err != nil {
			return 0, err
		}
		n |= uint64(next) << shift
	}

	return n, nil
}
// ReadNumberInt is the same as ReadNumber, but cast to int. Values above
// MaxNumber are rejected with ErrInvalidNumber.
func ReadNumberInt(r io.Reader) (int, error) {
	n, err := ReadNumber(r)
	if err != nil {
		// ReadNumber reports 0 together with an error.
		return 0, err
	}
	if n > MaxNumber {
		return 0, ErrInvalidNumber
	}
	return int(n), nil
}
// ReadUint32 reads a little-endian uint32. On error the value is 0; the
// error is io.EOF if no bytes were available and io.ErrUnexpectedEOF if the
// input ended after some but not all of the four bytes.
func ReadUint32(r io.Reader) (uint32, error) {
	var raw [4]byte
	if _, err := io.ReadFull(r, raw[:]); err != nil {
		return 0, err
	}
	return binary.LittleEndian.Uint32(raw[:]), nil
}
// ReadUint64 reads a little-endian uint64. On error the value is 0; the
// error is io.EOF if no bytes were available and io.ErrUnexpectedEOF if the
// input ended after some but not all of the eight bytes.
func ReadUint64(r io.Reader) (uint64, error) {
	var raw [8]byte
	if _, err := io.ReadFull(r, raw[:]); err != nil {
		return 0, err
	}
	return binary.LittleEndian.Uint64(raw[:]), nil
}
// ReadBoolVector reads a vector of length boolean values, packed eight per
// byte with the most significant bit first. It returns the values and the
// number of them that are true.
func ReadBoolVector(r io.Reader, length int) ([]bool, int, error) {
	bits := make([]bool, length)
	set := 0

	var cur byte
	for i := range bits {
		pos := uint(i % 8)
		if pos == 0 {
			// Start of a new group of eight: fetch the next byte.
			next, err := ReadByte(r)
			if err != nil {
				return nil, 0, err
			}
			cur = next
		}

		if cur&(0x80>>pos) != 0 {
			bits[i] = true
			set++
		}
	}

	return bits, set, nil
}
// ReadOptionalBoolVector reads a vector of boolean values if they're available,
// otherwise it returns an array of booleans all being true.
//
// A leading byte decides: zero means a packed bool vector follows, any other
// value means all length entries are true.
func ReadOptionalBoolVector(r io.Reader, length int) ([]bool, int, error) {
	allDefined, err := ReadByte(r)
	if err != nil {
		return nil, 0, err
	}

	if allDefined != 0 {
		all := make([]bool, length)
		for i := range all {
			all[i] = true
		}
		return all, length, nil
	}

	return ReadBoolVector(r, length)
}
// ReadNumberVector returns a vector of numFiles optional int64 values.
//
// An optional bool vector says which entries are present, followed by an
// "external" byte that must be zero. Each present entry is read as a
// little-endian uint64 and converted to int64; absent entries are nil.
func ReadNumberVector(r io.Reader, numFiles int) ([]*int64, error) {
	defined, _, err := ReadOptionalBoolVector(r, numFiles)
	if err != nil {
		return nil, err
	}

	external, err := ReadByte(r)
	if err != nil {
		return nil, err
	}
	if external != 0 {
		return nil, ErrAdditionalStreamsNotImplemented
	}

	values := make([]*int64, numFiles)
	for i := range values {
		if !defined[i] {
			continue
		}
		raw, err := ReadUint64(r)
		if err != nil {
			return nil, err
		}
		v := int64(raw)
		values[i] = &v
	}

	return values, nil
}
// ReadDateTimeVector reads a vector of datetime values.
//
// Each stored value counts 100-nanosecond ticks since 1601-01-01 UTC.
// Entries that are absent from the stream are returned as the zero
// time.Time.
func ReadDateTimeVector(r io.Reader, numFiles int) ([]time.Time, error) {
	timestamps, err := ReadNumberVector(r, numFiles)
	if err != nil {
		return nil, err
	}

	const (
		// epochDelta is the number of 100ns ticks between 1601-01-01 and
		// the Unix epoch.
		epochDelta     = 116444736000000000
		ticksPerSecond = 10000000
	)

	times := make([]time.Time, len(timestamps))
	for i, ts := range timestamps {
		if ts == nil {
			continue
		}
		// Split into seconds and nanoseconds. Multiplying the whole tick
		// count by 100 overflows int64 for dates outside roughly 1678-2262,
		// which includes a stored value of 0.
		ticks := *ts - epochDelta
		times[i] = time.Unix(ticks/ticksPerSecond, (ticks%ticksPerSecond)*100)
	}

	return times, nil
}
// ReadAttributeVector reads a vector of numFiles uint32 attribute values.
//
// An optional bool vector says which entries are present, followed by an
// "external" byte that must be zero. Present entries are read as
// little-endian uint32; absent entries stay zero.
func ReadAttributeVector(r io.Reader, numFiles int) ([]uint32, error) {
	defined, _, err := ReadOptionalBoolVector(r, numFiles)
	if err != nil {
		return nil, err
	}

	external, err := ReadByte(r)
	if err != nil {
		return nil, err
	}
	if external != 0 {
		return nil, ErrAdditionalStreamsNotImplemented
	}

	attrs := make([]uint32, numFiles)
	for i := range attrs {
		if !defined[i] {
			continue
		}
		if attrs[i], err = ReadUint32(r); err != nil {
			return nil, err
		}
	}

	return attrs, nil
}

@ -0,0 +1,143 @@
package headers
import (
"io"
)
// StreamsInfo is a top-level structure of the 7z format.
type StreamsInfo struct {
	// PackInfo and UnpackInfo are required; ReadStreamsInfo fails if either
	// is missing when the end marker is reached.
	PackInfo   *PackInfo
	UnpackInfo *UnpackInfo
	// SubStreamsInfo is optional and nil when not present.
	SubStreamsInfo *SubStreamsInfo
}
// ReadStreamsInfo reads the streams info structure.
//
// Sections are read until the end marker. Pack info and unpack info must
// both have been seen by then, and substreams info is only accepted after
// unpack info. Violations and unknown IDs yield ErrUnexpectedPropertyID.
func ReadStreamsInfo(r io.Reader) (*StreamsInfo, error) {
	var info StreamsInfo

	for {
		id, err := ReadByte(r)
		if err != nil {
			return nil, err
		}

		switch id {
		case k7zEnd:
			if info.PackInfo != nil && info.UnpackInfo != nil {
				return &info, nil
			}
			return nil, ErrUnexpectedPropertyID

		case k7zPackInfo:
			info.PackInfo, err = ReadPackInfo(r)

		case k7zUnpackInfo:
			info.UnpackInfo, err = ReadUnpackInfo(r)

		case k7zSubStreamsInfo:
			if info.UnpackInfo == nil {
				return nil, ErrUnexpectedPropertyID
			}
			info.SubStreamsInfo, err = ReadSubStreamsInfo(r, info.UnpackInfo)

		default:
			return nil, ErrUnexpectedPropertyID
		}

		if err != nil {
			return nil, err
		}
	}
}
// SubStreamsInfo is a structure found within the StreamsInfo structure.
type SubStreamsInfo struct {
	// NumUnpackStreamsInFolders holds the number of streams in each folder;
	// it defaults to 1 per folder.
	NumUnpackStreamsInFolders []int
	// UnpackSizes holds the unpacked size of every stream, folder by folder.
	UnpackSizes []uint64
	// Digests holds the CRCs read from the structure, if any.
	Digests []uint32
}
// ReadSubStreamsInfo reads the substreams info structure.
//
// The structure holds up to three optional parts, in this order: the number
// of streams per folder (default 1), the sizes of all but the last stream of
// each folder, and the digests. The last stream of a folder gets the rest of
// the folder's unpack size. Anything but the end marker after these parts
// yields ErrUnexpectedPropertyID.
func ReadSubStreamsInfo(r io.Reader, unpackInfo *UnpackInfo) (*SubStreamsInfo, error) {
	id, err := ReadByte(r)
	if err != nil {
		return nil, err
	}

	folders := unpackInfo.Folders

	counts := make([]int, len(folders))
	for i := range counts {
		counts[i] = 1
	}

	if id == k7zNumUnpackStream {
		for i := range counts {
			if counts[i], err = ReadNumberInt(r); err != nil {
				return nil, err
			}
		}
		if id, err = ReadByte(r); err != nil {
			return nil, err
		}
	}

	info := &SubStreamsInfo{NumUnpackStreamsInFolders: counts}

	hasSizes := id == k7zSize
	for i, folder := range folders {
		n := counts[i]
		if n == 0 {
			continue
		}

		// Sizes are stored for all streams but the last one of a folder.
		var stored uint64
		if hasSizes {
			for j := 1; j < n; j++ {
				size, err := ReadNumber(r)
				if err != nil {
					return nil, err
				}
				stored += size
				info.UnpackSizes = append(info.UnpackSizes, size)
			}
		}
		info.UnpackSizes = append(info.UnpackSizes, folder.UnpackSize()-stored)
	}

	if hasSizes {
		if id, err = ReadByte(r); err != nil {
			return nil, err
		}
	}

	// A folder contributes digests for all of its streams when it has more
	// than one stream or when its own CRC is zero.
	digestCount := 0
	for i, folder := range folders {
		if n := counts[i]; n > 1 || folder.UnpackCRC == 0 {
			digestCount += n
		}
	}

	if id == k7zCRC {
		if info.Digests, err = ReadDigests(r, digestCount); err != nil {
			return nil, err
		}
		if id, err = ReadByte(r); err != nil {
			return nil, err
		}
	}

	if id != k7zEnd {
		return nil, ErrUnexpectedPropertyID
	}

	return info, nil
}

@ -0,0 +1,89 @@
package headers
import (
"errors"
"io"
)
// MaxFolderCount is the largest folder count ReadUnpackInfo accepts.
const MaxFolderCount = 1 << 30

// ErrInvalidCountExceeded is returned when the folder count exceeds
// MaxFolderCount.
var ErrInvalidCountExceeded = errors.New("invalid folder count")

// UnpackInfo is a structure containing folders.
type UnpackInfo struct {
	// Folders lists the folders in the order they appear in the stream.
	Folders []*Folder
}
// ReadUnpackInfo reads unpack info structures.
//
// The layout is: the folder marker, the folder count, an "external" byte
// that must be zero, the folders, the coders-unpack-size marker with one
// size per coder output stream, an optional CRC section, and the end marker.
func ReadUnpackInfo(r io.Reader) (*UnpackInfo, error) {
	if err := ReadByteExpect(r, k7zFolder); err != nil {
		return nil, err
	}

	folderCount, err := ReadNumberInt(r)
	if err != nil {
		return nil, err
	}
	if folderCount > MaxFolderCount {
		return nil, ErrInvalidCountExceeded
	}

	external, err := ReadByte(r)
	if err != nil {
		return nil, err
	}
	if external != 0 {
		return nil, ErrAdditionalStreamsNotImplemented
	}

	folders := make([]*Folder, folderCount)
	for i := range folders {
		if folders[i], err = ReadFolder(r); err != nil {
			return nil, err
		}
	}

	if err = ReadByteExpect(r, k7zCodersUnpackSize); err != nil {
		return nil, err
	}
	for _, folder := range folders {
		sizes := make([]uint64, folder.NumOutStreamsTotal())
		for i := range sizes {
			if sizes[i], err = ReadNumber(r); err != nil {
				return nil, err
			}
		}
		folder.UnpackSizes = sizes
	}

	id, err := ReadByte(r)
	if err != nil {
		return nil, err
	}

	if id == k7zCRC {
		crcs, err := ReadDigests(r, len(folders))
		if err != nil {
			return nil, err
		}
		for i, folder := range folders {
			folder.UnpackCRC = crcs[i]
		}

		if id, err = ReadByte(r); err != nil {
			return nil, err
		}
	}

	if id != k7zEnd {
		return nil, ErrUnexpectedPropertyID
	}

	return &UnpackInfo{Folders: folders}, nil
}

@ -0,0 +1,377 @@
package go7z
import (
"bufio"
"errors"
"fmt"
"hash/crc32"
"io"
"os"
"sync"
"github.com/saracen/go7z/headers"
"github.com/saracen/solidblock"
)
var (
// ErrNotSupported is returned when the archive uses a format, stream layout
// or coder configuration that this package does not handle.
ErrNotSupported = errors.New("not supported")
// ErrDecompressorNotFound is returned when a folder requests a codec for
// which no decompressor has been registered.
ErrDecompressorNotFound = errors.New("decompressor not found")
)
// Reader is a 7z archive reader.
//
// Entries are visited sequentially with Next; the contents of the current
// entry are obtained with Read.
type Reader struct {
// r spans the archive bytes handed to init.
r *io.SectionReader
// err is the sticky error recorded by Next and Read; once set, both keep
// returning it.
err error
// header is the parsed archive header.
header *headers.Header
// folderIndex is the index in folders of the folder currently being read.
folderIndex int
// fileIndex is the index in header.FilesInfo of the entry Next returns next.
fileIndex int
// emptyStream reports whether the current entry is marked as an empty
// stream, in which case Read returns io.EOF immediately.
emptyStream bool
// folders holds one reader per folder of the main streams.
folders []*folderReader
// Options is passed to every decompressor, e.g. to supply a password.
Options ReaderOptions
}
// ReaderOptions holds the optional settings of a 7z archive reader.
type ReaderOptions struct {
	password string
	cb       func() string
}

// SetPassword stores the password to use for extraction.
func (o *ReaderOptions) SetPassword(password string) {
	o.password = password
}

// SetPasswordCallback stores a callback that is asked for the password when
// one is required but none was supplied with SetPassword.
func (o *ReaderOptions) SetPasswordCallback(cb func() string) {
	o.cb = cb
}

// Password returns the configured password. When no password is set and a
// callback was registered with SetPasswordCallback, the callback is invoked
// and its result is remembered and returned.
func (o *ReaderOptions) Password() string {
	if o.password == "" && o.cb != nil {
		o.password = o.cb()
	}
	return o.password
}
// ReadCloser provides an io.ReadCloser for the archive when opened with
// OpenReader.
type ReadCloser struct {
// f is the file opened by OpenReader; Close closes it.
f *os.File
// Reader provides Next and Read over the contents of f.
Reader
}
// Close closes the underlying 7z file. The archive cannot be read from
// afterwards.
func (rc *ReadCloser) Close() error {
	err := rc.f.Close()
	return err
}
// OpenReader opens the 7z file called name and returns a ReadCloser for it.
// The file is closed again when its size cannot be determined or the archive
// cannot be initialised.
func OpenReader(name string) (*ReadCloser, error) {
	file, err := os.Open(name)
	if err != nil {
		return nil, err
	}

	// abort releases the file before reporting a failure.
	abort := func(cause error) (*ReadCloser, error) {
		file.Close()
		return nil, cause
	}

	info, err := file.Stat()
	if err != nil {
		return abort(err)
	}

	rc := &ReadCloser{}
	if err := rc.init(file, info.Size(), false); err != nil {
		return abort(err)
	}
	rc.f = file
	return rc, nil
}
// NewReader returns a Reader for the archive stored in the first size bytes
// of r.
func NewReader(r io.ReaderAt, size int64) (*Reader, error) {
	var reader Reader
	err := reader.init(r, size, false)
	if err != nil {
		return nil, err
	}
	return &reader, nil
}
// init parses the archive found in the first size bytes of r and prepares one
// folderReader per folder of the main streams.
//
// It reads the signature header, seeks to the next header it points to,
// parses that header while computing its CRC, and compares the CRC with the
// one recorded in the signature header. When the header is itself stored as
// packed streams, those are decoded first and the real header is parsed from
// the result; exactly one folder holding exactly one stream is supported in
// that case, anything else yields ErrNotSupported.
//
// With ignoreChecksumError set, a checksum mismatch of the signature header
// or of the next header is tolerated instead of being reported as
// headers.ErrChecksumMismatch.
func (sz *Reader) init(r io.ReaderAt, size int64, ignoreChecksumError bool) error {
	sz.r = io.NewSectionReader(r, 0, size)

	signatureHeader, err := headers.ReadSignatureHeader(sz.r)
	if err != nil {
		// NOTE(review): the tolerant path assumes ReadSignatureHeader still
		// returns a usable header on a checksum mismatch — confirm.
		if !ignoreChecksumError || !errors.Is(err, headers.ErrChecksumMismatch) {
			return err
		}
	}
	start := signatureHeader.StartHeader

	if _, err := sz.r.Seek(start.NextHeaderOffset, io.SeekCurrent); err != nil {
		return err
	}
	if start.NextHeaderSize > size-headers.SignatureHeaderSize {
		return io.ErrUnexpectedEOF
	}

	// The CRC is computed over the bytes the header parser consumes.
	crc := crc32.NewIEEE()
	tee := io.TeeReader(bufio.NewReader(io.LimitReader(sz.r, start.NextHeaderSize)), crc)
	header, encoded, err := headers.ReadPackedStreamsForHeaders(&io.LimitedReader{R: tee, N: start.NextHeaderSize})
	if err != nil {
		return err
	}
	if crc.Sum32() != start.NextHeaderCRC && !ignoreChecksumError {
		return headers.ErrChecksumMismatch
	}

	if encoded != nil {
		folders, err := sz.extract(encoded)
		if err != nil {
			return err
		}
		if len(folders) != 1 {
			return ErrNotSupported
		}
		// The header folder is only needed while init runs; hand its pooled
		// buffers back once the header has been decoded.
		defer folders[0].Close()

		if err = folders[0].Next(); err != nil {
			return err
		}
		header, _, err = headers.ReadPackedStreamsForHeaders(&io.LimitedReader{R: folders[0].sb, N: folders[0].sb.Size()})
		if err != nil {
			return err
		}
		// The packed header must consist of a single stream.
		if err = folders[0].Next(); err != io.EOF {
			return ErrNotSupported
		}
	}

	if header == nil {
		return ErrNotSupported
	}
	sz.header = header
	sz.folders, err = sz.extract(sz.header.MainStreamsInfo)
	return err
}
// Next advances to the next entry in the 7z archive.
//
// io.EOF is returned at the end of the input. Any error, io.EOF included, is
// remembered and returned again by later calls.
func (sz *Reader) Next() (*headers.FileInfo, error) {
	if sz.err == nil {
		var hdr *headers.FileInfo
		hdr, sz.err = sz.next()
		return hdr, sz.err
	}
	return nil, sz.err
}
// nextFileInfo returns the entry at fileIndex and advances fileIndex, or
// returns nil once every entry of header.FilesInfo has been handed out.
func (sz *Reader) nextFileInfo() *headers.FileInfo {
	files := sz.header.FilesInfo
	if sz.fileIndex >= len(files) {
		return nil
	}
	current := files[sz.fileIndex]
	sz.fileIndex++
	return current
}
// extract builds one folderReader per folder described by streamsInfo.
//
// For every folder it registers the folder's coders with a solidblock.Binder,
// attaches the folder's packed streams (section readers over the archive,
// laid out back to back starting at the pack position) to the coder inputs
// named by PackedIndices, applies the bind pairs, and records the sizes and
// CRCs of the files stored in the folder. Nothing is decoded here; decoding
// starts with folderReader.Next.
//
// A nil streamsInfo yields no folders. ErrDecompressorNotFound is returned
// when a coder has no registered decompressor; inconsistent stream references
// are reported as errors. On error the returned slice is always nil.
func (sz *Reader) extract(streamsInfo *headers.StreamsInfo) ([]*folderReader, error) {
	if streamsInfo == nil {
		// NOTE(review): presumably the case for archives without any packed
		// data (only empty files or directories) — confirm against the
		// header parser.
		return nil, nil
	}

	var sizes []uint64
	var crcs []uint32
	if streamsInfo.SubStreamsInfo != nil {
		sizes = streamsInfo.SubStreamsInfo.UnpackSizes
		crcs = streamsInfo.SubStreamsInfo.Digests
	}

	offset := int64(headers.SignatureHeaderSize) + int64(streamsInfo.PackInfo.PackPos)
	packedIndicesOffset := 0

	folders := make([]*folderReader, 0, len(streamsInfo.UnpackInfo.Folders))
	for i, folder := range streamsInfo.UnpackInfo.Folders {
		// A folder without explicit packed indices reads its single packed
		// stream through input 0.
		if len(folder.PackedIndices) == 0 {
			folder.PackedIndices = []int{0}
		}

		fr := &folderReader{
			inputs: make(map[int]io.Reader, len(folder.PackedIndices)),
		}

		// setup codecs
		for j := range folder.CoderInfo {
			if j >= len(folder.UnpackSizes) {
				return nil, fmt.Errorf("coder references invalid unpack size")
			}
			coderInfo := folder.CoderInfo[j]
			size := folder.UnpackSizes[j]

			d := decompressor(coderInfo.CodecID)
			if d == nil {
				return nil, ErrDecompressorNotFound
			}

			fn := func(in []io.Reader) ([]io.Reader, error) {
				r, err := d(in, coderInfo.Properties, size, &sz.Options)
				if err != nil {
					return nil, err
				}
				return []io.Reader{r}, nil
			}
			fr.binder.AddCodec(fn, coderInfo.NumInStreams, coderInfo.NumOutStreams)
		}

		// setup initial inputs
		for index, input := range folder.PackedIndices {
			packIndex := packedIndicesOffset + index
			if packIndex >= len(streamsInfo.PackInfo.PackSizes) {
				return nil, fmt.Errorf("folder references invalid packinfo")
			}
			size := int64(streamsInfo.PackInfo.PackSizes[packIndex])
			fr.inputs[input] = io.NewSectionReader(sz.r, offset, size)
			offset += size
		}
		packedIndicesOffset += len(folder.PackedIndices)

		// setup pairs
		for _, bindPairsInfo := range folder.BindPairsInfo {
			fr.binder.Pair(bindPairsInfo.InIndex, bindPairsInfo.OutIndex)
		}

		if streamsInfo.SubStreamsInfo != nil {
			numUnpackStreamsInFolders := streamsInfo.SubStreamsInfo.NumUnpackStreamsInFolders
			if i >= len(numUnpackStreamsInFolders) {
				return nil, fmt.Errorf("folder references invalid unpack stream")
			}
			// The folder owns the next off entries of the remaining sizes
			// and digests.
			off := numUnpackStreamsInFolders[i]
			if off < 0 || off > len(sizes) || off > len(crcs) {
				return nil, fmt.Errorf("folder references invalid unpack size or digest")
			}
			fr.sizes, sizes = sizes[:off], sizes[off:]
			fr.crcs, crcs = crcs[:off], crcs[off:]
		} else {
			// Without sub-stream information the folder holds a single file.
			fr.sizes = []uint64{folder.UnpackSize()}
			fr.crcs = []uint32{folder.UnpackCRC}
		}

		folders = append(folders, fr)
	}
	return folders, nil
}
// folderReader gives sequential access to the files stored in one folder.
type folderReader struct {
// binder wires the folder's coders, packed inputs and bind pairs together.
binder solidblock.Binder
// sizes and crcs describe the files stored in the folder, in order.
sizes []uint64
crcs []uint32
// inputs maps a coder input index to the packed stream feeding it.
inputs map[int]io.Reader
// bufs are the pooled buffered readers Next wraps around inputs; Close
// returns them to bufioReaderPool.
bufs []*bufio.Reader
// sb reads the decoded folder file by file; it is nil until Next has set
// the decoders up.
sb *solidblock.Solidblock
}
// bufioReaderPool recycles the 32 KiB buffered readers that folderReader.Next
// places in front of packed streams.
var bufioReaderPool = sync.Pool{
New: func() interface{} {
return bufio.NewReaderSize(nil, 32*1024)
},
}
// Next advances to the next file of the folder.
//
// The first call wraps every packed input in a pooled buffered reader, binds
// it to the binder and resolves the binder's outputs; the folder must end in
// exactly one non-nil output, otherwise ErrNotSupported is returned. Every
// call then advances the underlying solid block, which reports io.EOF once
// the folder has no more files.
func (fr *folderReader) Next() error {
	if fr.sb != nil {
		return fr.sb.Next()
	}

	fr.bufs = make([]*bufio.Reader, 0, len(fr.inputs))
	for index, input := range fr.inputs {
		buffered := bufioReaderPool.Get().(*bufio.Reader)
		buffered.Reset(input)
		fr.bufs = append(fr.bufs, buffered)
		fr.binder.Reader(buffered, index)
	}

	outputs, err := fr.binder.Outputs()
	if err != nil {
		return err
	}
	if len(outputs) != 1 || outputs[0] == nil {
		return ErrNotSupported
	}

	fr.sb = solidblock.New(outputs[0], fr.sizes, fr.crcs)
	return fr.sb.Next()
}
// Close hands the buffered readers obtained by Next back to bufioReaderPool
// and forgets them. It always returns nil.
func (fr *folderReader) Close() error {
	for i := range fr.bufs {
		bufioReaderPool.Put(fr.bufs[i])
	}
	fr.bufs = nil
	return nil
}
// next returns the next entry of the archive and positions the folder readers
// on its data.
//
// Entries marked as empty streams have no data and are returned without
// touching the folders. For any other entry the current folder is advanced;
// exhausted folders are closed and skipped until one provides a file.
//
// io.EOF is returned when there are no more entries or no folder is left to
// provide data for the entry. Any other error raised while setting up or
// advancing a folder (for example a decompressor that fails to initialise)
// is returned to the caller.
func (sz *Reader) next() (*headers.FileInfo, error) {
	fileInfo := sz.nextFileInfo()
	if fileInfo == nil {
		return nil, io.EOF
	}

	sz.emptyStream = fileInfo.IsEmptyStream
	if sz.emptyStream {
		return fileInfo, nil
	}

	for sz.folderIndex < len(sz.folders) {
		folder := sz.folders[sz.folderIndex]

		err := folder.Next()
		if err == nil {
			return fileInfo, nil
		}
		if err != io.EOF {
			// Reporting the failure here keeps Read from running into a
			// folder that was never set up.
			return nil, err
		}

		// This folder is exhausted; release its buffers and try the next.
		folder.Close()
		sz.folderIndex++
	}
	return nil, io.EOF
}
// Read reads from the current file in the 7z archive.
// It returns (0, io.EOF) when it reaches the end of that file,
// until Next is called to advance to the next file.
//
// Read also returns (0, io.EOF) when there is no current file to read from:
// before the first call to Next, or for entries without data. Errors other
// than io.EOF are remembered and returned by all later calls.
func (sz *Reader) Read(p []byte) (int, error) {
	if sz.err != nil {
		return 0, sz.err
	}
	if sz.emptyStream {
		return 0, io.EOF
	}
	if sz.folderIndex >= len(sz.folders) {
		return 0, io.EOF
	}

	// sb stays nil until Next has set the folder up.
	sb := sz.folders[sz.folderIndex].sb
	if sb == nil {
		return 0, io.EOF
	}

	n, err := sb.Read(p)
	if err != nil && err != io.EOF {
		sz.err = err
	}
	return n, err
}

@ -0,0 +1,129 @@
package go7z
import (
"bytes"
"compress/bzip2"
"compress/flate"
"encoding/binary"
"io"
"sync"
"github.com/saracen/go7z/filters"
"github.com/ulikunitz/xz/lzma"
)
// Decompressor is a handler function called when a registered decompressor is
// initialized.
//
// r holds the coder's input streams, options the coder's raw properties from
// the archive, unpackSize the unpack size recorded for the coder and ro the
// reader options (used, for example, to obtain a password). The returned
// reader yields the decoded data.
type Decompressor func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error)
var (
// decompressors maps a codec ID to its registered Decompressor. It is
// filled by RegisterDecompressor and queried by decompressor.
decompressors sync.Map // map[uint32]Decompressor
)
// init registers the built-in decompressors: copy (0x00), delta (0x03),
// LZMA (0x030101), LZMA2 (0x21), BCJ2 (0x303011b), deflate (0x40108),
// bzip2 (0x40202) and AES (0x6f10701).
//
// Every handler returns ErrNotSupported when it is handed an unexpected
// number of input streams or coder properties it cannot interpret. Coder
// properties come straight from the archive, so they are validated before
// use.
func init() {
	// copy: the single input is passed through untouched.
	RegisterDecompressor(0x00, func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
		if len(r) != 1 {
			return nil, ErrNotSupported
		}
		return r[0], nil
	})

	// delta: the single property byte holds the distance minus one.
	RegisterDecompressor(0x03, func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
		if len(r) != 1 || len(options) != 1 {
			return nil, ErrNotSupported
		}
		return filters.NewDeltaDecoder(r[0], uint(options[0])+1, int64(unpackSize))
	})

	// lzma
	RegisterDecompressor(0x030101, func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
		if len(r) != 1 {
			return nil, ErrNotSupported
		}

		// We can't set options in the lzma decoder library, so instead we
		// add a fake header: the coder properties followed by the unpack
		// size. The header is assembled in its own buffer so that appending
		// the size never writes into the backing array of options.
		header := bytes.NewBuffer(make([]byte, 0, len(options)+8))
		header.Write(options)
		if err := binary.Write(header, binary.LittleEndian, unpackSize); err != nil {
			return nil, err
		}
		return lzma.NewReader(io.MultiReader(header, r[0]))
	})

	// lzma2
	RegisterDecompressor(0x21, func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
		if len(r) != 1 {
			return nil, ErrNotSupported
		}

		config := lzma.Reader2Config{}
		if len(options) > 0 {
			// The property byte encodes the dictionary size as
			// (2 | bit0) << (prop/2 + 11); values above 40 are not valid.
			if options[0] > 40 {
				return nil, ErrNotSupported
			}
			config.DictCap = int(2 | (options[0] & 1))
			config.DictCap <<= (options[0] >> 1) + 11
		}
		return config.NewReader2(r[0])
	})

	// bcj2: four input streams are merged into one output.
	RegisterDecompressor(0x303011b, func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
		if len(r) != 4 {
			return nil, ErrNotSupported
		}
		return filters.NewBCJ2Decoder(r[0], r[1], r[2], r[3], int64(unpackSize))
	})

	// deflate
	RegisterDecompressor(0x40108, func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
		if len(r) != 1 {
			return nil, ErrNotSupported
		}
		return flate.NewReader(r[0]), nil
	})

	// bzip2
	RegisterDecompressor(0x40202, func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
		if len(r) != 1 {
			return nil, ErrNotSupported
		}
		return bzip2.NewReader(r[0]), nil
	})

	// AES
	RegisterDecompressor(0x6f10701, func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
		if len(r) != 1 {
			return nil, ErrNotSupported
		}
		if len(options) < 2 {
			return nil, ErrNotSupported
		}

		// Byte 0: bit 7 and bit 6 each add one byte to the salt and IV
		// size, the low six bits are the key derivation power.
		// Byte 1: the high nibble adds to the salt size, the low nibble to
		// the IV size.
		saltSize := int((options[0]>>7)&1) + int(options[1]>>4)
		ivSize := int((options[0]>>6)&1) + int(options[1]&0x0F)
		power := int(options[0]) & 0x3f

		options = options[2:]
		// Reject truncated properties instead of slicing past their end.
		if len(options) < saltSize+ivSize {
			return nil, ErrNotSupported
		}
		salt := options[:saltSize]
		iv := options[saltSize : saltSize+ivSize]

		return filters.NewAESDecrypter(r[0], power, salt, iv, ro.Password())
	})
}
// RegisterDecompressor registers dcomp as the decompressor for the codec ID
// method. It panics when a decompressor is already registered for that ID.
func RegisterDecompressor(method uint32, dcomp Decompressor) {
	_, exists := decompressors.LoadOrStore(method, dcomp)
	if exists {
		panic("decompressor already registered")
	}
}
// decompressor returns the Decompressor registered for the codec ID method,
// or nil when none has been registered.
func decompressor(method uint32) Decompressor {
	if registered, found := decompressors.Load(method); found {
		return registered.(Decompressor)
	}
	return nil
}

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 Arran Walker
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,159 @@
# Solidblock
Solidblock is a Go library providing `io.Reader`s for solid compression and
codec binding/chaining.
## Solid Compression Reader
Wrapped around a compressed solid block of concatenated files, it provides
sequential access to the files:
```
// file contents
files := [][]byte{
[]byte("file 1\n"),
[]byte("file 2\n"),
}
// file metadata
var metadata struct {
sizes []uint64
crcs []uint32
}
metadata.sizes = []uint64{
uint64(len(files[0])),
uint64(len(files[1])),
}
metadata.crcs = []uint32{
crc32.ChecksumIEEE(files[0]),
crc32.ChecksumIEEE(files[1]),
}
// Concatenate files to compressed block
block := new(bytes.Buffer)
w := gzip.NewWriter(block)
w.Write(files[0])
w.Write(files[1])
w.Close()
// Open gzip reader to compressed block
r, err := gzip.NewReader(block)
if err != nil {
panic(err)
}
// Create a new solidblock reader
s := solidblock.New(r, metadata.sizes, metadata.crcs)
for {
err := s.Next()
if err == io.EOF {
break
}
if err != nil {
panic(err)
}
io.Copy(os.Stdout, s)
}
```
## Codec Binding
To improve compression, some codecs (such as BCJ2), split data up into multiple
streams that compress better individually. `solidblock.Binder` provides a simple
way to pair together the inputs and outputs of various codecs/readers.
For example:
```
func BCJ2Decoder(inputs []io.Reader) ([]io.Reader, error) {
// 1. take 4 input readers
// 2. do magic
// 3. return 1 reader
}
func GzipDecoder(inputs []io.Reader) ([]io.Reader, error) {
if len(inputs) != 1 {
panic("unsupported input configuration")
}
r, err := gzip.NewReader(inputs[0])
if err != nil {
return nil, err
}
return []io.Reader{r}, nil
}
file, err := os.Open("file")
if err != nil {
panic(err)
}
// Assume file has 4 concatenated streams. 3 of the streams are from a BCJ2
// encoder, compressed to gzip streams. 1 is the 4th stream of the BCJ2 encoder,
// but left uncompressed.
streams := make([]io.Reader, 4)
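// io.NewSectionReader takes (reader, offset, length); every stream in this
// example is 100 bytes long.
streams[0] = io.NewSectionReader(file, 0, 100)
streams[1] = io.NewSectionReader(file, 100, 100)
streams[2] = io.NewSectionReader(file, 200, 100)
streams[3] = io.NewSectionReader(file, 300, 100)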
// Create a new binder
binder := solidblock.NewBinder()
// Create gzip decompressors for the first 3 initial input streams.
gzip0InputIDs, gzip0OutputIDs := binder.AddCodec(GzipDecoder, 1, 1)
gzip1InputIDs, gzip1OutputIDs := binder.AddCodec(GzipDecoder, 1, 1)
gzip2InputIDs, gzip2OutputIDs := binder.AddCodec(GzipDecoder, 1, 1)
// Create BCJ2 decoder for the 4 gzip decoded streams.
bcj2InputIDs, bcj2outputIDs := binder.AddCodec(BCJ2Decoder, 4, 1)
// Connect initial streams to gzip decoders
binder.Reader(streams[0], gzip0InputIDs[0])
binder.Reader(streams[1], gzip1InputIDs[0])
binder.Reader(streams[2], gzip2InputIDs[0])
// Connect 4th initial stream straight to 4th input of BCJ2 decoder.
binder.Reader(streams[3], bcj2InputIDs[3])
// Pair the 1st, 2nd and 3rd input of the BCJ2 decoder with the 3 gzip output
// streams. Pair takes the input ID first and the output ID second.
binder.Pair(bcj2InputIDs[0], gzip0OutputIDs[0])
binder.Pair(bcj2InputIDs[1], gzip1OutputIDs[0])
binder.Pair(bcj2InputIDs[2], gzip2OutputIDs[0])
// Create single output to read from
outputs, err := binder.Outputs()
if err != nil {
panic(err)
}
if len(outputs) != 1 {
panic("output should only contain one stream")
}
io.Copy(os.Stdout, outputs[0])
```
A picture says 60 lines of code...
```
+------------+
concatenated file |bcj2 decoder+--->io.Reader
+--------------------+ +-+--+--+--+-+
| | ^ ^ ^ ^
| +--------------+ | +------------+ | | | |
| |gzipped stream+------>gzip decoder+---+ | | |
| +--------------+ | +------------+ | | |
| | | | |
| +--------------+ | +------------+ | | |
| |gzipped stream+------>gzip decoder+------+ | |
| +--------------+ | +------------+ | |
| | | |
| +--------------+ | +------------+ | |
| |gzipped stream+------>gzip decoder+---------+ |
| +--------------+ | +------------+ |
| | |
| +--------------+ | |
| | uncompressed +--------------------------------+
| | stream | |
| +--------------+ |
| |
+--------------------+
```

@ -0,0 +1,123 @@
package solidblock
import (
"errors"
"fmt"
"io"
)
var (
	// ErrInputIsUnbound is returned when a codec input has neither been bound
	// to a reader nor paired with an output that can be produced.
	ErrInputIsUnbound = errors.New("input is unbound")

	// ErrUnexpectedOutputCount is returned when the number of io.Readers
	// returned from a codec handler doesn't match the number specified when
	// adding the codec.
	ErrUnexpectedOutputCount = errors.New("unexpected output count")
)

// reader is a named slot holding the stream attached to an input or output.
// A paired input and output share one *reader, so the stream stored by the
// producing codec becomes visible to the consuming codec.
type reader struct {
	Name string
	R    io.Reader
}

// codec is a handler together with the global indexes of its input and output
// streams.
type codec struct {
	fn         func([]io.Reader) ([]io.Reader, error)
	inIndexes  []int
	outIndexes []int
}

// Binder holds information regarding codecs, their inputs/outputs and how they
// join together.
type Binder struct {
	numInStreams  int
	numOutStreams int

	in  []*reader
	out []*reader

	codecs []*codec
}

// NewBinder returns a new binder.
func NewBinder() *Binder {
	return &Binder{}
}

// AddCodec adds a handler function for processing information from input(s) and
// producing output(s).
//
// It returns the IDs assigned to the codec's inputs and outputs. IDs are
// allocated consecutively across all codecs of the binder, separately for
// inputs and outputs, and are the values expected by Reader and Pair.
func (b *Binder) AddCodec(fn func([]io.Reader) ([]io.Reader, error), inputs, outputs int) (in, out []int) {
	c := &codec{fn: fn}

	b.in = append(b.in, make([]*reader, inputs)...)
	b.out = append(b.out, make([]*reader, outputs)...)

	for i := 0; i < inputs; i++ {
		c.inIndexes = append(c.inIndexes, b.numInStreams+i)
	}
	for i := 0; i < outputs; i++ {
		c.outIndexes = append(c.outIndexes, b.numOutStreams+i)
	}

	b.numInStreams += inputs
	b.numOutStreams += outputs
	b.codecs = append(b.codecs, c)

	return c.inIndexes, c.outIndexes
}

// Reader binds a reader to an in stream. Unknown input IDs are ignored.
func (b *Binder) Reader(r io.Reader, in int) {
	if in < 0 || in >= len(b.in) {
		return
	}
	b.in[in] = &reader{fmt.Sprintf("In: %v", in), r}
}

// Pair pairs two streams, binding an in stream to an out stream: whatever the
// codec owning out produces is fed to the input in. Unknown IDs are ignored.
func (b *Binder) Pair(in int, out int) {
	if in < 0 || in >= len(b.in) || out < 0 || out >= len(b.out) {
		return
	}
	if b.out[out] == nil {
		b.out[out] = &reader{fmt.Sprintf("Bind %v:%v", in, out), nil}
	}
	b.in[in] = b.out[out]
}

// Outputs runs the codec handlers and returns any unbound output readers to
// read from, ordered by codec and then by output.
//
// A codec is run as soon as all of its inputs are available, so codecs may be
// added in any order relative to the codecs they are paired with.
// ErrInputIsUnbound is returned when some codec can never run because one of
// its inputs is neither bound to a reader nor produced by another codec.
// ErrUnexpectedOutputCount is returned when a handler yields a different
// number of readers than declared with AddCodec, and handler errors are
// returned unchanged. No readers are returned together with an error.
func (b *Binder) Outputs() ([]io.Reader, error) {
	resolved := make([]bool, len(b.codecs))
	unboundByCodec := make([][]io.Reader, len(b.codecs))

	for pending := len(b.codecs); pending > 0; {
		progressed := false

		for i, c := range b.codecs {
			if resolved[i] {
				continue
			}
			ins, ready := b.inputsOf(c)
			if !ready {
				// Retried after other codecs have produced their outputs.
				continue
			}

			outs, err := c.fn(ins)
			if err != nil {
				return nil, err
			}
			if len(outs) != len(c.outIndexes) {
				return nil, ErrUnexpectedOutputCount
			}

			for j, num := range c.outIndexes {
				// An output nobody paired with is handed to the caller.
				if b.out[num] == nil {
					b.out[num] = &reader{fmt.Sprintf("Out %v", num), nil}
					unboundByCodec[i] = append(unboundByCodec[i], outs[j])
				}
				b.out[num].R = outs[j]
			}

			resolved[i] = true
			pending--
			progressed = true
		}

		if !progressed {
			return nil, ErrInputIsUnbound
		}
	}

	var unbound []io.Reader
	for _, outs := range unboundByCodec {
		unbound = append(unbound, outs...)
	}
	return unbound, nil
}

// inputsOf collects the streams currently attached to c's inputs. The second
// result is false while at least one input has no stream yet.
func (b *Binder) inputsOf(c *codec) ([]io.Reader, bool) {
	var ins []io.Reader
	for _, num := range c.inIndexes {
		if b.in[num] == nil || b.in[num].R == nil {
			return nil, false
		}
		ins = append(ins, b.in[num].R)
	}
	return ins, true
}

@ -0,0 +1,100 @@
package solidblock
import (
"errors"
"hash"
"hash/crc32"
"io"
"io/ioutil"
)
var (
	// ErrChecksumMismatch is returned when a file's crc check fails.
	ErrChecksumMismatch = errors.New("checksum mismatch")
)

// Solidblock provides sequential access to files that have been concatenated
// into a single compressed data block.
type Solidblock struct {
	sizes []uint64
	crcs  []uint32

	// base is the decoded block; file reads the current file out of base
	// while feeding crc.
	base io.Reader
	file io.Reader
	crc  hash.Hash32

	// target is the index of the file selected by Next and index the index
	// of the file that file is positioned on. Both are -1 until a file has
	// been selected or opened, respectively.
	target int
	index  int
}

// New returns a new solidblock reader for the files concatenated in r. sizes
// and crcs give the length and the IEEE CRC-32 of every file, in order.
//
// New panics when sizes and crcs differ in length.
func New(r io.Reader, sizes []uint64, crcs []uint32) *Solidblock {
	if len(sizes) != len(crcs) {
		panic("crcs slice needs to be the same length as sizes slice")
	}

	return &Solidblock{
		sizes:  sizes,
		crcs:   crcs,
		target: -1,
		// No file has been consumed yet, so skipping has to start at file 0.
		index: -1,
		base:  r,
	}
}

// Next advances to the next file entry in solid block.
//
// Calling Next without reading the current file is supported. Only when Read
// is called will decompression occur for current file. Any skipped files will
// still need to be decompressed, but their contents is discarded.
//
// io.EOF is returned at the end of the input.
func (fr *Solidblock) Next() error {
	if fr.target < len(fr.sizes)-1 {
		fr.target++
		return nil
	}
	return io.EOF
}

// Read reads from the current file in solid block.
// It returns (0, io.EOF) when it reaches the end of that file,
// until Next is called to advance to the next file.
//
// Before the first call to Next there is no current file and Read returns
// (0, io.EOF). ErrChecksumMismatch is returned at the end of a file whose
// contents do not match its CRC, and io.ErrUnexpectedEOF when the block ends
// while skipped files are being discarded.
func (fr *Solidblock) Read(p []byte) (int, error) {
	if fr.target < 0 {
		return 0, io.EOF
	}

	if fr.index != fr.target {
		if fr.file != nil {
			// drain the remainder of the previously opened file
			if _, err := io.Copy(ioutil.Discard, fr.file); err != nil {
				return 0, err
			}
		}

		// discard every file between the last opened one and the target
		for i := fr.index + 1; i < fr.target; i++ {
			if _, err := io.CopyN(ioutil.Discard, fr.base, int64(fr.sizes[i])); err != nil {
				if err == io.EOF {
					err = io.ErrUnexpectedEOF
				}
				return 0, err
			}
		}

		fr.crc = crc32.NewIEEE()
		fr.file = io.TeeReader(io.LimitReader(fr.base, int64(fr.sizes[fr.target])), fr.crc)
		fr.index = fr.target
	}

	n, err := fr.file.Read(p)
	if err == io.EOF && fr.crc.Sum32() != fr.crcs[fr.index] {
		return n, ErrChecksumMismatch
	}
	return n, err
}

// Size returns the size of the file selected by Next, or 0 when Next has not
// been called yet.
func (fr *Solidblock) Size() int64 {
	if fr.target < 0 {
		return 0
	}
	return int64(fr.sizes[fr.target])
}

@ -0,0 +1,26 @@
Copyright (c) 2014-2021 Ulrich Kunitz
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* My name, Ulrich Kunitz, may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -0,0 +1,181 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package hash
// CyclicPoly provides a cyclic polynomial rolling hash.
type CyclicPoly struct {
	// h is the current hash value.
	h uint64
	// p holds the table values of the bytes inside the window; once the
	// window is full it is used as a ring buffer.
	p []uint64
	// i is the position in p of the oldest byte of a full window.
	i int
}

// ror rotates the unsigned 64-bit integer to right. The shift s is reduced
// modulo 64, so any value is accepted.
func ror(x uint64, s uint) uint64 {
	s &= 63
	// For s == 0 the left shift by 64 yields 0 and x is returned unchanged.
	return (x >> s) | (x << (64 - s))
}

// NewCyclicPoly creates a new instance of the CyclicPoly structure. The
// argument n gives the number of bytes for which a hash will be executed.
// This number must be positive; the method panics if this isn't the case.
func NewCyclicPoly(n int) *CyclicPoly {
	if n < 1 {
		panic("argument n must be positive")
	}
	return &CyclicPoly{p: make([]uint64, 0, n)}
}

// Len returns the length of the byte sequence for which a hash is generated.
func (r *CyclicPoly) Len() int {
	return cap(r.p)
}

// RollByte hashes the next byte and returns a hash value. The complete hash
// becomes available after at least Len() bytes have been hashed.
func (r *CyclicPoly) RollByte(x byte) uint64 {
	y := hash[x]
	if len(r.p) < cap(r.p) {
		// The window is still filling up.
		r.h = ror(r.h, 1) ^ y
		r.p = append(r.p, y)
	} else {
		// The oldest byte has been rotated Len()-1 times since it entered
		// the window; cancel it before rotating the new byte in.
		r.h ^= ror(r.p[r.i], uint(cap(r.p)-1))
		r.h = ror(r.h, 1) ^ y
		r.p[r.i] = y
		r.i = (r.i + 1) % cap(r.p)
	}
	return r.h
}
// hash maps every byte value to the fixed 64-bit value that
// CyclicPoly.RollByte mixes into the rolling hash for that byte.
var hash = [256]uint64{
0x2e4fc3f904065142, 0xc790984cfbc99527,
0x879f95eb8c62f187, 0x3b61be86b5021ef2,
0x65a896a04196f0a5, 0xc5b307b80470b59e,
0xd3bff376a70df14b, 0xc332f04f0b3f1701,
0x753b5f0e9abf3e0d, 0xb41538fdfe66ef53,
0x1906a10c2c1c0208, 0xfb0c712a03421c0d,
0x38be311a65c9552b, 0xfee7ee4ca6445c7e,
0x71aadeded184f21e, 0xd73426fccda23b2d,
0x29773fb5fb9600b5, 0xce410261cd32981a,
0xfe2848b3c62dbc2d, 0x459eaaff6e43e11c,
0xc13e35fc9c73a887, 0xf30ed5c201e76dbc,
0xa5f10b3910482cea, 0x2945d59be02dfaad,
0x06ee334ff70571b5, 0xbabf9d8070f44380,
0xee3e2e9912ffd27c, 0x2a7118d1ea6b8ea7,
0x26183cb9f7b1664c, 0xea71dac7da068f21,
0xea92eca5bd1d0bb7, 0x415595862defcd75,
0x248a386023c60648, 0x9cf021ab284b3c8a,
0xfc9372df02870f6c, 0x2b92d693eeb3b3fc,
0x73e799d139dc6975, 0x7b15ae312486363c,
0xb70e5454a2239c80, 0x208e3fb31d3b2263,
0x01f563cabb930f44, 0x2ac4533d2a3240d8,
0x84231ed1064f6f7c, 0xa9f020977c2a6d19,
0x213c227271c20122, 0x09fe8a9a0a03d07a,
0x4236dc75bcaf910c, 0x460a8b2bead8f17e,
0xd9b27be1aa07055f, 0xd202d5dc4b11c33e,
0x70adb010543bea12, 0xcdae938f7ea6f579,
0x3f3d870208672f4d, 0x8e6ccbce9d349536,
0xe4c0871a389095ae, 0xf5f2a49152bca080,
0x9a43f9b97269934e, 0xc17b3753cb6f475c,
0xd56d941e8e206bd4, 0xac0a4f3e525eda00,
0xa06d5a011912a550, 0x5537ed19537ad1df,
0xa32fe713d611449d, 0x2a1d05b47c3b579f,
0x991d02dbd30a2a52, 0x39e91e7e28f93eb0,
0x40d06adb3e92c9ac, 0x9b9d3afde1c77c97,
0x9a3f3f41c02c616f, 0x22ecd4ba00f60c44,
0x0b63d5d801708420, 0x8f227ca8f37ffaec,
0x0256278670887c24, 0x107e14877dbf540b,
0x32c19f2786ac1c05, 0x1df5b12bb4bc9c61,
0xc0cac129d0d4c4e2, 0x9fdb52ee9800b001,
0x31f601d5d31c48c4, 0x72ff3c0928bcaec7,
0xd99264421147eb03, 0x535a2d6d38aefcfe,
0x6ba8b4454a916237, 0xfa39366eaae4719c,
0x10f00fd7bbb24b6f, 0x5bd23185c76c84d4,
0xb22c3d7e1b00d33f, 0x3efc20aa6bc830a8,
0xd61c2503fe639144, 0x30ce625441eb92d3,
0xe5d34cf359e93100, 0xa8e5aa13f2b9f7a5,
0x5c2b8d851ca254a6, 0x68fb6c5e8b0d5fdf,
0xc7ea4872c96b83ae, 0x6dd5d376f4392382,
0x1be88681aaa9792f, 0xfef465ee1b6c10d9,
0x1f98b65ed43fcb2e, 0x4d1ca11eb6e9a9c9,
0x7808e902b3857d0b, 0x171c9c4ea4607972,
0x58d66274850146df, 0x42b311c10d3981d1,
0x647fa8c621c41a4c, 0xf472771c66ddfedc,
0x338d27e3f847b46b, 0x6402ce3da97545ce,
0x5162db616fc38638, 0x9c83be97bc22a50e,
0x2d3d7478a78d5e72, 0xe621a9b938fd5397,
0x9454614eb0f81c45, 0x395fb6e742ed39b6,
0x77dd9179d06037bf, 0xc478d0fee4d2656d,
0x35d9d6cb772007af, 0x83a56e92c883f0f6,
0x27937453250c00a1, 0x27bd6ebc3a46a97d,
0x9f543bf784342d51, 0xd158f38c48b0ed52,
0x8dd8537c045f66b4, 0x846a57230226f6d5,
0x6b13939e0c4e7cdf, 0xfca25425d8176758,
0x92e5fc6cd52788e6, 0x9992e13d7a739170,
0x518246f7a199e8ea, 0xf104c2a71b9979c7,
0x86b3ffaabea4768f, 0x6388061cf3e351ad,
0x09d9b5295de5bbb5, 0x38bf1638c2599e92,
0x1d759846499e148d, 0x4c0ff015e5f96ef4,
0xa41a94cfa270f565, 0x42d76f9cb2326c0b,
0x0cf385dd3c9c23ba, 0x0508a6c7508d6e7a,
0x337523aabbe6cf8d, 0x646bb14001d42b12,
0xc178729d138adc74, 0xf900ef4491f24086,
0xee1a90d334bb5ac4, 0x9755c92247301a50,
0xb999bf7c4ff1b610, 0x6aeeb2f3b21e8fc9,
0x0fa8084cf91ac6ff, 0x10d226cf136e6189,
0xd302057a07d4fb21, 0x5f03800e20a0fcc3,
0x80118d4ae46bd210, 0x58ab61a522843733,
0x51edd575c5432a4b, 0x94ee6ff67f9197f7,
0x765669e0e5e8157b, 0xa5347830737132f0,
0x3ba485a69f01510c, 0x0b247d7b957a01c3,
0x1b3d63449fd807dc, 0x0fdc4721c30ad743,
0x8b535ed3829b2b14, 0xee41d0cad65d232c,
0xe6a99ed97a6a982f, 0x65ac6194c202003d,
0x692accf3a70573eb, 0xcc3c02c3e200d5af,
0x0d419e8b325914a3, 0x320f160f42c25e40,
0x00710d647a51fe7a, 0x3c947692330aed60,
0x9288aa280d355a7a, 0xa1806a9b791d1696,
0x5d60e38496763da1, 0x6c69e22e613fd0f4,
0x977fc2a5aadffb17, 0xfb7bd063fc5a94ba,
0x460c17992cbaece1, 0xf7822c5444d3297f,
0x344a9790c69b74aa, 0xb80a42e6cae09dce,
0x1b1361eaf2b1e757, 0xd84c1e758e236f01,
0x88e0b7be347627cc, 0x45246009b7a99490,
0x8011c6dd3fe50472, 0xc341d682bffb99d7,
0x2511be93808e2d15, 0xd5bc13d7fd739840,
0x2a3cd030679ae1ec, 0x8ad9898a4b9ee157,
0x3245fef0a8eaf521, 0x3d6d8dbbb427d2b0,
0x1ed146d8968b3981, 0x0c6a28bf7d45f3fc,
0x4a1fd3dbcee3c561, 0x4210ff6a476bf67e,
0xa559cce0d9199aac, 0xde39d47ef3723380,
0xe5b69d848ce42e35, 0xefa24296f8e79f52,
0x70190b59db9a5afc, 0x26f166cdb211e7bf,
0x4deaf2df3c6b8ef5, 0xf171dbdd670f1017,
0xb9059b05e9420d90, 0x2f0da855c9388754,
0x611d5e9ab77949cc, 0x2912038ac01163f4,
0x0231df50402b2fba, 0x45660fc4f3245f58,
0xb91cc97c7c8dac50, 0xb72d2aafe4953427,
0xfa6463f87e813d6b, 0x4515f7ee95d5c6a2,
0x1310e1c1a48d21c3, 0xad48a7810cdd8544,
0x4d5bdfefd5c9e631, 0xa43ed43f1fdcb7de,
0xe70cfc8fe1ee9626, 0xef4711b0d8dda442,
0xb80dd9bd4dab6c93, 0xa23be08d31ba4d93,
0x9b37db9d0335a39c, 0x494b6f870f5cfebc,
0x6d1b3c1149dda943, 0x372c943a518c1093,
0xad27af45e77c09c4, 0x3b6f92b646044604,
0xac2917909f5fcf4f, 0x2069a60e977e5557,
0x353a469e71014de5, 0x24be356281f55c15,
0x2b6d710ba8e9adea, 0x404ad1751c749c29,
0xed7311bf23d7f185, 0xba4f6976b4acc43e,
0x32d7198d2bc39000, 0xee667019014d6e01,
0x494ef3e128d14c83, 0x1f95a152baecd6be,
0x201648dff1f483a5, 0x68c28550c8384af6,
0x5fc834a6824a7f48, 0x7cd06cb7365eaf28,
0xd82bbd95e9b30909, 0x234f0d1694c53f6d,
0xd2fb7f4a96d83f4a, 0xff0d5da83acac05e,
0xf8f6b97f5585080a, 0x74236084be57b95b,
0xa25e40c03bbc36ad, 0x6b6e5c14ce88465b,
0x4378ffe93e1528c5, 0x94ca92a17118e2d2,
}

@ -0,0 +1,14 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package hash provides rolling hashes.
Rolling hashes have to be used for maintaining the positions of n-byte
sequences in the dictionary buffer.
The package currently provides the Rabin-Karp rolling hash and a Cyclic
Polynomial hash. Both implement the Roller interface and can therefore be
passed to the Hashes function.
*/
package hash

@ -0,0 +1,66 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package hash
// A is the default multiplier of the Rabin-Karp rolling hash. The value is a
// random prime.
const A = 0x97b548add41d5da1

// RabinKarp computes a rolling hash over a fixed-size window of bytes.
type RabinKarp struct {
	// A is the multiplier applied on every step.
	A uint64
	// aOldest is A^n, the weight of the oldest byte of a full window.
	aOldest uint64
	// h is the current hash value.
	h uint64
	// p holds the bytes of the window; once full it is used as a ring
	// buffer whose oldest element is at index i.
	p []byte
	i int
}

// NewRabinKarp creates a RabinKarp value for byte sequences of length n that
// uses the default constant A.
func NewRabinKarp(n int) *RabinKarp {
	return NewRabinKarpConst(n, A)
}

// NewRabinKarpConst creates a RabinKarp value for byte sequences of length n
// that uses a as the hash constant. It panics if n is not positive.
func NewRabinKarpConst(n int, a uint64) *RabinKarp {
	if n < 1 {
		panic("number of bytes n must be positive")
	}

	// a^n by repeated multiplication; n is small for the LZMA compressor,
	// so O(n) is sufficient.
	power := uint64(1)
	for k := n; k > 0; k-- {
		power *= a
	}

	rk := new(RabinKarp)
	rk.A = a
	rk.aOldest = power
	rk.p = make([]byte, 0, n)
	return rk
}

// Len returns the length of the byte sequence.
func (r *RabinKarp) Len() int {
	return cap(r.p)
}

// RollByte adds x to the window and returns the updated hash. Once the window
// holds Len() bytes, the oldest byte is removed from the hash first.
func (r *RabinKarp) RollByte(x byte) uint64 {
	if len(r.p) == cap(r.p) {
		oldest := uint64(r.p[r.i])
		r.h -= oldest * r.aOldest
		r.p[r.i] = x
		r.i = (r.i + 1) % cap(r.p)
	} else {
		r.p = append(r.p, x)
	}
	r.h = (r.h + uint64(x)) * r.A
	return r.h
}

@ -0,0 +1,29 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package hash
// Roller is the interface implemented by rolling hashes. The value returned
// by RollByte is the hash of a full window only after RollByte has been
// called Len times.
type Roller interface {
	Len() int
	RollByte(x byte) uint64
}

// Hashes returns the hash of every window of r.Len() consecutive bytes in p,
// in order of the window's start position. It returns nil when p is shorter
// than one window. Note that the state of the roller is changed.
func Hashes(r Roller, p []byte) []uint64 {
	window := r.Len()
	if window > len(p) {
		return nil
	}

	// Feed the first window-1 bytes; they do not complete a window yet.
	for _, b := range p[:window-1] {
		r.RollByte(b)
	}

	// Every further byte completes one window.
	out := make([]uint64, 0, len(p)-window+1)
	for _, b := range p[window-1:] {
		out = append(out, r.RollByte(b))
	}
	return out
}

@ -0,0 +1,457 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package xlog provides a simple logging package that allows to disable
// certain message categories. It defines a type, Logger, with multiple
// methods for formatting output. The package also has a predefined
// 'standard' Logger accessible through the helper functions Print[f|ln],
// Fatal[f|ln], Panic[f|ln], Warn[f|ln] and Debug[f|ln], which are easier
// to use than creating a Logger manually. That logger writes to standard
// error and prints the date and time of each logged message, which can
// be configured using the function SetFlags.
//
// The Fatal functions call os.Exit(1) after the message is output; the
// output itself can be suppressed by the flags. The Panic functions call
// panic after writing the log message, which can likewise be suppressed.
package xlog
import (
"fmt"
"io"
"os"
"runtime"
"sync"
"time"
)
// The flags define what information is prefixed to each log entry
// generated by the Logger. The Lno* versions allow the suppression of
// specific output. The bits are or'ed together to control what will be
// printed. There is no control over the order of the items printed and
// the format. Setting Lmicroseconds prints the time even if Ltime is not
// set. The full format is:
//
//	2009-01-23 01:23:23.123123 /a/b/c/d.go:23: message
const (
	Ldate         = 1 << iota // the date: 2009-01-23
	Ltime                     // the time: 01:23:23
	Lmicroseconds             // microsecond resolution: 01:23:23.123123
	Llongfile                 // full file name and line number: /a/b/c/d.go:23
	Lshortfile                // final file name element and line number: d.go:23
	Lnopanic                  // suppresses output from Panic[f|ln] but not the panic call
	Lnofatal                  // suppresses output from Fatal[f|ln] but not the exit
	Lnowarn                   // suppresses output from Warn[f|ln]
	Lnoprint                  // suppresses output from Print[f|ln]
	Lnodebug                  // suppresses output from Debug[f|ln]
	// initial values for the standard logger
	Lstdflags = Ldate | Ltime | Lnodebug
)
// A Logger represents an active logging object that generates lines of
// output to an io.Writer. Each logging operation, if not suppressed,
// makes a single call to the Writer's Write method. A Logger can be
// used simultaneously from multiple goroutines; it guarantees to
// serialize access to the Writer.
type Logger struct {
	mu sync.Mutex // ensures atomic writes; and protects the following
	// fields
	prefix string    // prefix to write at beginning of each line
	flag   int       // properties; a combination of the L* flags
	out    io.Writer // destination for output
	buf    []byte    // for accumulating text to write
}
// New creates a new Logger. The out argument sets the destination to
// which the log output will be written. The prefix appears at the
// beginning of each log line. The flag argument defines the logging
// properties as a combination of the L* flags.
func New(out io.Writer, prefix string, flag int) *Logger {
	return &Logger{out: out, prefix: prefix, flag: flag}
}
// std is the standard logger used by the package scope functions. It writes
// to os.Stderr, has no prefix and starts with the flags Lstdflags.
var std = New(os.Stderr, "", Lstdflags)
// itoa appends the decimal ASCII representation of i to *buf. The number
// is zero-padded on the left to at least wid digits; a negative width
// avoids zero-padding. The function supports only non-negative integers.
func itoa(buf *[]byte, i int, wid int) {
	val := uint(i)
	if val == 0 && wid <= 1 {
		*buf = append(*buf, '0')
		return
	}
	// The digits are produced from the least significant one, so the
	// scratch array is filled from its end.
	var digits [32]byte
	pos := len(digits)
	for val > 0 || wid > 0 {
		pos--
		digits[pos] = '0' + byte(val%10)
		val /= 10
		wid--
	}
	*buf = append(*buf, digits[pos:]...)
}
// formatHeader appends the header of a log entry to l.buf: the prefix,
// followed by date, time and file information as selected by the flags
// of the logger.
func (l *Logger) formatHeader(t time.Time, file string, line int) {
	buf := &l.buf
	*buf = append(*buf, l.prefix...)

	if l.flag&Ldate != 0 {
		year, month, day := t.Date()
		itoa(buf, year, 4)
		*buf = append(*buf, '-')
		itoa(buf, int(month), 2)
		*buf = append(*buf, '-')
		itoa(buf, day, 2)
		*buf = append(*buf, ' ')
	}
	// Lmicroseconds prints the time even without Ltime.
	if l.flag&(Ltime|Lmicroseconds) != 0 {
		hour, minute, sec := t.Clock()
		itoa(buf, hour, 2)
		*buf = append(*buf, ':')
		itoa(buf, minute, 2)
		*buf = append(*buf, ':')
		itoa(buf, sec, 2)
		if l.flag&Lmicroseconds != 0 {
			*buf = append(*buf, '.')
			itoa(buf, t.Nanosecond()/1e3, 6)
		}
		*buf = append(*buf, ' ')
	}

	if l.flag&(Lshortfile|Llongfile) == 0 {
		return
	}
	if l.flag&Lshortfile != 0 {
		// Keep only the part behind the last slash.
		for i := len(file) - 1; i > 0; i-- {
			if file[i] == '/' {
				file = file[i+1:]
				break
			}
		}
	}
	*buf = append(*buf, file...)
	*buf = append(*buf, ':')
	itoa(buf, line, -1)
	*buf = append(*buf, ": "...)
}
// output formats the header and the string s into l.buf and writes the
// result with a single call of l.out.Write. A newline is appended if s
// doesn't end in one. The function expects l.mu to be locked by the
// caller; the mutex is released while the caller information is
// retrieved and locked again afterwards. The calldepth is handed to
// runtime.Caller and now provides the timestamp for the header.
func (l *Logger) output(calldepth int, now time.Time, s string) error {
	var file string
	var line int
	if l.flag&(Lshortfile|Llongfile) != 0 {
		// Don't hold the lock while runtime.Caller is working.
		l.mu.Unlock()
		var ok bool
		_, file, line, ok = runtime.Caller(calldepth)
		if !ok {
			file = "???"
			line = 0
		}
		l.mu.Lock()
	}
	// Reuse the buffer of the previous call.
	l.buf = l.buf[:0]
	l.formatHeader(now, file, line)
	l.buf = append(l.buf, s...)
	if len(s) == 0 || s[len(s)-1] != '\n' {
		l.buf = append(l.buf, '\n')
	}
	_, err := l.out.Write(l.buf)
	return err
}
// Output formats v like fmt.Sprint and writes the resulting string with
// the header controlled by the flags to the l.out writer. A newline will
// be appended if the string doesn't end in a newline. Nothing is written
// and nil is returned if one of the bits of noflag is set in the flags of
// the logger. Calldepth is used to recover the PC, although all current
// calls of Output use the call depth 2. Access to the function is
// serialized.
func (l *Logger) Output(calldepth, noflag int, v ...interface{}) error {
	now := time.Now()
	l.mu.Lock()
	defer l.mu.Unlock()
	if l.flag&noflag != 0 {
		return nil
	}
	s := fmt.Sprint(v...)
	// One more frame because output is called from here.
	return l.output(calldepth+1, now, s)
}
// Outputf works like Output but formats the message like fmt.Sprintf
// using the format string.
func (l *Logger) Outputf(calldepth int, noflag int, format string, v ...interface{}) error {
	now := time.Now()
	l.mu.Lock()
	defer l.mu.Unlock()
	if l.flag&noflag != 0 {
		return nil
	}
	s := fmt.Sprintf(format, v...)
	// One more frame because output is called from here.
	return l.output(calldepth+1, now, s)
}
// Outputln works like Output but formats the message like fmt.Sprintln.
func (l *Logger) Outputln(calldepth int, noflag int, v ...interface{}) error {
	now := time.Now()
	l.mu.Lock()
	defer l.mu.Unlock()
	if l.flag&noflag != 0 {
		return nil
	}
	s := fmt.Sprintln(v...)
	// One more frame because output is called from here.
	return l.output(calldepth+1, now, s)
}
// Panic prints the message like Print and calls panic with the message
// formatted by fmt.Sprint. The printing might be suppressed by the flag
// Lnopanic; the panic call is not. An error of the output is ignored.
func (l *Logger) Panic(v ...interface{}) {
	l.Output(2, Lnopanic, v...)
	s := fmt.Sprint(v...)
	panic(s)
}
// Panic prints the message like Print using the standard logger and calls
// panic with the message formatted by fmt.Sprint. The printing might be
// suppressed by the flag Lnopanic; the panic call is not.
func Panic(v ...interface{}) {
	std.Output(2, Lnopanic, v...)
	s := fmt.Sprint(v...)
	panic(s)
}
// Panicf prints the message like Printf and calls panic with the message
// formatted by fmt.Sprintf. The printing might be suppressed by the flag
// Lnopanic; the panic call is not. An error of the output is ignored.
func (l *Logger) Panicf(format string, v ...interface{}) {
	l.Outputf(2, Lnopanic, format, v...)
	s := fmt.Sprintf(format, v...)
	panic(s)
}
// Panicf prints the message like Printf using the standard logger and
// calls panic with the message formatted by fmt.Sprintf. The printing
// might be suppressed by the flag Lnopanic; the panic call is not.
func Panicf(format string, v ...interface{}) {
	std.Outputf(2, Lnopanic, format, v...)
	s := fmt.Sprintf(format, v...)
	panic(s)
}
// Panicln prints the message like Println and calls panic with the
// message formatted by fmt.Sprintln, so the panic value ends in a
// newline. The printing might be suppressed by the flag Lnopanic; the
// panic call is not.
func (l *Logger) Panicln(v ...interface{}) {
	l.Outputln(2, Lnopanic, v...)
	s := fmt.Sprintln(v...)
	panic(s)
}
// Panicln prints the message like Println using the standard logger and
// calls panic with the message formatted by fmt.Sprintln, so the panic
// value ends in a newline. The printing might be suppressed by the flag
// Lnopanic; the panic call is not.
func Panicln(v ...interface{}) {
	std.Outputln(2, Lnopanic, v...)
	s := fmt.Sprintln(v...)
	panic(s)
}
// Fatal prints the message like Print and calls os.Exit(1). The
// printing might be suppressed by the flag Lnofatal; the exit is not.
// An error of the output is ignored.
func (l *Logger) Fatal(v ...interface{}) {
	l.Output(2, Lnofatal, v...)
	os.Exit(1)
}
// Fatal prints the message like Print using the standard logger and calls
// os.Exit(1). The printing might be suppressed by the flag Lnofatal; the
// exit is not.
func Fatal(v ...interface{}) {
	std.Output(2, Lnofatal, v...)
	os.Exit(1)
}
// Fatalf prints the message like Printf and calls os.Exit(1). The
// printing might be suppressed by the flag Lnofatal; the exit is not.
// An error of the output is ignored.
func (l *Logger) Fatalf(format string, v ...interface{}) {
	l.Outputf(2, Lnofatal, format, v...)
	os.Exit(1)
}
// Fatalf prints the message like Printf using the standard logger and
// calls os.Exit(1). The printing might be suppressed by the flag
// Lnofatal; the exit is not.
func Fatalf(format string, v ...interface{}) {
	std.Outputf(2, Lnofatal, format, v...)
	os.Exit(1)
}
// Fatalln prints the message like Println and calls os.Exit(1). The
// printing might be suppressed by the flag Lnofatal; the exit is not.
//
// The parameter format is not interpreted as a format string. It is kept
// in the signature for backward compatibility and is printed as the first
// operand, followed by the operands in v.
func (l *Logger) Fatalln(format string, v ...interface{}) {
	// The first argument used to be dropped silently; it is part of the
	// message and must be printed.
	args := make([]interface{}, 0, len(v)+1)
	args = append(args, format)
	args = append(args, v...)
	l.Outputln(2, Lnofatal, args...)
	os.Exit(1)
}
// Fatalln prints the message like Println using the standard logger and
// calls os.Exit(1). The printing might be suppressed by the flag
// Lnofatal; the exit is not.
//
// The parameter format is not interpreted as a format string. It is kept
// in the signature for backward compatibility and is printed as the first
// operand, followed by the operands in v.
func Fatalln(format string, v ...interface{}) {
	// The first argument used to be dropped silently; it is part of the
	// message and must be printed.
	args := make([]interface{}, 0, len(v)+1)
	args = append(args, format)
	args = append(args, v...)
	std.Outputln(2, Lnofatal, args...)
	os.Exit(1)
}
// Warn prints the message like Print. The printing might be suppressed
// by the flag Lnowarn. An error of the output is ignored.
func (l *Logger) Warn(v ...interface{}) {
	l.Output(2, Lnowarn, v...)
}
// Warn prints the message like Print using the standard logger. The
// printing might be suppressed by the flag Lnowarn.
func Warn(v ...interface{}) {
	std.Output(2, Lnowarn, v...)
}
// Warnf prints the message like Printf. The printing might be suppressed
// by the flag Lnowarn. An error of the output is ignored.
func (l *Logger) Warnf(format string, v ...interface{}) {
	l.Outputf(2, Lnowarn, format, v...)
}
// Warnf prints the message like Printf using the standard logger. The
// printing might be suppressed by the flag Lnowarn.
func Warnf(format string, v ...interface{}) {
	std.Outputf(2, Lnowarn, format, v...)
}
// Warnln prints the message like Println. The printing might be suppressed
// by the flag Lnowarn. An error of the output is ignored.
func (l *Logger) Warnln(v ...interface{}) {
	l.Outputln(2, Lnowarn, v...)
}
// Warnln prints the message like Println using the standard logger. The
// printing might be suppressed by the flag Lnowarn.
func Warnln(v ...interface{}) {
	std.Outputln(2, Lnowarn, v...)
}
// Print prints the message like fmt.Print. The printing might be suppressed
// by the flag Lnoprint. An error of the output is ignored.
func (l *Logger) Print(v ...interface{}) {
	l.Output(2, Lnoprint, v...)
}
// Print prints the message like fmt.Print using the standard logger. The
// printing might be suppressed by the flag Lnoprint.
func Print(v ...interface{}) {
	std.Output(2, Lnoprint, v...)
}
// Printf prints the message like fmt.Printf. The printing might be suppressed
// by the flag Lnoprint. An error of the output is ignored.
func (l *Logger) Printf(format string, v ...interface{}) {
	l.Outputf(2, Lnoprint, format, v...)
}
// Printf prints the message like fmt.Printf using the standard logger. The
// printing might be suppressed by the flag Lnoprint.
func Printf(format string, v ...interface{}) {
	std.Outputf(2, Lnoprint, format, v...)
}
// Println prints the message like fmt.Println. The printing might be
// suppressed by the flag Lnoprint. An error of the output is ignored.
func (l *Logger) Println(v ...interface{}) {
	l.Outputln(2, Lnoprint, v...)
}
// Println prints the message like fmt.Println using the standard logger.
// The printing might be suppressed by the flag Lnoprint.
func Println(v ...interface{}) {
	std.Outputln(2, Lnoprint, v...)
}
// Debug prints the message like Print. The printing might be suppressed
// by the flag Lnodebug. An error of the output is ignored.
func (l *Logger) Debug(v ...interface{}) {
	l.Output(2, Lnodebug, v...)
}
// Debug prints the message like Print using the standard logger. The
// printing might be suppressed by the flag Lnodebug, which is part of the
// initial flags Lstdflags of the standard logger.
func Debug(v ...interface{}) {
	std.Output(2, Lnodebug, v...)
}
// Debugf prints the message like Printf. The printing might be suppressed
// by the flag Lnodebug. An error of the output is ignored.
func (l *Logger) Debugf(format string, v ...interface{}) {
	l.Outputf(2, Lnodebug, format, v...)
}
// Debugf prints the message like Printf using the standard logger. The
// printing might be suppressed by the flag Lnodebug, which is part of the
// initial flags Lstdflags of the standard logger.
func Debugf(format string, v ...interface{}) {
	std.Outputf(2, Lnodebug, format, v...)
}
// Debugln prints the message like Println. The printing might be suppressed
// by the flag Lnodebug. An error of the output is ignored.
func (l *Logger) Debugln(v ...interface{}) {
	l.Outputln(2, Lnodebug, v...)
}
// Debugln prints the message like Println using the standard logger. The
// printing might be suppressed by the flag Lnodebug, which is part of the
// initial flags Lstdflags of the standard logger.
func Debugln(v ...interface{}) {
	std.Outputln(2, Lnodebug, v...)
}
// Flags reports the flags the logger currently uses. The value is read
// while holding the mutex of the logger.
func (l *Logger) Flags() int {
	l.mu.Lock()
	flag := l.flag
	l.mu.Unlock()
	return flag
}
// Flags returns the current flags used by the standard logger.
func Flags() int {
	return std.Flags()
}
// SetFlags replaces the flags of the logger by flag. The value is written
// while holding the mutex of the logger.
func (l *Logger) SetFlags(flag int) {
	l.mu.Lock()
	l.flag = flag
	l.mu.Unlock()
}
// SetFlags sets the flags for the standard logger.
func SetFlags(flag int) {
	std.SetFlags(flag)
}
// Prefix reports the prefix the logger writes at the beginning of each
// line. The value is read while holding the mutex of the logger.
func (l *Logger) Prefix() string {
	l.mu.Lock()
	prefix := l.prefix
	l.mu.Unlock()
	return prefix
}
// Prefix returns the prefix used by the standard logger of the package.
func Prefix() string {
	return std.Prefix()
}
// SetPrefix replaces the prefix of the logger. The value is written while
// holding the mutex of the logger.
func (l *Logger) SetPrefix(prefix string) {
	l.mu.Lock()
	l.prefix = prefix
	l.mu.Unlock()
}
// SetPrefix sets the prefix of the standard logger of the package.
func SetPrefix(prefix string) {
	std.SetPrefix(prefix)
}
// SetOutput makes w the destination of the output of the logger. The
// writer is replaced while holding the mutex of the logger.
func (l *Logger) SetOutput(w io.Writer) {
	l.mu.Lock()
	l.out = w
	l.mu.Unlock()
}
// SetOutput sets the output for the standard logger of the package.
func SetOutput(w io.Writer) {
	std.SetOutput(w)
}

@ -0,0 +1,522 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"unicode"
)
// node represents a node in the binary tree. The links are indices into
// the node ring buffer of the binTree; a missing link is stored as null.
type node struct {
	// x is the search value
	x uint32
	// p is the index of the parent node; null for the root
	p uint32
	// l is the index of the left child
	l uint32
	// r is the index of the right child
	r uint32
}
// wordLen is the number of bytes represented by the x field of a node.
const wordLen = 4
// binTree supports the identification of the next operation based on a
// binary tree.
//
// Nodes will be identified by their index into the ring buffer.
type binTree struct {
	// dict is the encoder dictionary; it is set by SetDict
	dict *encoderDict
	// ring buffer of nodes
	node []node
	// absolute offset of the entry for the next node. Position 4
	// byte larger.
	hoff int64
	// front position in the node ring buffer
	front uint32
	// index of the root node; null if the tree is empty
	root uint32
	// current x value; holds the last four bytes written
	x uint32
	// preallocated array used by NextOp for the data to match
	data []byte
}
// null represents the nonexistent index. We can't use zero because it
// would always exist or we would need to decrease the index for each
// reference. The value is the largest uint32, 2^32-1.
const null uint32 = 1<<32 - 1
// newBinTree creates an empty binTree. The capacity is the number of
// nodes in the ring buffer and therefore defines the maximum distance for
// which matches will be found. An error is returned if the capacity is
// smaller than one or not smaller than null.
func newBinTree(capacity int) (t *binTree, err error) {
	switch {
	case capacity < 1:
		return nil, errors.New(
			"newBinTree: capacity must be larger than zero")
	case int64(capacity) >= int64(null):
		return nil, errors.New(
			"newBinTree: capacity must less 2^{32}-1")
	}
	tree := new(binTree)
	tree.node = make([]node, capacity)
	// The first node can only be added after wordLen bytes have been
	// written.
	tree.hoff = -int64(wordLen)
	tree.root = null
	tree.data = make([]byte, maxMatchLen)
	return tree, nil
}
// SetDict sets the encoder dictionary that provides the buffer used for
// matching.
func (t *binTree) SetDict(d *encoderDict) { t.dict = d }
// WriteByte writes a single byte into the binary tree. The byte is shifted
// into the current four-byte value; as soon as enough bytes have been
// written a node with that value is stored at the front of the ring
// buffer and added to the tree. The function always returns nil.
func (t *binTree) WriteByte(c byte) error {
	t.x = t.x<<8 | uint32(c)
	t.hoff++
	if t.hoff < 0 {
		// Not enough bytes for a complete value yet.
		return nil
	}
	slot := t.front
	if t.hoff > int64(slot) {
		// The ring buffer has wrapped; the slot holds an old node that
		// is still part of the tree.
		t.remove(slot)
	}
	t.node[slot].x = t.x
	t.add(slot)
	next := slot + 1
	if int64(next) >= int64(len(t.node)) {
		next = 0
	}
	t.front = next
	return nil
}
// Write writes a sequence of bytes into the binTree structure. It calls
// WriteByte for every byte of p and always returns len(p) and a nil error.
func (t *binTree) Write(p []byte) (n int, err error) {
	for _, c := range p {
		t.WriteByte(c)
	}
	return len(p), nil
}
// add inserts the node v into the tree. The x value of the node must have
// been set and the node must not be part of the tree before. Nodes with
// a value equal to an existing node are put into its left subtree.
func (t *binTree) add(v uint32) {
	vn := &t.node[v]
	// A new node is always a leaf.
	vn.l, vn.r = null, null

	// Walk down from the root and remember the link that will point to
	// v. For an empty tree the link is the root itself and the parent
	// stays null.
	parent := null
	link := &t.root
	for *link != null {
		parent = *link
		pn := &t.node[parent]
		if vn.x <= pn.x {
			link = &pn.l
		} else {
			link = &pn.r
		}
	}
	*link = v
	vn.p = parent
}
// parent returns the index p of the parent of node v together with a
// pointer to the link that refers to v. For the root node p is null and
// the pointer refers to the root field of the tree.
func (t *binTree) parent(v uint32) (p uint32, ptr *uint32) {
	if v == t.root {
		return null, &t.root
	}
	p = t.node[v].p
	pn := &t.node[p]
	// v is the right child unless the left link points to it.
	ptr = &pn.r
	if pn.l == v {
		ptr = &pn.l
	}
	return p, ptr
}
// remove unlinks the node v from the tree. A node with at most one child
// is replaced by that child. A node with two children is replaced by its
// in-order predecessor, the node with the maximum value in the left
// subtree. The fields of node v itself are not modified.
func (t *binTree) remove(v uint32) {
	vn := &t.node[v]
	// ptr refers to the link that currently points to v.
	p, ptr := t.parent(v)
	l, r := vn.l, vn.r
	if l == null {
		// Move the right child up.
		*ptr = r
		if r != null {
			t.node[r].p = p
		}
		return
	}
	if r == null {
		// Move the left child up.
		*ptr = l
		t.node[l].p = p
		return
	}
	// Search the in-order predecessor u.
	un := &t.node[l]
	ur := un.r
	if ur == null {
		// In order predecessor is l. Move it up.
		un.r = r
		t.node[r].p = l
		un.p = p
		*ptr = l
		return
	}
	var u uint32
	for {
		// Look for the max value in the tree where l is root.
		u = ur
		ur = t.node[u].r
		if ur == null {
			break
		}
	}
	// replace u with ul; u has no right child and is the right child of
	// its parent up
	un = &t.node[u]
	ul := un.l
	up := un.p
	t.node[up].r = ul
	if ul != null {
		t.node[ul].p = up
	}
	// replace v by u
	un.l, un.r = l, r
	t.node[l].p = u
	t.node[r].p = u
	*ptr = u
	un.p = p
}
// search looks in the subtree with root v for the node that has the value
// x or for the nodes that brace it. If a node with value x is found, the
// one highest in the tree is returned as a and b; all other nodes with the
// same value live in its left subtree. Otherwise a is the last node
// visited with a smaller value and b the last node visited with a larger
// value; either may be null.
func (t *binTree) search(v uint32, x uint32) (a, b uint32) {
	a, b = null, null
	for v != null {
		vn := &t.node[v]
		switch {
		case x == vn.x:
			return v, v
		case x < vn.x:
			// v is an upper bound; continue on the left.
			b, v = v, vn.l
		default:
			// v is a lower bound; continue on the right.
			a, v = v, vn.r
		}
	}
	return a, b
}
// max returns the node with the maximum value in the subtree with v as
// root, which is the right-most node. It returns null if v is null.
func (t *binTree) max(v uint32) uint32 {
	if v == null {
		return null
	}
	for next := t.node[v].r; next != null; next = t.node[v].r {
		v = next
	}
	return v
}
// min returns the node with the minimum value in the subtree with v as
// root, which is the left-most node. It returns null if v is null.
func (t *binTree) min(v uint32) uint32 {
	if v == null {
		return null
	}
	for next := t.node[v].l; next != null; next = t.node[v].l {
		v = next
	}
	return v
}
// pred returns the in-order predecessor of node v or null if there is
// none. The predecessor is the maximum of the left subtree or, if v has no
// left child, the first ancestor that is reached from its right child.
func (t *binTree) pred(v uint32) uint32 {
	if v == null {
		return null
	}
	if u := t.max(t.node[v].l); u != null {
		return u
	}
	// Climb up until the step to the parent comes from a right child.
	for p := t.node[v].p; p != null; v, p = p, t.node[p].p {
		if t.node[p].r == v {
			return p
		}
	}
	return null
}
// succ returns the in-order successor of node v or null if there is none.
// The successor is the minimum of the right subtree or, if v has no right
// child, the first ancestor that is reached from its left child.
func (t *binTree) succ(v uint32) uint32 {
	if v == null {
		return null
	}
	if u := t.min(t.node[v].r); u != null {
		return u
	}
	// Climb up until the step to the parent comes from a left child.
	for p := t.node[v].p; p != null; v, p = p, t.node[p].p {
		if t.node[p].l == v {
			return p
		}
	}
	return null
}
// xval converts the first four bytes of a into a 32-bit unsigned integer
// in big-endian order. If a has fewer than four bytes the missing low
// order bytes are zero.
func xval(a []byte) uint32 {
	var x uint32
	for i := 0; i < len(a) && i < 4; i++ {
		// Byte i is placed i bytes below the most significant byte.
		x |= uint32(a[i]) << (24 - 8*uint(i))
	}
	return x
}
// dumpX converts the value x into a string of four bytes, starting with
// the most significant byte of x. Bytes that are not graphic according to
// unicode.IsGraphic are replaced by a dot.
func dumpX(x uint32) string {
	var out [4]byte
	for i, shift := 0, uint(24); i < len(out); i, shift = i+1, shift-8 {
		c := byte(x >> shift)
		if !unicode.IsGraphic(rune(c)) {
			c = '.'
		}
		out[i] = c
	}
	return string(out[:])
}
/*
// dumpNode writes a representation of the node v into the io.Writer.
func (t *binTree) dumpNode(w io.Writer, v uint32, indent int) {
if v == null {
return
}
vn := &t.node[v]
t.dumpNode(w, vn.r, indent+2)
for i := 0; i < indent; i++ {
fmt.Fprint(w, " ")
}
if vn.p == null {
fmt.Fprintf(w, "node %d %q parent null\n", v, dumpX(vn.x))
} else {
fmt.Fprintf(w, "node %d %q parent %d\n", v, dumpX(vn.x), vn.p)
}
t.dumpNode(w, vn.l, indent+2)
}
// dump prints a representation of the binary tree into the writer.
func (t *binTree) dump(w io.Writer) error {
bw := bufio.NewWriter(w)
t.dumpNode(bw, t.root, 0)
return bw.Flush()
}
*/
// distance returns how many positions the node v lies behind the front of
// the node ring buffer. The result is in the range from 1 to the length of
// the ring buffer.
func (t *binTree) distance(v uint32) int {
	d := int(t.front) - int(v)
	if d > 0 {
		return d
	}
	// v is at or after front, so it was written before the wrap-around.
	return d + len(t.node)
}
// matchParams controls the search for matches done by the match method.
type matchParams struct {
	// rep holds the repeat distances; a match of length 1 is only
	// considered if its distance corresponds to rep[0]
	rep [4]uint32
	// length when match will be accepted
	nAccept int
	// nodes to check
	check int
	// finish if length get shorter
	stopShorter bool
}
// match tries to improve the match m with the distances provided by
// distIter. It compares the data stored in t.data against the dictionary
// buffer at each distance and keeps a candidate if it is longer than m or
// has the same length and a smaller distance.
//
// The function returns the best match found and the number of distances
// retrieved from distIter. The value accepted is true if a match with at
// least p.nAccept bytes was found or if p.check distances have been
// checked. It is false if distIter is exhausted or if p.stopShorter is set
// and a candidate failed to reach the current match length.
func (t *binTree) match(m match, distIter func() (int, bool), p matchParams,
) (r match, checked int, accepted bool) {
	buf := &t.dict.buf
	for {
		if checked >= p.check {
			return m, checked, true
		}
		dist, ok := distIter()
		if !ok {
			return m, checked, false
		}
		checked++
		if m.n > 0 {
			// Quick check: a better candidate must agree with the
			// data in the last byte of the current match length.
			i := buf.rear - dist + m.n - 1
			if i < 0 {
				i += len(buf.data)
			} else if i >= len(buf.data) {
				i -= len(buf.data)
			}
			if buf.data[i] != t.data[m.n-1] {
				if p.stopShorter {
					return m, checked, false
				}
				continue
			}
		}
		n := buf.matchLen(dist, t.data)
		switch n {
		case 0:
			if p.stopShorter {
				return m, checked, false
			}
			continue
		case 1:
			// A single byte is only useful with the distance rep[0].
			if uint32(dist-minDistance) != p.rep[0] {
				continue
			}
		}
		// Skip candidates that are shorter or have the same length
		// but not a smaller distance.
		if n < m.n || (n == m.n && int64(dist) >= m.distance) {
			continue
		}
		m = match{int64(dist), n}
		if n >= p.nAccept {
			return m, checked, true
		}
	}
}
// NextOp computes the next operation for the data at the read position of
// the dictionary buffer. It returns the best match found or, if there is
// no match, a literal for the first byte. The function panics if the
// buffer has no data.
//
// At most 32 candidate distances are checked in total: first the
// distances 3, 2 and 1, then the candidates found by searching the binary
// tree for the first four bytes of the data.
func (t *binTree) NextOp(rep [4]uint32) operation {
	// retrieve maxMatchLen data
	n, _ := t.dict.buf.Peek(t.data[:maxMatchLen])
	if n == 0 {
		panic("no data in buffer")
	}
	t.data = t.data[:n]
	var (
		m                  match
		x, u, v            uint32
		iterPred, iterSucc func() (int, bool)
	)
	p := matchParams{
		rep:     rep,
		nAccept: maxMatchLen,
		check:   32,
	}
	// iterSmall provides the distances 3, 2 and 1.
	i := 4
	iterSmall := func() (dist int, ok bool) {
		i--
		if i <= 0 {
			return 0, false
		}
		return i, true
	}
	m, checked, accepted := t.match(m, iterSmall, p)
	if accepted {
		goto end
	}
	// Reduce the budget by the candidates already checked.
	p.check -= checked
	x = xval(t.data)
	u, v = t.search(t.root, x)
	if u == v && len(t.data) == 4 {
		// Exactly four bytes are available and search returned the same
		// index twice. Iterate over the nodes with the value x, which
		// are found by searching the left subtree repeatedly.
		iter := func() (dist int, ok bool) {
			if u == null {
				return 0, false
			}
			dist = t.distance(u)
			u, v = t.search(t.node[u].l, x)
			if u != v {
				u = null
			}
			return dist, true
		}
		m, _, _ = t.match(m, iter, p)
		goto end
	}
	// Check the in-order successors starting with v and then the
	// predecessors starting with u, stopping each walk as soon as a
	// candidate is shorter than the current match.
	p.stopShorter = true
	iterSucc = func() (dist int, ok bool) {
		if v == null {
			return 0, false
		}
		dist = t.distance(v)
		v = t.succ(v)
		return dist, true
	}
	m, checked, accepted = t.match(m, iterSucc, p)
	if accepted {
		goto end
	}
	p.check -= checked
	iterPred = func() (dist int, ok bool) {
		if u == null {
			return 0, false
		}
		dist = t.distance(u)
		u = t.pred(u)
		return dist, true
	}
	m, _, _ = t.match(m, iterPred, p)
end:
	if m.n == 0 {
		return lit{t.data[0]}
	}
	return m
}

@ -0,0 +1,47 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
/* Naming conventions follow the CodeReviewComments in the Go Wiki. */
// ntz32Const is the multiplier of the table lookup used by nlz32 and by
// the ntz32 function that is currently commented out.
const ntz32Const = 0x04d7651f
// ntz32Table is a helper table for de Bruijn algorithm by Danny Dubé.
// See Henry S. Warren, Jr. "Hacker's Delight" section 5-1 figure 5-26.
// The table is indexed by the upper five bits of a power of two
// multiplied by ntz32Const.
var ntz32Table = [32]int8{
	0, 1, 2, 24, 3, 19, 6, 25,
	22, 4, 20, 10, 16, 7, 12, 26,
	31, 23, 18, 5, 21, 9, 15, 11,
	30, 17, 8, 14, 29, 13, 28, 27,
}
/*
// ntz32 computes the number of trailing zeros for an unsigned 32-bit integer.
func ntz32(x uint32) int {
if x == 0 {
return 32
}
x = (x & -x) * ntz32Const
return int(ntz32Table[x>>27])
}
*/
// nlz32 computes the number of leading zeros for an unsigned 32-bit integer.
func nlz32(x uint32) int {
// Smear left most bit to the right
x |= x >> 1
x |= x >> 2
x |= x >> 4
x |= x >> 8
x |= x >> 16
// Use ntz mechanism to calculate nlz.
x++
if x == 0 {
return 0
}
x *= ntz32Const
return 32 - int(ntz32Table[x>>27])
}

@ -0,0 +1,39 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"io"
)
// breader provides the ReadByte function for a Reader. It doesn't read
// more data from the reader than absolutely necessary.
type breader struct {
	io.Reader
	// helper slice to save allocations; ByteReader creates it with
	// length one
	p []byte
}
// ByteReader converts an io.Reader into an io.ByteReader. A reader that
// already implements io.ByteReader is returned unchanged; all other
// readers are wrapped so that each ReadByte call reads a single byte.
func ByteReader(r io.Reader) io.ByteReader {
	if br, ok := r.(io.ByteReader); ok {
		return br
	}
	return &breader{Reader: r, p: make([]byte, 1)}
}
// ReadByte reads a single byte from the underlying reader. If the reader
// provides a byte, it is returned with a nil error even if the reader
// reported an error together with it. If no byte was read the error of
// the reader is returned, or a "no data" error if the reader reported
// none.
func (r *breader) ReadByte() (c byte, err error) {
	n, err := r.Reader.Read(r.p)
	switch {
	case n >= 1:
		return r.p[0], nil
	case err != nil:
		return 0, err
	default:
		return 0, errors.New("breader.ReadByte: no data")
	}
}

@ -0,0 +1,171 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
)
// buffer provides a circular buffer of bytes. If the front index equals
// the rear index the buffer is empty. As a consequence front cannot be
// equal rear for a full buffer. So a full buffer has a length that is
// one byte less than the length of the data slice.
type buffer struct {
	// data stores the bytes of the buffer
	data []byte
	// front is the index where the next byte will be written
	front int
	// rear is the index of the next byte to be read
	rear int
}
// newBuffer creates a buffer with the given size. The data slice is
// allocated one byte larger than size, because one slot of the circular
// buffer always stays unused.
func newBuffer(size int) *buffer {
	return &buffer{data: make([]byte, size+1)}
}
// Cap returns the capacity of the buffer, which is one less than the
// length of the data slice.
func (b *buffer) Cap() int {
	return len(b.data) - 1
}
// Reset resets the buffer. The front and rear index are set to zero; the
// content of the data slice is not modified.
func (b *buffer) Reset() {
	b.front = 0
	b.rear = 0
}
// Buffered returns the number of bytes buffered, which is the distance
// from the rear index to the front index in the circular buffer.
func (b *buffer) Buffered() int {
	if b.front >= b.rear {
		return b.front - b.rear
	}
	// front has wrapped around the end of the data slice.
	return b.front - b.rear + len(b.data)
}
// Available returns the number of bytes available for writing. One slot
// of the data slice is never used, so that a full buffer can be
// distinguished from an empty one.
func (b *buffer) Available() int {
	free := b.rear - 1 - b.front
	if free >= 0 {
		return free
	}
	return free + len(b.data)
}
// addIndex adds the non-negative integer n to the index i and returns the
// resulting index, wrapped into the data slice. The length of the data
// slice is subtracted before the addition to avoid an integer overflow.
func (b *buffer) addIndex(i int, n int) int {
	size := len(b.data)
	j := i + (n - size)
	if j >= 0 {
		// The sum reached the end of the slice and has been wrapped.
		return j
	}
	return j + size
}
// Read reads bytes from the buffer into p and returns the number of
// bytes read. The function never returns an error but might return less
// data than requested. The bytes are copied by Peek and the rear index is
// advanced by the number of bytes read.
func (b *buffer) Read(p []byte) (n int, err error) {
	n, err = b.Peek(p)
	b.rear = b.addIndex(b.rear, n)
	return n, err
}
// Peek copies bytes from the buffer into p without changing the buffer
// and returns the number of bytes copied. Peek never returns an error but
// copies less data than requested if fewer bytes are buffered.
func (b *buffer) Peek(p []byte) (n int, err error) {
	if m := b.Buffered(); m < len(p) {
		p = p[:m]
	}
	n = len(p)
	// Copy the part up to the end of the data slice first and continue at
	// its start if the data wraps around.
	head := copy(p, b.data[b.rear:])
	if head < n {
		copy(p[head:], b.data)
	}
	return n, nil
}
// Discard skips the next n bytes to read from the buffer and returns the
// number of bytes discarded.
//
// If n is negative nothing is discarded and an error is returned. If
// fewer than n bytes are buffered, all of them are discarded and an error
// is returned.
func (b *buffer) Discard(n int) (discarded int, err error) {
	if n < 0 {
		return 0, errors.New("buffer.Discard: negative argument")
	}
	if avail := b.Buffered(); n > avail {
		n = avail
		err = errors.New(
			"buffer.Discard: discarded less bytes then requested")
	}
	b.rear = b.addIndex(b.rear, n)
	return n, err
}
// ErrNoSpace indicates that there is insufficient space for the Write
// operation. It is returned by the Write and WriteByte methods of buffer.
var ErrNoSpace = errors.New("insufficient space")
// Write puts data into the buffer. If p doesn't fit completely, only the
// bytes for which space is available are written and ErrNoSpace is
// returned together with their number.
func (b *buffer) Write(p []byte) (n int, err error) {
	if free := b.Available(); len(p) > free {
		p = p[:free]
		err = ErrNoSpace
	}
	n = len(p)
	// Fill the part up to the end of the data slice first and continue
	// at its start if the data wraps around.
	if head := copy(b.data[b.front:], p); head < n {
		copy(b.data, p[head:])
	}
	b.front = b.addIndex(b.front, n)
	return n, err
}
// WriteByte writes a single byte into the buffer. The error ErrNoSpace
// is returned, and the buffer is left unchanged, if no space for a single
// byte is available.
func (b *buffer) WriteByte(c byte) error {
	if b.Available() >= 1 {
		b.data[b.front] = c
		b.front = b.addIndex(b.front, 1)
		return nil
	}
	return ErrNoSpace
}
// prefixLen returns the length of the common prefix of a and b, which is
// the number of leading bytes that are equal in both slices.
func prefixLen(a, b []byte) int {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	for i := 0; i < n; i++ {
		if a[i] != b[i] {
			return i
		}
	}
	return n
}
// matchLen returns the length of the common prefix of the byte slice p
// and the data that starts the given distance before the rear index. The
// comparison continues at the start of the data slice if that position
// wraps around its end.
func (b *buffer) matchLen(distance int, p []byte) int {
	start := b.rear - distance
	if start >= 0 {
		return prefixLen(p, b.data[start:])
	}
	// The start position lies before index zero: compare the tail of the
	// data slice first.
	tail := b.data[len(b.data)+start:]
	n := prefixLen(p, tail)
	if n < len(tail) {
		return n
	}
	// The complete tail matched; continue at the start of the slice.
	return n + prefixLen(p[n:], b.data)
}

@ -0,0 +1,37 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"io"
)
// ErrLimit indicates that the limit of the LimitedByteWriter has been
// reached.
var ErrLimit = errors.New("limit reached")
// LimitedByteWriter provides a byte writer that can be written until a
// limit is reached. The field N provides the number of remaining
// bytes.
type LimitedByteWriter struct {
	// BW is the underlying byte writer.
	BW io.ByteWriter
	// N is the number of bytes that can still be written.
	N int64
}
// WriteByte writes a single byte to the underlying byte writer. It
// returns ErrLimit without writing if the limit has been reached. The
// field N is decremented by one only if the byte has been written
// successfully; an error of the underlying writer is returned unchanged.
func (l *LimitedByteWriter) WriteByte(c byte) error {
	if l.N <= 0 {
		return ErrLimit
	}
	err := l.BW.WriteByte(c)
	if err == nil {
		l.N--
	}
	return err
}

@ -0,0 +1,277 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"fmt"
"io"
)
// decoder decodes a raw LZMA stream without any header.
type decoder struct {
	// dictionary; the rear pointer of the buffer will be used for
	// reading the data.
	Dict *decoderDict
	// decoder state
	State *state
	// range decoder
	rd *rangeDecoder
	// start stores the head value of the dictionary for the LZMA
	// stream
	start int64
	// size of uncompressed data; negative if the size is unknown
	size int64
	// end-of-stream encountered
	eos bool
	// EOS marker found
	eosMarker bool
}
// newDecoder creates a new decoder instance that reads the compressed
// data from br and uses the given state and dictionary. The parameter
// size provides the expected byte size of the decompressed data. If the
// size is unknown use a negative value. In that case the decoder will
// look for a terminating end-of-stream marker. An error of the
// initialization of the range decoder is returned.
func newDecoder(br io.ByteReader, state *state, dict *decoderDict, size int64) (d *decoder, err error) {
	rd, err := newRangeDecoder(br)
	if err != nil {
		return nil, err
	}
	d = new(decoder)
	d.State, d.Dict = state, dict
	d.rd = rd
	d.size = size
	// Remember where the stream starts in the dictionary.
	d.start = dict.pos()
	return d, nil
}
// Reopen restarts the decoder with a new byte reader and a new size. The
// start position is set to the current position of the dictionary, which
// resets the Decompressed counter to zero, and the end-of-stream flag is
// cleared. If the range decoder cannot be initialized, the error is
// returned and start, size and the end-of-stream flag are left unchanged.
func (d *decoder) Reopen(br io.ByteReader, size int64) error {
	rd, err := newRangeDecoder(br)
	d.rd = rd
	if err != nil {
		return err
	}
	d.start = d.Dict.pos()
	d.size = size
	d.eos = false
	return nil
}
// decodeLiteral decodes a single literal from the LZMA stream and returns
// it as a lit operation. The literal codec is called with the byte at
// distance one, the head of the dictionary and the byte that the repeat
// distance rep[0] refers to.
func (d *decoder) decodeLiteral() (op operation, err error) {
	st, dict := d.State, d.Dict
	litState := st.litState(dict.byteAt(1), dict.head)
	matchByte := dict.byteAt(int(st.rep[0]) + 1)
	s, err := st.litCodec.Decode(d.rd, st.state, matchByte, litState)
	if err != nil {
		return nil, err
	}
	return lit{s}, nil
}
// errEOS indicates that an EOS marker has been found. It is returned by
// readOp.
var errEOS = errors.New("EOS marker found")
// readOp decodes the next operation from the compressed stream. It
// returns the operation, which is either a literal or a match. If an
// explicit end of stream marker is identified the error errEOS is
// returned. The decoder state and the repeat distances in d.State.rep are
// updated according to the kind of operation decoded.
func (d *decoder) readOp() (op operation, err error) {
	// Value of the end of stream (EOS) marker
	const eosDist = 1<<32 - 1
	state, state2, posState := d.State.states(d.Dict.head)
	b, err := d.State.isMatch[state2].Decode(d.rd)
	if err != nil {
		return nil, err
	}
	if b == 0 {
		// literal
		op, err := d.decodeLiteral()
		if err != nil {
			return nil, err
		}
		d.State.updateStateLiteral()
		return op, nil
	}
	b, err = d.State.isRep[state].Decode(d.rd)
	if err != nil {
		return nil, err
	}
	if b == 0 {
		// simple match; the new distance will become rep[0], so the
		// older distances move one position up
		d.State.rep[3], d.State.rep[2], d.State.rep[1] =
			d.State.rep[2], d.State.rep[1], d.State.rep[0]
		d.State.updateStateMatch()
		// The length decoder returns the length offset.
		n, err := d.State.lenCodec.Decode(d.rd, posState)
		if err != nil {
			return nil, err
		}
		// The dist decoder returns the distance offset. The actual
		// distance is 1 higher.
		d.State.rep[0], err = d.State.distCodec.Decode(d.rd, n)
		if err != nil {
			return nil, err
		}
		if d.State.rep[0] == eosDist {
			d.eosMarker = true
			return nil, errEOS
		}
		op = match{n: int(n) + minMatchLen,
			distance: int64(d.State.rep[0]) + minDistance}
		return op, nil
	}
	// rep match; one of the four repeat distances is used
	b, err = d.State.isRepG0[state].Decode(d.rd)
	if err != nil {
		return nil, err
	}
	dist := d.State.rep[0]
	if b == 0 {
		// rep match 0
		b, err = d.State.isRepG0Long[state2].Decode(d.rd)
		if err != nil {
			return nil, err
		}
		if b == 0 {
			// short rep: a single byte at distance rep[0]
			d.State.updateStateShortRep()
			op = match{n: 1, distance: int64(dist) + minDistance}
			return op, nil
		}
	} else {
		// Select rep[1], rep[2] or rep[3] and move the selected
		// distance to the front of the rep array.
		b, err = d.State.isRepG1[state].Decode(d.rd)
		if err != nil {
			return nil, err
		}
		if b == 0 {
			dist = d.State.rep[1]
		} else {
			b, err = d.State.isRepG2[state].Decode(d.rd)
			if err != nil {
				return nil, err
			}
			if b == 0 {
				dist = d.State.rep[2]
			} else {
				dist = d.State.rep[3]
				d.State.rep[3] = d.State.rep[2]
			}
			d.State.rep[2] = d.State.rep[1]
		}
		d.State.rep[1] = d.State.rep[0]
		d.State.rep[0] = dist
	}
	// The length of the rep match is decoded with its own codec.
	n, err := d.State.repLenCodec.Decode(d.rd, posState)
	if err != nil {
		return nil, err
	}
	d.State.updateStateRep()
	op = match{n: int(n) + minMatchLen, distance: int64(dist) + minDistance}
	return op, nil
}
// apply executes a decoded operation on the decoder dictionary: a match is
// copied with writeMatch and a literal is appended with WriteByte. The
// error of the dictionary call is returned. Any other operation type
// causes a panic.
func (d *decoder) apply(op operation) error {
	switch x := op.(type) {
	case match:
		return d.Dict.writeMatch(x.distance, x.n)
	case lit:
		return d.Dict.WriteByte(x.b)
	}
	panic("op is neither a match nor a literal")
}
// decompress decodes operations into the dictionary for as long as the
// dictionary has room for a match of maximum length. It returns nil when
// the dictionary has no more room, io.EOF when the end of the LZMA stream
// has been reached, io.ErrUnexpectedEOF when the input ends prematurely,
// errSize when the amount of decompressed data contradicts the expected
// size and errDataAfterEOS when input remains after an EOS marker.
func (d *decoder) decompress() error {
	if d.eos {
		return io.EOF
	}
	for d.Dict.Available() >= maxMatchLen {
		op, err := d.readOp()
		if err == errEOS {
			d.eos = true
			if !d.rd.possiblyAtEnd() {
				return errDataAfterEOS
			}
			if d.size >= 0 && d.size != d.Decompressed() {
				return errSize
			}
			return io.EOF
		}
		if err == io.EOF {
			d.eos = true
			return io.ErrUnexpectedEOF
		}
		if err != nil {
			return err
		}
		if err = d.apply(op); err != nil {
			return err
		}
		// Keep decoding while the size is unknown or not yet reached.
		if d.size < 0 || d.Decompressed() < d.size {
			continue
		}
		d.eos = true
		if d.Decompressed() > d.size {
			return errSize
		}
		if d.rd.possiblyAtEnd() {
			return io.EOF
		}
		// The expected size has been reached but input remains; only an
		// EOS marker is acceptable at this point.
		_, err = d.readOp()
		switch err {
		case errEOS:
			return io.EOF
		case nil:
			return errSize
		case io.EOF:
			return io.ErrUnexpectedEOF
		}
		return err
	}
	return nil
}
// Errors that may be returned while decoding data.
var (
// errDataAfterEOS is returned by decompress when an EOS marker has been
// read but the range decoder does not report that it may be at its end.
errDataAfterEOS = errors.New("lzma: data after end of stream marker")
// errSize is returned by decompress when the number of decompressed
// bytes does not agree with the expected size.
errSize = errors.New("lzma: wrong uncompressed data size")
)
// Read copies decompressed data into p, decompressing more input whenever
// the dictionary buffer runs empty. It returns io.EOF once the end of the
// stream has been reached and no buffered data is left. Decompression
// errors other than io.EOF are returned together with the number of bytes
// already copied.
func (d *decoder) Read(p []byte) (n int, err error) {
	for {
		// Read of decoder dict never returns an error.
		k, rerr := d.Dict.Read(p[n:])
		if rerr != nil {
			panic(fmt.Errorf("dictionary read error %s", rerr))
		}
		if k == 0 && d.eos {
			return n, io.EOF
		}
		n += k
		if n >= len(p) {
			return n, nil
		}
		err = d.decompress()
		if err != nil && err != io.EOF {
			return n, err
		}
	}
}
// Decompressed reports how many bytes have been written to the dictionary
// since the decoder was created or last reopened.
func (d *decoder) Decompressed() int64 {
	head := d.Dict.pos()
	return head - d.start
}

@ -0,0 +1,128 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"fmt"
)
// decoderDict provides the dictionary for the decoder. The whole
// dictionary is used as reader buffer.
type decoderDict struct {
// buf holds the dictionary bytes; WriteByte, Write and writeMatch append
// to it and Read drains it.
buf buffer
// head counts the bytes written since creation or the last Reset.
head int64
}
// newDecoderDict allocates a decoder dictionary with a buffer of dictCap
// bytes. dictCap must be in the range [1, MaxDictCap]; otherwise an error
// is returned. The low minimum keeps small test cases simple.
func newDecoderDict(dictCap int) (d *decoderDict, err error) {
	if dictCap < 1 || int64(dictCap) > MaxDictCap {
		return nil, errors.New("lzma: dictCap out of range")
	}
	return &decoderDict{buf: *newBuffer(dictCap)}, nil
}
// Reset sets the dictionary head back to zero, which makes the dictionary
// length zero as well. The underlying buffer is left untouched, so data
// that is still buffered can be read.
func (d *decoderDict) Reset() {
	d.head = 0
}
// WriteByte appends the literal byte c to the dictionary. The head is
// advanced only if the buffer accepted the byte; otherwise the buffer
// error is returned.
func (d *decoderDict) WriteByte(c byte) error {
	err := d.buf.WriteByte(c)
	if err == nil {
		d.head++
	}
	return err
}
// pos reports the current position of the dictionary head.
func (d *decoderDict) pos() int64 {
	return d.head
}
// dictLen reports the number of bytes currently addressable in the
// dictionary: the head position, limited by the buffer capacity.
func (d *decoderDict) dictLen() int {
	if c := d.buf.Cap(); d.head >= int64(c) {
		return c
	}
	return int(d.head)
}
// byteAt returns the byte that lies dist positions behind the buffer
// front. A zero byte is returned if dist is not positive or exceeds the
// current dictionary length.
func (d *decoderDict) byteAt(dist int) byte {
	if dist <= 0 || dist > d.dictLen() {
		return 0
	}
	idx := d.buf.front - dist
	if idx < 0 {
		// wrap around the start of the circular buffer
		idx += len(d.buf.data)
	}
	return d.buf.data[idx]
}
// writeMatch appends a match of the given length, starting dist bytes
// behind the front, to the dictionary. dist must lie within the current
// dictionary length and length within [1, maxMatchLen]; otherwise an error
// is returned.
//
// ErrNoSpace is returned if the buffer cannot take length more bytes. In
// that case data must be read from the dictionary first.
func (d *decoderDict) writeMatch(dist int64, length int) error {
	switch {
	case dist <= 0 || dist > int64(d.dictLen()):
		return errors.New("writeMatch: distance out of range")
	case length <= 0 || length > maxMatchLen:
		return errors.New("writeMatch: length out of range")
	case length > d.buf.Available():
		return ErrNoSpace
	}
	d.head += int64(length)
	src := d.buf.front - int(dist)
	if src < 0 {
		src += len(d.buf.data)
	}
	for remaining := length; remaining > 0; {
		// Select the next contiguous source chunk. The front moves with
		// every Write below, so a chunk may consist of bytes that were
		// produced by an earlier iteration of this loop.
		var chunk []byte
		if src < d.buf.front {
			chunk = d.buf.data[src:d.buf.front]
			src = d.buf.front
		} else {
			chunk = d.buf.data[src:]
			src = 0
		}
		if len(chunk) > remaining {
			chunk = chunk[:remaining]
		}
		if _, err := d.buf.Write(chunk); err != nil {
			panic(fmt.Errorf("d.buf.Write returned error %s", err))
		}
		remaining -= len(chunk)
	}
	return nil
}
// Write appends p to the dictionary buffer and moves the head forward by
// the number of bytes the buffer accepted. The count and error of the
// buffer write are returned.
func (d *decoderDict) Write(p []byte) (n int, err error) {
	written, werr := d.buf.Write(p)
	d.head += int64(written)
	return written, werr
}
// Available reports how many bytes can currently be written into the
// decoder dictionary.
func (d *decoderDict) Available() int {
	return d.buf.Available()
}
// Read moves data from the buffer of the decoder dictionary into p and
// returns the result of the buffer read.
func (d *decoderDict) Read(p []byte) (n int, err error) {
	n, err = d.buf.Read(p)
	return n, err
}

@ -0,0 +1,38 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
// directCodec encodes and decodes values that occupy a fixed number of
// bits. The value of the codec is that number of bits, which must be in
// the range [1,32].
type directCodec byte

// Bits reports the number of bits handled by the codec.
func (dc directCodec) Bits() int {
	bits := int(dc)
	return bits
}
// Encode writes the lowest dc bits of v through the range encoder using
// direct bit encoding. The most-significant of these bits is written
// first. The first encoder error aborts the operation.
func (dc directCodec) Encode(e *rangeEncoder, v uint32) error {
	for remaining := int(dc); remaining > 0; remaining-- {
		shift := uint(remaining - 1)
		if err := e.DirectEncodeBit(v >> shift); err != nil {
			return err
		}
	}
	return nil
}
// Decode reads dc directly encoded bits from the range decoder and
// assembles them into v. The most-significant bit is read first. On a
// decoder error zero is returned together with the error.
func (dc directCodec) Decode(d *rangeDecoder) (v uint32, err error) {
	for remaining := int(dc); remaining > 0; remaining-- {
		bit, derr := d.DirectDecodeBit()
		if derr != nil {
			return 0, derr
		}
		v = v<<1 | bit
	}
	return v, nil
}

@ -0,0 +1,140 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
// Constants used by the distance codec.
const (
// minimum supported distance
minDistance = 1
// maximum supported distance, value is used for the eos marker.
maxDistance = 1 << 32
// number of the supported len states
lenStates = 4
// start for the position models; smaller position slots encode the
// distance directly
startPosModel = 4
// first index with align bits support
endPosModel = 14
// bits for the position slots
posSlotBits = 6
// number of align bits
alignBits = 4
)
// distCodec provides encoding and decoding of distance values.
type distCodec struct {
// one position slot codec per len state
posSlotCodecs [lenStates]treeCodec
// reverse tree codecs for the position slots in
// [startPosModel, endPosModel)
posModel [endPosModel - startPosModel]treeReverseCodec
// codec for the lowest alignBits bits of distances whose position slot
// is endPosModel or larger
alignCodec treeReverseCodec
}
// deepcopy makes dc a deep copy of src by deep-copying every contained
// codec. Copying a codec onto itself is a no-op.
func (dc *distCodec) deepcopy(src *distCodec) {
	if dc == src {
		return
	}
	for i := 0; i < len(dc.posSlotCodecs); i++ {
		dc.posSlotCodecs[i].deepcopy(&src.posSlotCodecs[i])
	}
	for i := 0; i < len(dc.posModel); i++ {
		dc.posModel[i].deepcopy(&src.posModel[i])
	}
	dc.alignCodec.deepcopy(&src.alignCodec)
}
// init sets up all codecs of the distance codec: a tree codec with
// posSlotBits bits per len state, a reverse tree codec for each position
// slot in [startPosModel, endPosModel) and the reverse tree codec for the
// align bits.
func (dc *distCodec) init() {
	for i := range dc.posSlotCodecs {
		dc.posSlotCodecs[i] = makeTreeCodec(posSlotBits)
	}
	for i := range dc.posModel {
		// The model at index i serves position slot startPosModel+i.
		dc.posModel[i] = makeTreeReverseCodec(((startPosModel + i) >> 1) - 1)
	}
	dc.alignCodec = makeTreeReverseCodec(alignBits)
}
// lenState maps the value l to a valid len state by limiting it to
// lenStates-1.
func lenState(l uint32) uint32 {
	const last = lenStates - 1
	if l > last {
		return last
	}
	return l
}
// Encode writes the distance offset dist using the len state derived from
// l. dist may be any uint32 value; it is the actual match distance minus
// one. The distance offset 0xffffffff (eos) marks the end of the stream.
//
// The position slot is written first. Slots below startPosModel carry the
// whole distance, slots below endPosModel are completed by a reverse tree
// codec, and all other slots are completed by directly encoded bits
// followed by the align bits.
func (dc *distCodec) Encode(e *rangeEncoder, dist uint32, l uint32) (err error) {
	posSlot, bits := dist, uint32(0)
	if dist >= startPosModel {
		// Derive the position slot from the position of the highest
		// set bit and the bit right below it.
		bits = uint32(30 - nlz32(dist))
		posSlot = startPosModel - 2 + (bits << 1) + ((dist >> uint(bits)) & 1)
	}
	if err = dc.posSlotCodecs[lenState(l)].Encode(e, posSlot); err != nil {
		return err
	}
	if posSlot < startPosModel {
		return nil
	}
	if posSlot < endPosModel {
		return dc.posModel[posSlot-startPosModel].Encode(dist, e)
	}
	if err = directCodec(bits-alignBits).Encode(e, dist>>alignBits); err != nil {
		return err
	}
	return dc.alignCodec.Encode(dist, e)
}
// Decode reads a distance offset using the len state derived from l. The
// actual match distance is the returned value plus one. The value
// 0xffffffff (eos) marks the end of the stream. On error a zero distance
// is returned together with the error.
func (dc *distCodec) Decode(d *rangeDecoder, l uint32) (dist uint32, err error) {
	slot, err := dc.posSlotCodecs[lenState(l)].Decode(d)
	if err != nil {
		return 0, err
	}
	// Small slots are the distance itself.
	if slot < startPosModel {
		return slot, nil
	}
	bits := (slot >> 1) - 1
	base := (2 | (slot & 1)) << bits
	// Medium slots take the remaining bits from an individual model.
	if slot < endPosModel {
		low, err := dc.posModel[slot-startPosModel].Decode(d)
		if err != nil {
			return 0, err
		}
		return base + low, nil
	}
	// Large slots use directly encoded bits followed by one model for the
	// four align bits.
	high, err := directCodec(bits - alignBits).Decode(d)
	if err != nil {
		return 0, err
	}
	low, err := dc.alignCodec.Decode(d)
	if err != nil {
		return 0, err
	}
	return base + high<<alignBits + low, nil
}

@ -0,0 +1,268 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"fmt"
"io"
)
// opLenMargin provides the upper limit of the number of bytes required
// to encode a single operation. newEncoder uses it as the base value of
// the encoder margin.
const opLenMargin = 16
// compressFlags control the compression process. They are passed to
// encoder.compress.
type compressFlags uint32
// Values for compressFlags.
const (
// all data should be compressed, even if compression is not
// optimal.
all compressFlags = 1 << iota
)
// encoderFlags provide the flags for an encoder. They are passed to
// newEncoder.
type encoderFlags uint32
// Flags for the encoder.
const (
// eosMarker requests an EOS marker to be written.
eosMarker encoderFlags = 1 << iota
)
// encoder compresses data buffered in the encoder dictionary and writes
// it into a byte writer.
type encoder struct {
// dictionary that buffers the data to compress
dict *encoderDict
// LZMA state shared by literal and match encoding
state *state
// range encoder writing to the byte writer
re *rangeEncoder
// dictionary position at creation or the last Reopen; base for
// Compressed
start int64
// generate eos marker
marker bool
// cleared by Reopen; NOTE(review): no other use is visible in this
// file section
limit bool
// number of bytes that must remain available in the range encoder
// before writeOp accepts another operation
margin int
}
// newEncoder returns an encoder that compresses the data of dict into bw.
// Use the LimitedByteWriter of this package if the output must be limited.
// The eosMarker flag requests that a terminating end-of-stream marker is
// written on Close; it also enlarges the margin by five bytes.
func newEncoder(bw io.ByteWriter, state *state, dict *encoderDict,
	flags encoderFlags) (e *encoder, err error) {

	rangeEnc, err := newRangeEncoder(bw)
	if err != nil {
		return nil, err
	}
	e = &encoder{
		dict:   dict,
		state:  state,
		re:     rangeEnc,
		start:  dict.Pos(),
		margin: opLenMargin,
	}
	if flags&eosMarker != 0 {
		e.marker = true
		e.margin += 5
	}
	return e, nil
}
// Write stores the bytes of p in the dictionary. Whenever the dictionary
// runs out of space, buffered data is compressed to make room and the
// write is continued. An error of the compression step, for instance
// ErrLimit once the underlying writer has reached its limit, is returned
// together with the number of bytes stored so far.
func (e *encoder) Write(p []byte) (n int, err error) {
	for {
		k, werr := e.dict.Write(p[n:])
		n += k
		if werr != ErrNoSpace {
			return n, werr
		}
		if cerr := e.compress(0); cerr != nil {
			return n, cerr
		}
	}
}
// Reopen attaches the encoder to a new byte writer. The start position
// moves to the current dictionary position, so Compressed counts from
// zero again, and the limit flag is cleared. If the range encoder cannot
// be created the error is returned and start and limit stay unchanged.
func (e *encoder) Reopen(bw io.ByteWriter) error {
	rangeEnc, err := newRangeEncoder(bw)
	e.re = rangeEnc
	if err != nil {
		return err
	}
	e.limit = false
	e.start = e.dict.Pos()
	return nil
}
// writeLiteral encodes the literal l into the LZMA stream: first a zero
// isMatch bit, then the byte itself through the literal codec. The state
// is switched to the literal state afterwards.
func (e *encoder) writeLiteral(l lit) error {
	st := e.state
	pos := e.dict.Pos()
	state, state2, _ := st.states(pos)
	if err := st.isMatch[state2].Encode(e.re, 0); err != nil {
		return err
	}
	litState := st.litState(e.dict.ByteAt(1), pos)
	matchByte := e.dict.ByteAt(int(st.rep[0]) + 1)
	if err := st.litCodec.Encode(e.re, l.b, state, matchByte, litState); err != nil {
		return err
	}
	st.updateStateLiteral()
	return nil
}
// iverson converts a truth value into a number: 1 for true and 0 for
// false. This is the Iverson bracket as proposed by Donald Knuth in his
// book Concrete Mathematics.
func iverson(ok bool) uint32 {
	var v uint32
	if ok {
		v = 1
	}
	return v
}
// writeMatch writes a repetition operation into the operation stream. It
// panics if the match distance is outside [minDistance, maxDistance] or
// if the length is outside [minMatchLen, maxMatchLen], unless the match is
// a single byte at distance rep[0]. The rep distance history and the
// state are updated according to the kind of match that is written.
// Errors of the underlying codecs are returned.
func (e *encoder) writeMatch(m match) error {
var err error
if !(minDistance <= m.distance && m.distance <= maxDistance) {
panic(fmt.Errorf("match distance %d out of range", m.distance))
}
// dist is the distance offset that is stored in the stream.
dist := uint32(m.distance - minDistance)
if !(minMatchLen <= m.n && m.n <= maxMatchLen) &&
!(dist == e.state.rep[0] && m.n == 1) {
panic(fmt.Errorf(
"match length %d out of range; dist %d rep[0] %d",
m.n, dist, e.state.rep[0]))
}
state, state2, posState := e.state.states(e.dict.Pos())
if err = e.state.isMatch[state2].Encode(e.re, 1); err != nil {
return err
}
// g becomes the index of the rep distance equal to dist, or 4 if there
// is none.
g := 0
for ; g < 4; g++ {
if e.state.rep[g] == dist {
break
}
}
b := iverson(g < 4)
if err = e.state.isRep[state].Encode(e.re, b); err != nil {
return err
}
n := uint32(m.n - minMatchLen)
if b == 0 {
// simple match
e.state.rep[3], e.state.rep[2], e.state.rep[1], e.state.rep[0] =
e.state.rep[2], e.state.rep[1], e.state.rep[0], dist
e.state.updateStateMatch()
if err = e.state.lenCodec.Encode(e.re, n, posState); err != nil {
return err
}
return e.state.distCodec.Encode(e.re, dist, n)
}
b = iverson(g != 0)
if err = e.state.isRepG0[state].Encode(e.re, b); err != nil {
return err
}
if b == 0 {
// g == 0
b = iverson(m.n != 1)
if err = e.state.isRepG0Long[state2].Encode(e.re, b); err != nil {
return err
}
if b == 0 {
// short rep: single byte at distance rep[0]; no length is written
e.state.updateStateShortRep()
return nil
}
} else {
// g in {1,2,3}
b = iverson(g != 1)
if err = e.state.isRepG1[state].Encode(e.re, b); err != nil {
return err
}
if b == 1 {
// g in {2,3}
b = iverson(g != 2)
err = e.state.isRepG2[state].Encode(e.re, b)
if err != nil {
return err
}
if b == 1 {
e.state.rep[3] = e.state.rep[2]
}
e.state.rep[2] = e.state.rep[1]
}
// move the used distance to the front of the history
e.state.rep[1] = e.state.rep[0]
e.state.rep[0] = dist
}
e.state.updateStateRep()
return e.state.repLenCodec.Encode(e.re, n, posState)
}
// writeOp encodes one operation with the range encoder. ErrLimit is
// returned without writing anything if the range encoder has less than
// the margin available, which keeps enough room to close the LZMA
// stream. Operations that are neither literals nor matches cause a panic.
func (e *encoder) writeOp(op operation) error {
	if e.re.Available() < int64(e.margin) {
		return ErrLimit
	}
	if l, ok := op.(lit); ok {
		return e.writeLiteral(l)
	}
	if m, ok := op.(match); ok {
		return e.writeMatch(m)
	}
	panic("unexpected operation")
}
// compress encodes data from the dictionary buffer. With the flag all
// set, the buffer is compressed completely; otherwise compression stops
// once no more than maxMatchLen-1 bytes remain buffered. The first error
// of writeOp is returned, in particular ErrLimit if the underlying writer
// has reached its limit.
func (e *encoder) compress(flags compressFlags) error {
	var keep int
	if flags&all == 0 {
		keep = maxMatchLen - 1
	}
	dict := e.dict
	finder := dict.m
	for dict.Buffered() > keep {
		op := finder.NextOp(e.state.rep)
		if err := e.writeOp(op); err != nil {
			return err
		}
		dict.Discard(op.Len())
	}
	return nil
}
// eosMatch is a pseudo operation that indicates the end of the stream. It
// uses the distance maxDistance and the minimum match length; Close
// writes it if the EOS marker has been requested.
var eosMatch = match{distance: maxDistance, n: minMatchLen}
// Close finishes the LZMA stream. All buffered data is compressed first;
// if the byte writer limit is hit in that step (ErrLimit), the stream is
// still closed and the uncompressed remainder stays in the buffer. The
// end-of-stream marker is written if it has been requested, and finally
// the range encoder is closed.
func (e *encoder) Close() error {
	if err := e.compress(all); err != nil && err != ErrLimit {
		return err
	}
	if e.marker {
		if err := e.writeMatch(eosMatch); err != nil {
			return err
		}
	}
	return e.re.Close()
}
// Compressed reports the number of input bytes that have been compressed
// since the encoder was created or last reopened.
func (e *encoder) Compressed() int64 {
	pos := e.dict.Pos()
	return pos - e.start
}

@ -0,0 +1,149 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"fmt"
"io"
)
// matcher is an interface that supports the identification of the next
// operation.
type matcher interface {
// Write receives the bytes that have been discarded from the
// dictionary buffer (see encoderDict.Discard).
io.Writer
// SetDict links the matcher to the encoder dictionary.
SetDict(d *encoderDict)
// NextOp returns the next operation for the given rep distances.
NextOp(rep [4]uint32) operation
}
// encoderDict provides the dictionary of the encoder. It includes an
// additional buffer atop of the actual dictionary.
type encoderDict struct {
// buffer of dictCap+bufSize bytes
buf buffer
// matcher that is fed with the discarded bytes
m matcher
// position of the dictionary head; advanced by Discard
head int64
// capacity of the actual dictionary
capacity int
// preallocated array
data [maxMatchLen]byte
}
// newEncoderDict returns an encoder dictionary with the capacity dictCap
// and an additional buffer of bufSize bytes, and links the matcher m to
// it. dictCap must be in [1, MaxDictCap] and bufSize must be positive.
func newEncoderDict(dictCap, bufSize int, m matcher) (d *encoderDict, err error) {
	switch {
	case dictCap < 1 || int64(dictCap) > MaxDictCap:
		return nil, errors.New("lzma: dictionary capacity out of range")
	case bufSize < 1:
		return nil, errors.New("lzma: buffer size must be larger than zero")
	}
	d = &encoderDict{
		buf:      *newBuffer(dictCap + bufSize),
		capacity: dictCap,
		m:        m,
	}
	m.SetDict(d)
	return d, nil
}
// Discard removes n bytes from the buffer, advances the head by n and
// hands the removed bytes to the matcher. n must not be larger than
// maxMatchLen. The method panics if fewer than n bytes are buffered.
func (d *encoderDict) Discard(n int) {
	scratch := d.data[:n]
	if got, _ := d.buf.Read(scratch); got < n {
		panic(fmt.Errorf("lzma: can't discard %d bytes", n))
	}
	d.head += int64(n)
	d.m.Write(scratch)
}
// Len reports the amount of data available in the encoder dictionary:
// the available space of the buffer, limited by the head position.
func (d *encoderDict) Len() int {
	avail := d.buf.Available()
	if d.head < int64(avail) {
		return int(d.head)
	}
	return avail
}
// DictLen reports the length of the data in the dictionary, which is the
// head position limited by the dictionary capacity.
func (d *encoderDict) DictLen() int {
	if int64(d.capacity) <= d.head {
		return d.capacity
	}
	return int(d.head)
}
// Available reports how many bytes a following Write call can store: the
// available space of the buffer minus the current dictionary length.
func (d *encoderDict) Available() int {
	free := d.buf.Available()
	return free - d.DictLen()
}
// Write stores data in the dictionary buffer without moving the
// dictionary head. If p does not fit completely, only the fitting part is
// stored and ErrNoSpace is returned. An error of the buffer write takes
// precedence over ErrNoSpace.
func (d *encoderDict) Write(p []byte) (n int, err error) {
	if room := d.Available(); len(p) > room {
		p, err = p[:room], ErrNoSpace
	}
	n, werr := d.buf.Write(p)
	if werr != nil {
		err = werr
	}
	return n, err
}
// Pos reports the current position of the dictionary head.
func (d *encoderDict) Pos() int64 {
	return d.head
}
// ByteAt returns the byte that lies distance positions behind the rear of
// the buffer. A zero byte is returned if distance is not positive or
// exceeds Len.
func (d *encoderDict) ByteAt(distance int) byte {
	if distance <= 0 || distance > d.Len() {
		return 0
	}
	idx := d.buf.rear - distance
	if idx < 0 {
		// wrap around the start of the circular buffer
		idx += len(d.buf.data)
	}
	return d.buf.data[idx]
}
// CopyN writes the last n bytes of the dictionary to w. It is used to
// copy uncompressed data into an uncompressed segment. If fewer than n
// bytes are available, only those are written and ErrNoSpace is
// returned. A write error is returned with the number of bytes written.
func (d *encoderDict) CopyN(w io.Writer, n int) (written int, err error) {
	if n <= 0 {
		return 0, nil
	}
	if avail := d.Len(); n > avail {
		n, err = avail, ErrNoSpace
	}
	start := d.buf.rear - n
	if start < 0 {
		// The requested range wraps around; write the part at the end
		// of the buffer array first.
		var werr error
		written, werr = w.Write(d.buf.data[start+len(d.buf.data):])
		if werr != nil {
			return written, werr
		}
		start = 0
	}
	k, werr := w.Write(d.buf.data[start:d.buf.rear])
	written += k
	if werr != nil {
		err = werr
	}
	return written, err
}
// Buffered reports the number of bytes that are held in the buffer.
func (d *encoderDict) Buffered() int {
	return d.buf.Buffered()
}

Binary file not shown.

@ -0,0 +1,309 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"fmt"
"github.com/ulikunitz/xz/internal/hash"
)
/* For compression we need to find byte sequences that match the byte
* sequence at the dictionary head. A hash table is a simple method to
* provide this capability.
*/
// maxMatches limits the number of matches requested from the Matches
// function. This controls the speed of the overall encoding.
const maxMatches = 16
// shortDists defines the number of short distances supported by the
// implementation. NextOp always tests the distances 1 to shortDists.
const shortDists = 8
// Limits for the hash table exponent. The minimum is somewhat arbitrary,
// but the maximum is limited by the memory requirements of the hash
// table.
const (
minTableExponent = 9
maxTableExponent = 20
)
// newRoller contains the function used to create an instance of the
// hash.Roller. The default creates a cyclic polynomial hash for words of
// n bytes.
var newRoller = func(n int) hash.Roller { return hash.NewCyclicPoly(n) }
// hashTable stores the hash table including the rolling hash method.
//
// We implement chained hashing into a circular buffer. Each entry in
// the circular buffer stores the delta distance to the next position with a
// word that has the same hash value.
type hashTable struct {
// encoder dictionary the table works on; set by SetDict
dict *encoderDict
// actual hash table; an entry holds position+1, so zero means empty
t []int64
// circular list data with the offset to the next word
data []uint32
// index in data where the next delta is stored
front int
// mask for computing the index for the hash table
mask uint64
// hash offset; initial value is -int64(wordLen)
hoff int64
// length of the hashed word
wordLen int
// hash roller for computing the hash values for the Write
// method
wr hash.Roller
// hash roller for computing arbitrary hashes
hr hash.Roller
// preallocated slices
p [maxMatches]int64
distances [maxMatches + shortDists]int
}
// hashTableExponent computes the exponent of the hash table size for the
// dictionary capacity n. The result is 30-nlz32(n), limited to the range
// [minTableExponent, maxTableExponent].
func hashTableExponent(n uint32) int {
	exp := 30 - nlz32(n)
	if exp < minTableExponent {
		return minTableExponent
	}
	if exp > maxTableExponent {
		return maxTableExponent
	}
	return exp
}
// newHashTable creates a hash table for words of length wordLen. The
// capacity is the number of entries of the circular chain buffer and must
// be positive; wordLen must be in the range [1,4]. The number of hash
// slots is a power of two whose exponent is derived from the capacity by
// hashTableExponent.
//
// An error is returned if an argument is out of range. The function
// panics if the number of hash slots cannot be represented as int.
func newHashTable(capacity int, wordLen int) (t *hashTable, err error) {
	if capacity <= 0 {
		// Zero is rejected as well, so the message must not claim that
		// only negative values are invalid.
		return nil, errors.New(
			"newHashTable: capacity must be positive")
	}
	exp := hashTableExponent(uint32(capacity))
	if !(1 <= wordLen && wordLen <= 4) {
		return nil, errors.New("newHashTable: " +
			"argument wordLen out of range")
	}
	n := 1 << uint(exp)
	if n <= 0 {
		panic("newHashTable: exponent is too large")
	}
	t = &hashTable{
		t:    make([]int64, n),
		data: make([]uint32, capacity),
		mask: (uint64(1) << uint(exp)) - 1,
		// The first wordLen-1 bytes do not complete a word; putEntry
		// ignores the resulting negative positions.
		hoff:    -int64(wordLen),
		wordLen: wordLen,
		wr:      newRoller(wordLen),
		hr:      newRoller(wordLen),
	}
	return t, nil
}
// SetDict links the hash table to the encoder dictionary d.
func (t *hashTable) SetDict(d *encoderDict) {
	t.dict = d
}
// buffered reports how many positions are currently hashed. The value is
// hoff+1, limited to the range from zero to the length of the chain
// buffer.
func (t *hashTable) buffered() int {
	n := t.hoff + 1
	if n <= 0 {
		return 0
	}
	if limit := len(t.data); n >= int64(limit) {
		return limit
	}
	return int(n)
}
// addIndex adds n to the index i and wraps the sum at the end of the
// circular chain buffer, so that the result stays inside the buffer.
func (t *hashTable) addIndex(i, n int) int {
	size := len(t.data)
	if wrapped := i + n - size; wrapped >= 0 {
		return wrapped
	}
	return i + n
}
// putDelta stores delta at the front of the circular chain buffer and
// advances the front by one entry.
func (t *hashTable) putDelta(delta uint32) {
	slot := t.front
	t.data[slot] = delta
	t.front = t.addIndex(slot, 1)
}
// putEntry records the position pos for the hash h. The position stored
// in the hash slot before is linked through the chain buffer: the delta
// to it is stored, or zero if the slot was empty or the old position can
// no longer be reached. Negative positions are ignored.
func (t *hashTable) putEntry(h uint64, pos int64) {
	if pos < 0 {
		return
	}
	slot := h & t.mask
	// Slots store position+1, which keeps zero free for "empty".
	prev := t.t[slot] - 1
	t.t[slot] = pos + 1
	var delta int64
	if prev >= 0 {
		if dist := pos - prev; dist <= 1<<32-1 && dist <= int64(t.buffered()) {
			delta = dist
		}
	}
	t.putDelta(uint32(delta))
}
// WriteByte rolls the byte b into the write hash, advances the hash
// offset and stores the new position under the resulting hash value. The
// returned error is always nil.
func (t *hashTable) WriteByte(b byte) error {
	hashValue := t.wr.RollByte(b)
	t.hoff++
	t.putEntry(hashValue, t.hoff)
	return nil
}
// Write feeds all bytes of p into the hash table, one byte at a time.
// It always reports len(p) bytes written and never returns an error.
func (t *hashTable) Write(p []byte) (n int, err error) {
	for i := range p {
		// WriteByte doesn't generate an error.
		t.WriteByte(p[i])
	}
	return len(p), nil
}
// getMatches collects the positions stored for the hash h, starting with
// the position in the hash slot and following the delta chain in the
// circular buffer. The positions are written to the positions slice and
// the function returns the number of positions found. The search stops
// when positions is full, when a zero delta ends the chain or when the
// chain leaves the range of currently hashed positions.
//
// TODO: Make a getDistances because that we are actually interested in.
func (t *hashTable) getMatches(h uint64, positions []int64) (n int) {
if t.hoff < 0 || len(positions) == 0 {
return 0
}
buffered := t.buffered()
// position of the oldest entry that is still in the chain buffer
tailPos := t.hoff + 1 - int64(buffered)
// rear is the index of that oldest entry, shifted to be negative; the
// index computation below wraps it back into the buffer
rear := t.front - buffered
if rear >= 0 {
rear -= len(t.data)
}
// get the slot for the hash
pos := t.t[h&t.mask] - 1
delta := pos - tailPos
for {
if delta < 0 {
// empty slot or position older than the tail
return n
}
positions[n] = tailPos + delta
n++
if n >= len(positions) {
return n
}
i := rear + int(delta)
if i < 0 {
i += len(t.data)
}
u := t.data[i]
if u == 0 {
// end of chain
return n
}
delta -= int64(u)
}
}
// hash rolls all bytes of p through the hr roller and returns the hash
// value obtained after the last byte, or zero for an empty p. The result
// is only correct if len(p) equals t.wordLen.
func (t *hashTable) hash(p []byte) uint64 {
	var h uint64
	for i := 0; i < len(p); i++ {
		h = t.hr.RollByte(p[i])
	}
	return h
}
// Matches stores the positions of potential matches for the word p in
// the positions slice and returns how many positions have been stored.
// The method panics if the length of p differs from the word length of
// the hash table.
func (t *hashTable) Matches(p []byte, positions []int64) int {
	if len(p) == t.wordLen {
		return t.getMatches(t.hash(p), positions)
	}
	panic(fmt.Errorf(
		"byte slice must have length %d", t.wordLen))
}
// NextOp identifies the next operation using the hash table. It peeks at
// up to maxMatchLen bytes of the dictionary buffer, tests the short
// distances 1 to 8 and the distances of the hashed positions, and returns
// the longest match found. If there is no match, the first peeked byte is
// returned as a literal. A match of length 1 is only accepted at the
// distance stored in rep[0].
//
// TODO: Use all repetitions to find matches.
func (t *hashTable) NextOp(rep [4]uint32) operation {
// get positions
data := t.dict.data[:maxMatchLen]
n, _ := t.dict.buf.Peek(data)
data = data[:n]
var p []int64
if n < t.wordLen {
// not enough data for a complete word; only short distances are tested
p = t.p[:0]
} else {
p = t.p[:maxMatches]
n = t.Matches(data[:t.wordLen], p)
p = p[:n]
}
// convert positions in potential distances
head := t.dict.head
dists := append(t.distances[:0], 1, 2, 3, 4, 5, 6, 7, 8)
for _, pos := range p {
dis := int(head - pos)
if dis > shortDists {
dists = append(dists, dis)
}
}
// check distances
var m match
dictLen := t.dict.DictLen()
for _, dist := range dists {
if dist > dictLen {
continue
}
// Here comes a trick. We are only interested in matches
// that are longer than the matches we have found
// before. So before we test the whole byte sequence at
// the given distance, we test the first byte that would
// make the match longer. If it doesn't match the byte
// to match, we don't need to care any longer.
i := t.dict.buf.rear - dist + m.n
if i < 0 {
i += len(t.dict.buf.data)
}
if t.dict.buf.data[i] != data[m.n] {
// We can't get a longer match. Jump to the next
// distance.
continue
}
n := t.dict.buf.matchLen(dist, data)
switch n {
case 0:
continue
case 1:
if uint32(dist-minDistance) != rep[0] {
continue
}
}
if n > m.n {
m = match{int64(dist), n}
if n == len(data) {
// No better match will be found.
break
}
}
}
if m.n == 0 {
return lit{data[0]}
}
return m
}

@ -0,0 +1,167 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"fmt"
)
// uint32LE decodes the first four bytes of b as a little-endian uint32.
// It panics if b has fewer than four bytes.
func uint32LE(b []byte) uint32 {
	_ = b[3] // the highest index is checked first
	var x uint32
	for i := 3; i >= 0; i-- {
		x = x<<8 | uint32(b[i])
	}
	return x
}
// uint64LE decodes the first eight bytes of b as a little-endian uint64.
// It panics if b has fewer than eight bytes.
func uint64LE(b []byte) uint64 {
	_ = b[7] // the highest index is checked first
	var x uint64
	for i := 7; i >= 0; i-- {
		x = x<<8 | uint64(b[i])
	}
	return x
}
// putUint32LE stores x as little-endian value in the first four bytes of
// b. The slice b must have a length of at least four bytes.
func putUint32LE(b []byte, x uint32) {
	for i := 0; i < 4; i++ {
		b[i] = byte(x >> (8 * uint(i)))
	}
}
// putUint64LE stores x as little-endian value in the first eight bytes of
// b. The slice b must have room for at least eight bytes.
func putUint64LE(b []byte, x uint64) {
	for i := 0; i < 8; i++ {
		b[i] = byte(x >> (8 * uint(i)))
	}
}
// noHeaderSize defines the value of the length field in the LZMA header
// that indicates that no uncompressed size is given.
const noHeaderSize uint64 = 1<<64 - 1
// HeaderLen provides the length of the LZMA file header.
const HeaderLen = 13
// header represents the header of an LZMA file.
type header struct {
// LZMA properties; stored as a single code byte
properties Properties
// dictionary capacity; stored as little-endian uint32
dictCap int
// uncompressed size; negative value if no size is given
size int64
}
// marshalBinary encodes the header into HeaderLen (13) bytes: byte 0 is
// the properties code, bytes 1-4 hold the dictionary capacity as
// little-endian uint32 and bytes 5-12 the uncompressed size as
// little-endian uint64.
//
// A negative size means that no size is given and is stored as
// noHeaderSize. Every non-negative size, including zero, is stored as is,
// so that unmarshalBinary restores the value that has been marshaled.
//
// An error is returned if the properties cannot be verified or if dictCap
// is outside the range [0, MaxDictCap].
func (h *header) marshalBinary() (data []byte, err error) {
	if err = h.properties.verify(); err != nil {
		return nil, err
	}
	if !(0 <= h.dictCap && int64(h.dictCap) <= MaxDictCap) {
		return nil, fmt.Errorf("lzma: DictCap %d out of range",
			h.dictCap)
	}

	data = make([]byte, HeaderLen)

	// property byte
	data[0] = h.properties.Code()

	// dictionary capacity
	putUint32LE(data[1:5], uint32(h.dictCap))

	// uncompressed size; only negative values stand for "no size"
	s := noHeaderSize
	if h.size >= 0 {
		s = uint64(h.size)
	}
	putUint64LE(data[5:], s)

	return data, nil
}
// unmarshalBinary decodes the header from data, which must have exactly
// HeaderLen bytes. The size field value noHeaderSize is translated into
// the size -1. An error is returned for a wrong data length, an invalid
// properties byte, a dictionary capacity that doesn't fit into an int and
// an uncompressed size that doesn't fit into an int64.
func (h *header) unmarshalBinary(data []byte) error {
	if len(data) != HeaderLen {
		return errors.New("lzma.unmarshalBinary: data has wrong length")
	}

	// properties
	props, err := PropertiesForCode(data[0])
	h.properties = props
	if err != nil {
		return err
	}

	// dictionary capacity
	h.dictCap = int(uint32LE(data[1:]))
	if h.dictCap < 0 {
		return errors.New("LZMA header: dictionary capacity exceeds maximum integer")
	}

	// uncompressed size
	raw := uint64LE(data[5:])
	if raw == noHeaderSize {
		h.size = -1
		return nil
	}
	h.size = int64(raw)
	if h.size < 0 {
		return errors.New("LZMA header: uncompressed size out of int64 range")
	}
	return nil
}
// validDictCap reports whether dictcap is a plausible dictionary
// capacity: MaxDictCap, or 2^n or 2^n+2^(n-1) for an n in the range from
// 10 to 31. It is used to weed out wrong file headers.
func validDictCap(dictcap int) bool {
	if int64(dictcap) == MaxDictCap {
		return true
	}
	for n := uint(10); n < 32; n++ {
		pow := 1 << n
		if dictcap == pow || dictcap == pow+1<<(n-1) {
			return true
		}
	}
	return false
}
// ValidHeader reports whether data looks like a valid LZMA file header.
// The data must unmarshal as a header, the dictionary capacity must be
// accepted by validDictCap, and an explicit uncompressed size must not
// exceed 256 GiB. The length of the data argument must be HeaderLen.
func ValidHeader(data []byte) bool {
	var h header
	if h.unmarshalBinary(data) != nil || !validDictCap(h.dictCap) {
		return false
	}
	// Negative sizes stand for "no size given" and pass the check too.
	return h.size <= 1<<38
}

@ -0,0 +1,398 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"fmt"
"io"
)
// maxCompressed is the maximum size of compressed data in a chunk.
const maxCompressed = 1 << 16

// maxUncompressed is the maximum size of uncompressed data in a chunk.
const maxUncompressed = 1 << 21
// chunkType identifies the kind of an LZMA2 chunk. The values are an
// internal representation and not the encoding used in the chunk header.
type chunkType byte

// Possible values for the chunk type.
const (
	cEOS   chunkType = iota // end of stream
	cUD                     // uncompressed; reset dictionary
	cU                      // uncompressed; no reset of dictionary
	cL                      // LZMA compressed; no reset
	cLR                     // LZMA compressed; reset state
	cLRN                    // LZMA compressed; reset state; new property value
	cLRND                   // LZMA compressed; reset state; new property value; reset dictionary
)

// chunkTypeStrings maps each chunk type to its short name.
var chunkTypeStrings = [...]string{
	cEOS:  "EOS",
	cU:    "U",
	cUD:   "UD",
	cL:    "L",
	cLR:   "LR",
	cLRN:  "LRN",
	cLRND: "LRND",
}

// String returns the short name of the chunk type, or "unknown" for a
// value outside the defined range.
func (c chunkType) String() string {
	if int(c) < len(chunkTypeStrings) {
		return chunkTypeStrings[c]
	}
	return "unknown"
}
// Header byte encodings of the chunk types. For the LZMA chunk types the
// low bits of the header byte additionally carry the high bits of the
// uncompressed size.
const (
	hEOS  = 0x00
	hUD   = 0x01
	hU    = 0x02
	hL    = 0x80               // bit 7
	hLR   = 0x80 | 0x20        // bits 7 and 5
	hLRN  = 0x80 | 0x40        // bits 7 and 6
	hLRND = 0x80 | 0x40 | 0x20 // bits 7, 6 and 5
)
// errHeaderByte indicates an unsupported value for the chunk header
// byte. This byte starts the variable-length chunk header.
var errHeaderByte = errors.New("lzma: unsupported chunk header byte")

// headerChunkType maps the first byte of a chunk header to its chunk
// type. For compressed chunks (bit 7 set) only the bits covered by hLRND
// are examined; the uncompressed size bits are ignored.
func headerChunkType(h byte) (c chunkType, err error) {
	if h&hL != 0 {
		// LZMA compressed chunk
		switch h & hLRND {
		case hL:
			return cL, nil
		case hLR:
			return cLR, nil
		case hLRN:
			return cLRN, nil
		case hLRND:
			return cLRND, nil
		}
		return 0, errHeaderByte
	}

	// no compression: the whole byte must match exactly
	switch h {
	case hEOS:
		return cEOS, nil
	case hUD:
		return cUD, nil
	case hU:
		return cU, nil
	}
	return 0, errHeaderByte
}
// uncompressedHeaderLen is the header length of an uncompressed chunk.
const uncompressedHeaderLen = 3

// headerLen returns the length in bytes of the LZMA2 chunk header for
// the given chunk type. It panics for a chunk type it does not know.
func headerLen(c chunkType) int {
	switch {
	case c == cEOS:
		return 1
	case c == cU || c == cUD:
		return uncompressedHeaderLen
	case c == cL || c == cLR:
		return 5
	case c == cLRN || c == cLRND:
		return 6
	default:
		panic(fmt.Errorf("unsupported chunk type %d", c))
	}
}
// chunkHeader holds the decoded contents of an LZMA2 chunk header.
type chunkHeader struct {
	ctype        chunkType
	uncompressed uint32
	compressed   uint16
	props        Properties
}

// String formats the header as chunk type, uncompressed field,
// compressed field and properties, separated by spaces.
func (h *chunkHeader) String() string {
	props := &h.props
	return fmt.Sprintf("%s %d %d %s",
		h.ctype, h.uncompressed, h.compressed, props)
}
// UnmarshalBinary decodes a chunk header from data. The slice must have
// exactly the length required by the chunk type encoded in its first
// byte; otherwise an error is returned.
func (h *chunkHeader) UnmarshalBinary(data []byte) error {
	if len(data) == 0 {
		return errors.New("no data")
	}
	ctype, err := headerChunkType(data[0])
	if err != nil {
		return err
	}
	switch want := headerLen(ctype); {
	case len(data) < want:
		return errors.New("incomplete data")
	case len(data) > want:
		return errors.New("invalid data length")
	}

	*h = chunkHeader{ctype: ctype}
	if ctype == cEOS {
		return nil
	}

	// bytes 1..2 hold the low 16 bits of the uncompressed field
	size := uint32(uint16BE(data[1:3]))
	if ctype > cU {
		// compressed chunks keep five more size bits in the header byte
		// and add a 16-bit compressed field
		size |= uint32(data[0]&^hLRND) << 16
		h.compressed = uint16BE(data[3:5])
	}
	h.uncompressed = size
	if ctype <= cLR {
		return nil
	}

	// cLRN and cLRND carry a properties byte
	h.props, err = PropertiesForCode(data[5])
	return err
}
// MarshalBinary encodes the chunk header. It returns an error if the
// chunk type is unknown, if the properties do not verify, or if the
// uncompressed field does not fit the header layout: 16 bits for
// uncompressed chunks (cU, cUD) and 21 bits for LZMA chunks. Out-of-range
// values used to be truncated silently, which produced a header that
// describes a different chunk size than the caller intended.
func (h *chunkHeader) MarshalBinary() (data []byte, err error) {
	// largest values the uncompressed field can hold per layout
	const (
		maxField16 = 1<<16 - 1
		maxField21 = 1<<21 - 1
	)

	if h.ctype > cLRND {
		return nil, errors.New("invalid chunk type")
	}
	if err = h.props.verify(); err != nil {
		return nil, err
	}

	data = make([]byte, headerLen(h.ctype))
	switch h.ctype {
	case cEOS:
		// the end-of-stream header is a single zero byte
		return data, nil
	case cUD:
		data[0] = hUD
	case cU:
		data[0] = hU
	case cL:
		data[0] = hL
	case cLR:
		data[0] = hLR
	case cLRN:
		data[0] = hLRN
	case cLRND:
		data[0] = hLRND
	}

	if h.ctype <= cU {
		// uncompressed chunk: only a 16-bit size field
		if h.uncompressed > maxField16 {
			return nil, errors.New(
				"lzma: uncompressed size out of range for uncompressed chunk")
		}
		putUint16BE(data[1:3], uint16(h.uncompressed))
		return data, nil
	}

	// LZMA chunk: low 16 bits in bytes 1..2, five high bits in byte 0
	if h.uncompressed > maxField21 {
		return nil, errors.New(
			"lzma: uncompressed size out of range for compressed chunk")
	}
	putUint16BE(data[1:3], uint16(h.uncompressed))
	data[0] |= byte(h.uncompressed>>16) &^ hLRND
	putUint16BE(data[3:5], h.compressed)
	if h.ctype <= cLR {
		return data, nil
	}

	// cLRN and cLRND append the properties byte
	data[5] = h.props.Code()
	return data, nil
}
// readChunkHeader reads one chunk header from r.
//
// The first byte determines the chunk type and thereby the header
// length; the remaining bytes are then read and decoded. If r is already
// exhausted before the first byte, the error of io.ReadFull (io.EOF) is
// returned unchanged. If the stream ends after the first byte, the header
// is truncated and io.ErrUnexpectedEOF is returned instead of the plain
// io.EOF that io.ReadFull reports when the second read gets no bytes.
func readChunkHeader(r io.Reader) (h *chunkHeader, err error) {
	// room for the longest header (6 bytes); start with the type byte
	p := make([]byte, 1, 6)
	if _, err = io.ReadFull(r, p); err != nil {
		return nil, err
	}
	c, err := headerChunkType(p[0])
	if err != nil {
		return nil, err
	}
	p = p[:headerLen(c)]
	if _, err = io.ReadFull(r, p[1:]); err != nil {
		if err == io.EOF {
			// at least the first header byte has been consumed already
			err = io.ErrUnexpectedEOF
		}
		return nil, err
	}
	h = new(chunkHeader)
	if err = h.UnmarshalBinary(p); err != nil {
		return nil, err
	}
	return h, nil
}
// uint16BE decodes the first two bytes of p as a big-endian uint16.
func uint16BE(p []byte) uint16 {
	hi := uint16(p[0])
	lo := uint16(p[1])
	return hi<<8 | lo
}
// putUint16BE stores x in the first two bytes of p in big-endian order.
func putUint16BE(p []byte, x uint16) {
	hi, lo := byte(x>>8), byte(x)
	p[0] = hi
	p[1] = lo
}
// chunkState tracks where a reader or writer is within an LZMA2 chunk
// sequence.
type chunkState byte

// start is the initial state of the chunk state.
const start chunkState = 'S'

// stop is the terminating state of the chunk state.
const stop chunkState = 'T'

// errChunkType reports a chunk type that is not allowed in the current
// chunk state.
var errChunkType = errors.New("lzma: unexpected chunk type")

// errState reports a chunk state value that is not known.
var errState = errors.New("lzma: wrong chunk state")
// next advances the chunk state for a chunk of the given type. It
// returns errChunkType if the chunk type is not allowed in the current
// state (the state is then left unchanged) and errState if the current
// state is not one of 'S', 'L', 'R', 'U' or 'T'.
func (c *chunkState) next(ctype chunkType) error {
	cur := *c
	following := cur
	switch cur {
	case 'S': // start state
		switch ctype {
		case cEOS:
			following = 'T'
		case cUD:
			following = 'R'
		case cLRND:
			following = 'L'
		default:
			return errChunkType
		}
	case 'L': // normal LZMA mode
		switch ctype {
		case cEOS:
			following = 'T'
		case cUD:
			following = 'R'
		case cU:
			following = 'U'
		case cL, cLR, cLRN, cLRND:
			// stay in LZMA mode
		default:
			return errChunkType
		}
	case 'R': // reset required
		switch ctype {
		case cEOS:
			following = 'T'
		case cUD, cU:
			// still waiting for a compressed chunk with new properties
		case cLRN, cLRND:
			following = 'L'
		default:
			return errChunkType
		}
	case 'U': // uncompressed
		switch ctype {
		case cEOS:
			following = 'T'
		case cUD:
			following = 'R'
		case cU:
			// stay in uncompressed mode
		case cL, cLR, cLRN, cLRND:
			following = 'L'
		default:
			return errChunkType
		}
	case 'T': // terminal state: no further chunk is accepted
		return errChunkType
	default:
		return errState
	}
	*c = following
	return nil
}
// defaultChunkType returns the default chunk type for the chunk state.
// Any state other than 'S', 'L', 'U' and 'R' yields cEOS; no error is
// reported.
func (c chunkState) defaultChunkType() chunkType {
	switch {
	case c == 'S':
		return cLRND
	case c == 'L' || c == 'U':
		return cL
	case c == 'R':
		return cLRN
	}
	return cEOS
}
// maxDictCap is the maximum dictionary capacity supported by the LZMA2
// dictionary capacity encoding.
const maxDictCap = 1<<32 - 1

// maxDictCapCode is the maximum dictionary capacity code.
const maxDictCapCode = 40

// decodeDictCap decodes the dictionary capacity byte without checking
// that the byte is in the valid range. Bit 0 selects the mantissa (2 or
// 3); the next five bits give the exponent on top of 11.
func decodeDictCap(c byte) int64 {
	mantissa := 2 | int64(c&1)
	exponent := 11 + (c>>1)&0x1f
	return mantissa << exponent
}

// DecodeDictCap decodes the encoded dictionary capacity. It returns an
// error if the code is larger than maxDictCapCode.
func DecodeDictCap(c byte) (n int64, err error) {
	switch {
	case c < maxDictCapCode:
		return decodeDictCap(c), nil
	case c == maxDictCapCode:
		return maxDictCap, nil
	}
	return 0, errors.New("lzma: invalid dictionary size code")
}

// EncodeDictCap encodes a dictionary capacity. It returns the smallest
// code whose capacity is greater than or equal to n. If n exceeds every
// capacity below the maximum code, the maximum code is returned.
func EncodeDictCap(n int64) byte {
	// binary search over the codes [0, maxDictCapCode)
	lo, hi := byte(0), byte(maxDictCapCode)
	for lo < hi {
		mid := lo + (hi-lo)>>1
		switch m := decodeDictCap(mid); {
		case n == m:
			return mid
		case n < m:
			hi = mid
		default:
			lo = mid + 1
		}
	}
	return lo
}

@ -0,0 +1,116 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import "errors"
// maxPosBits is the number of bits of the position value that are used
// to compute the posState value. The value is used to select the tree
// codec for length encoding and decoding.
const maxPosBits = 4

// minMatchLen is the minimum match length. It is also the base for the
// encoded length values.
const minMatchLen = 2

// maxMatchLen is the maximum match length that can be encoded or
// decoded.
const maxMatchLen = minMatchLen + 16 + 256 - 1
// lengthCodec supports the encoding and decoding of length values. A
// length offset is coded with up to two choice bits followed by one of
// three kinds of tree codecs.
type lengthCodec struct {
	// choice[0] selects between the low tree and the rest; choice[1]
	// selects between the mid tree and the high tree.
	choice [2]prob
	// low holds one 3-bit tree codec per posState; used for offsets
	// below 8.
	low [1 << maxPosBits]treeCodec
	// mid holds one 3-bit tree codec per posState; used for offsets 8
	// to 15.
	mid [1 << maxPosBits]treeCodec
	// high is the 8-bit tree codec for offsets of 16 and above.
	high treeCodec
}
// deepcopy makes lc a deep copy of src. Copying a codec onto itself is a
// no-op.
func (lc *lengthCodec) deepcopy(src *lengthCodec) {
	if lc == src {
		return
	}
	lc.choice = src.choice
	for i := 0; i < len(lc.low); i++ {
		lc.low[i].deepcopy(&src.low[i])
	}
	for i := 0; i < len(lc.mid); i++ {
		lc.mid[i].deepcopy(&src.mid[i])
	}
	lc.high.deepcopy(&src.high)
}
// init resets the length codec: both choice probabilities are set to
// probInit, the low and mid trees are created with 3 bits and the high
// tree with 8 bits.
func (lc *lengthCodec) init() {
	for i := 0; i < len(lc.choice); i++ {
		lc.choice[i] = probInit
	}
	for i := 0; i < len(lc.low); i++ {
		lc.low[i] = makeTreeCodec(3)
	}
	for i := 0; i < len(lc.mid); i++ {
		lc.mid[i] = makeTreeCodec(3)
	}
	lc.high = makeTreeCodec(8)
}
// Encode encodes the length offset l. The length offset is the actual
// length minus minMatchLen (2):
//
//	l = length - minMatchLen
//
// Offsets below 8 use the low tree for posState, offsets 8 to 15 the mid
// tree for posState and all larger offsets the high tree. An error is
// returned if l exceeds maxMatchLen-minMatchLen or the range encoder
// fails.
func (lc *lengthCodec) Encode(e *rangeEncoder, l uint32, posState uint32,
) (err error) {
	if l > maxMatchLen-minMatchLen {
		return errors.New("lengthCodec.Encode: l out of range")
	}

	// choice bits: the first separates low from the rest, the second
	// separates mid from high
	var first, second uint32
	if l >= 8 {
		first = 1
	}
	if l >= 16 {
		second = 1
	}

	if err = lc.choice[0].Encode(e, first); err != nil {
		return err
	}
	if first == 0 {
		return lc.low[posState].Encode(e, l)
	}
	if err = lc.choice[1].Encode(e, second); err != nil {
		return err
	}
	if second == 0 {
		return lc.mid[posState].Encode(e, l-8)
	}
	return lc.high.Encode(e, l-16)
}
// Decode reads a length offset l. Add minMatchLen to l to obtain the
// actual length.
func (lc *lengthCodec) Decode(d *rangeDecoder, posState uint32,
) (l uint32, err error) {
	choice, err := lc.choice[0].Decode(d)
	if err != nil {
		return 0, err
	}
	if choice == 0 {
		// low tree: offsets 0..7
		l, err = lc.low[posState].Decode(d)
		return l, err
	}

	choice, err = lc.choice[1].Decode(d)
	if err != nil {
		return 0, err
	}
	if choice == 0 {
		// mid tree: offsets 8..15
		l, err = lc.mid[posState].Decode(d)
		return l + 8, err
	}

	// high tree: offsets from 16
	l, err = lc.high.Decode(d)
	return l + 16, err
}

@ -0,0 +1,125 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
// literalCodec supports the encoding of literal. It provides 768 probability
// values per literal state. The upper 512 probabilities are used with the
// context of a match bit.
type literalCodec struct {
	// probs holds 0x300 (768) consecutive probabilities for every
	// literal state; init sizes it as 0x300 << (lc+lp).
	probs []prob
}
// deepcopy makes c a deep copy of src by giving it a freshly allocated
// copy of the probability table. Copying a codec onto itself is a no-op.
func (c *literalCodec) deepcopy(src *literalCodec) {
	if c == src {
		return
	}
	dup := make([]prob, len(src.probs))
	copy(dup, src.probs)
	c.probs = dup
}
// init allocates the probability table for the given lc and lp values
// and sets every probability to probInit. It panics if lc or lp is out
// of range.
func (c *literalCodec) init(lc, lp int) {
	if lc < minLC || lc > maxLC {
		panic("lc out of range")
	}
	if lp < minLP || lp > maxLP {
		panic("lp out of range")
	}
	table := make([]prob, 0x300<<uint(lc+lp))
	for i := 0; i < len(table); i++ {
		table[i] = probInit
	}
	c.probs = table
}
// Encode encodes the byte s with the range encoder, most significant bit
// first, using the 0x300 probabilities selected by litState. If state is
// 7 or larger, the bits are first coded in the context of the
// corresponding bits of the match byte until a bit differs from its
// match bit; the remaining bits are coded without that context.
func (c *literalCodec) Encode(e *rangeEncoder, s byte,
	state uint32, match byte, litState uint32,
) (err error) {
	base := litState * 0x300
	table := c.probs[base : base+0x300]
	// sym collects the coded bits below a leading 1; it reaches 0x100
	// once all eight bits have been coded
	sym := uint32(1)
	rest := uint32(s)
	if state >= 7 {
		mrest := uint32(match)
		for sym < 0x100 {
			matchBit := (mrest >> 7) & 1
			mrest <<= 1
			bit := (rest >> 7) & 1
			rest <<= 1
			idx := ((1 + matchBit) << 8) | sym
			if err = table[idx].Encode(e, bit); err != nil {
				return err
			}
			sym = (sym << 1) | bit
			if matchBit != bit {
				// the match context no longer applies
				break
			}
		}
	}
	for ; sym < 0x100; rest <<= 1 {
		bit := (rest >> 7) & 1
		if err = table[sym].Encode(e, bit); err != nil {
			return err
		}
		sym = (sym << 1) | bit
	}
	return nil
}
// Decode decodes a literal byte with the range decoder, most significant
// bit first, using the 0x300 probabilities selected by litState. If
// state is 7 or larger, the bits are first decoded in the context of the
// corresponding bits of the match byte until a decoded bit differs from
// its match bit; the remaining bits are decoded without that context.
func (c *literalCodec) Decode(d *rangeDecoder,
	state uint32, match byte, litState uint32,
) (s byte, err error) {
	base := litState * 0x300
	table := c.probs[base : base+0x300]
	// sym collects the decoded bits below a leading 1; it reaches 0x100
	// once all eight bits are known
	sym := uint32(1)
	var bit uint32
	if state >= 7 {
		mrest := uint32(match)
		for sym < 0x100 {
			matchBit := (mrest >> 7) & 1
			mrest <<= 1
			idx := ((1 + matchBit) << 8) | sym
			if bit, err = d.DecodeBit(&table[idx]); err != nil {
				return 0, err
			}
			sym = (sym << 1) | bit
			if matchBit != bit {
				// the match context no longer applies
				break
			}
		}
	}
	for sym < 0x100 {
		if bit, err = d.DecodeBit(&table[sym]); err != nil {
			return 0, err
		}
		sym = (sym << 1) | bit
	}
	return byte(sym - 0x100), nil
}
// minLC and maxLC define the range for LC values; minLP and maxLP define
// the range for LP values.
const (
	minLC = 0
	maxLC = 8

	minLP = 0
	maxLP = 4
)

@ -0,0 +1,52 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import "errors"
// MatchAlgorithm identifies an algorithm to find matches in the
// dictionary.
type MatchAlgorithm byte

// Supported matcher algorithms.
const (
	HashTable4 MatchAlgorithm = iota
	BinaryTree
)

// maStrings holds the names of the supported match algorithms. It is
// used by the String and verify methods.
var maStrings = map[MatchAlgorithm]string{
	HashTable4: "HashTable4",
	BinaryTree: "BinaryTree",
}

// String returns the name of the match algorithm, or "unknown" for an
// unsupported value.
func (a MatchAlgorithm) String() string {
	name, known := maStrings[a]
	if !known {
		return "unknown"
	}
	return name
}

// errUnsupportedMatchAlgorithm is returned for match algorithm values
// that are not supported.
var errUnsupportedMatchAlgorithm = errors.New(
	"lzma: unsupported match algorithm value")

// verify checks whether the match algorithm value is supported.
func (a MatchAlgorithm) verify() error {
	if _, known := maStrings[a]; known {
		return nil
	}
	return errUnsupportedMatchAlgorithm
}
// new creates the matcher for the algorithm with the given dictionary
// capacity. It returns errUnsupportedMatchAlgorithm for an unsupported
// algorithm value.
func (a MatchAlgorithm) new(dictCap int) (m matcher, err error) {
	if a == HashTable4 {
		return newHashTable(dictCap, 4)
	}
	if a == BinaryTree {
		return newBinTree(dictCap)
	}
	return nil, errUnsupportedMatchAlgorithm
}

@ -0,0 +1,55 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"fmt"
"unicode"
)
// operation represents an operation on the dictionary during encoding or
// decoding.
type operation interface {
	// Len returns the number of bytes the operation covers.
	Len() int
}
// match represents a repetition at the given distance with the given
// length.
type match struct {
	// distance supports all possible distance values, including the eos
	// marker
	distance int64
	// n is the length of the match
	n int
}

// Len returns the number of bytes matched.
func (m match) Len() int {
	length := m.n
	return length
}

// String returns a string representation of the match in the form
// M{distance,length}.
func (m match) String() string {
	dist, length := m.distance, m.n
	return fmt.Sprintf("M{%d,%d}", dist, length)
}
// lit represents a single byte literal.
type lit struct {
	b byte
}

// Len returns 1 for the single byte literal.
func (l lit) Len() int {
	return 1
}

// String returns a string representation of the literal in the form
// L{c/xx}, where c is the byte as character, or '.' if it is not
// printable, and xx is the byte value in two-digit hexadecimal.
func (l lit) String() string {
	shown := byte('.')
	if unicode.IsPrint(rune(l.b)) {
		shown = l.b
	}
	return fmt.Sprintf("L{%c/%02x}", shown, l.b)
}

@ -0,0 +1,53 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
// movebits is the number of bits used for the updates of probability
// values.
const movebits = 5

// probbits is the number of bits of a probability value.
const probbits = 11

// probInit is the initial value for prob values; it represents 0.5.
const probInit prob = 1 << (probbits - 1)

// prob represents a probability. The type can also be used to encode and
// decode single bits.
type prob uint16

// dec decreases the probability. The decrease is proportional to the
// probability value.
func (p *prob) dec() {
	cur := *p
	*p = cur - (cur >> movebits)
}

// inc increases the probability. The increase is proportional to the
// difference between 1 (as 1<<probbits) and the probability value.
func (p *prob) inc() {
	cur := *p
	*p = cur + (((1 << probbits) - cur) >> movebits)
}

// bound computes the new bound for the range r using the probability
// value.
func (p prob) bound(r uint32) uint32 {
	scaled := r >> probbits
	return scaled * uint32(p)
}

// Bits returns 1, the number of bits that can be encoded or decoded with
// a single prob value.
func (p prob) Bits() int {
	return 1
}
// Encode encodes the least-significant bit of v with the range encoder.
// Note that the p value will be changed.
func (p *prob) Encode(e *rangeEncoder, v uint32) error {
	err := e.EncodeBit(v, p)
	return err
}

// Decode decodes a single bit with the range decoder. Note that the p
// value will change.
func (p *prob) Decode(d *rangeDecoder) (v uint32, err error) {
	v, err = d.DecodeBit(p)
	return v, err
}

@ -0,0 +1,69 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"fmt"
)
// minPB and maxPB define the range for PB values. maxPropertyCode is the
// largest possible value of a properties code byte.
const (
	minPB = 0
	maxPB = 4

	maxPropertyCode = (maxPB+1)*(maxLP+1)*(maxLC+1) - 1
)
// Properties contains the parameters LC, LP and PB. LC is the number of
// literal context bits, LP the number of literal position bits and PB
// the number of position bits.
type Properties struct {
	LC int
	LP int
	PB int
}

// String returns the properties in the form "LC x LP y PB z".
func (p *Properties) String() string {
	lc, lp, pb := p.LC, p.LP, p.PB
	return fmt.Sprintf("LC %d LP %d PB %d", lc, lp, pb)
}
// PropertiesForCode converts a properties code byte into a Properties
// value. The code is (PB*5+LP)*9 + LC; an error is returned if the code
// exceeds maxPropertyCode.
func PropertiesForCode(code byte) (p Properties, err error) {
	if code > maxPropertyCode {
		return p, errors.New("lzma: invalid properties code")
	}
	rest := int(code)
	p.LC = rest % 9
	rest /= 9
	p.LP = rest % 5
	rest /= 5
	p.PB = rest % 5
	return p, nil
}
// verify checks that p is not nil and that LC, LP and PB are all within
// their valid ranges.
func (p *Properties) verify() error {
	switch {
	case p == nil:
		return errors.New("lzma: properties are nil")
	case p.LC < minLC || p.LC > maxLC:
		return errors.New("lzma: lc out of range")
	case p.LP < minLP || p.LP > maxLP:
		return errors.New("lzma: lp out of range")
	case p.PB < minPB || p.PB > maxPB:
		return errors.New("lzma: pb out of range")
	}
	return nil
}
// Code converts the properties to a single byte. The function assumes
// that all property components are in range.
func (p Properties) Code() byte {
	code := p.PB*5 + p.LP
	code = code*9 + p.LC
	return byte(code)
}

@ -0,0 +1,222 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"io"
)
// rangeEncoder implements range encoding of single bits. The low value can
// overflow therefore we need uint64. The cache value is used to handle
// overflows.
type rangeEncoder struct {
	// lbw is the limited byte writer that receives the encoded bytes.
	lbw *LimitedByteWriter
	// nrange is the width of the current coding range.
	nrange uint32
	// low is the lower end of the current range; bit 32 acts as carry.
	low uint64
	// cacheLen counts the bytes shiftLow is still holding back: the
	// cache byte plus any 0xff bytes following it.
	cacheLen int64
	// cache is the most recent byte shifted out of low that has not
	// been written yet.
	cache byte
}
// maxInt64 is the maximal value of the int64 type.
const maxInt64 = 1<<63 - 1

// newRangeEncoder creates a new range encoder writing to bw. If bw is
// not already a *LimitedByteWriter, it is wrapped in one with a limit of
// maxInt64. The returned error is always nil.
func newRangeEncoder(bw io.ByteWriter) (re *rangeEncoder, err error) {
	lbw, limited := bw.(*LimitedByteWriter)
	if !limited {
		lbw = &LimitedByteWriter{BW: bw, N: maxInt64}
	}
	re = &rangeEncoder{
		lbw:      lbw,
		nrange:   0xffffffff,
		cacheLen: 1,
	}
	return re, nil
}
// Available returns the number of bytes that can still be written. The
// bytes that Close is going to write (the pending cache bytes plus four)
// are already deducted.
func (e *rangeEncoder) Available() int64 {
	pending := e.cacheLen + 4
	return e.lbw.N - pending
}
// writeByte writes a single byte to the underlying writer. ErrLimit is
// returned without writing if no byte is available any more.
func (e *rangeEncoder) writeByte(c byte) error {
	if e.Available() >= 1 {
		return e.lbw.WriteByte(c)
	}
	return ErrLimit
}
// DirectEncodeBit encodes the least-significant bit of b with
// probability 1/2.
func (e *rangeEncoder) DirectEncodeBit(b uint32) error {
	e.nrange >>= 1
	if b&1 != 0 {
		// a one bit selects the upper half of the range
		e.low += uint64(e.nrange)
	}

	// normalize: keep the range at or above top
	const top = 1 << 24
	if e.nrange < top {
		e.nrange <<= 8
		return e.shiftLow()
	}
	return nil
}
// EncodeBit encodes the least significant bit of b using probability p.
// The p value is updated depending on the bit encoded.
func (e *rangeEncoder) EncodeBit(b uint32, p *prob) error {
	split := p.bound(e.nrange)
	if b&1 != 0 {
		// one bit: continue with the part of the range above split
		e.low += uint64(split)
		e.nrange -= split
		p.dec()
	} else {
		// zero bit: continue with the part of the range below split
		e.nrange = split
		p.inc()
	}

	// normalize: keep the range at or above top
	const top = 1 << 24
	if e.nrange < top {
		e.nrange <<= 8
		return e.shiftLow()
	}
	return nil
}
// Close flushes the encoder by shifting the low value out completely,
// which takes five calls of shiftLow. The first error stops the flush.
func (e *rangeEncoder) Close() error {
	for remaining := 5; remaining > 0; remaining-- {
		if err := e.shiftLow(); err != nil {
			return err
		}
	}
	return nil
}
// shiftLow shifts the low value for 8 bit. The shifted byte is written into
// the byte writer. The cache value is used to handle overflows.
//
// Bytes are held back while the top byte of the lower 32 bits of low is
// 0xff and no carry is set, because a later carry could still change
// them. An error of writeByte is returned immediately.
func (e *rangeEncoder) shiftLow() error {
	// Flush if the outgoing byte is not 0xff or if a carry (a bit at
	// position 32 or above) is present.
	if uint32(e.low) < 0xff000000 || (e.low>>32) != 0 {
		tmp := e.cache
		for {
			// Write the held-back byte plus the carry: first the cache
			// byte, then one 0xff byte per additional pending byte.
			err := e.writeByte(tmp + byte(e.low>>32))
			if err != nil {
				return err
			}
			tmp = 0xff
			e.cacheLen--
			if e.cacheLen <= 0 {
				if e.cacheLen < 0 {
					panic("negative cacheLen")
				}
				break
			}
		}
		// The top byte of the lower 32 bits becomes the new cache byte.
		e.cache = byte(uint32(e.low) >> 24)
	}
	// One more byte is pending now, either the new cache byte or another
	// 0xff byte.
	e.cacheLen++
	// Drop the carry and the outgoing byte, then move low up by 8 bits.
	e.low = uint64(uint32(e.low) << 8)
	return nil
}
// rangeDecoder decodes single bits of the range encoding stream.
type rangeDecoder struct {
	// br supplies the encoded bytes.
	br io.ByteReader
	// nrange is the width of the current coding range.
	nrange uint32
	// code holds the bytes read from the stream relative to the start
	// of the current range; the decode methods keep it below nrange.
	code uint32
}
// newRangeDecoder initializes a range decoder. It reads five bytes from
// the reader and therefore may return an error. The first byte must be
// zero; the next four bytes form the initial code value, which must be
// less than the initial range.
func newRangeDecoder(br io.ByteReader) (d *rangeDecoder, err error) {
	first, err := br.ReadByte()
	if err != nil {
		return nil, err
	}
	if first != 0 {
		return nil, errors.New("newRangeDecoder: first byte not zero")
	}

	d = &rangeDecoder{br: br, nrange: 0xffffffff}
	for remaining := 4; remaining > 0; remaining-- {
		if err = d.updateCode(); err != nil {
			return nil, err
		}
	}
	if d.code >= d.nrange {
		return nil, errors.New("newRangeDecoder: d.code >= d.nrange")
	}
	return d, nil
}
// possiblyAtEnd checks whether the decoder may be at the end of the
// stream, which is the case if the code value is zero.
func (d *rangeDecoder) possiblyAtEnd() bool {
	atEnd := d.code == 0
	return atEnd
}
// DirectDecodeBit decodes a bit with probability 1/2. The return value b
// contains the bit at the least-significant position. All other bits
// are zero.
func (d *rangeDecoder) DirectDecodeBit() (b uint32, err error) {
	d.nrange >>= 1
	d.code -= d.nrange
	if d.code>>31 != 0 {
		// the subtraction wrapped around: the code was in the lower
		// half, so undo it and report a zero bit
		d.code += d.nrange
		b = 0
	} else {
		b = 1
	}
	// d.code stays less than d.nrange

	// normalize: keep the range at or above top
	const top = 1 << 24
	if d.nrange < top {
		d.nrange <<= 8
		// d.code < d.nrange will be maintained
		err = d.updateCode()
	}
	return b, err
}
// DecodeBit decodes a single bit using probability p. The bit is
// returned at the least-significant position. All other bits are zero.
// The probability value is updated.
func (d *rangeDecoder) DecodeBit(p *prob) (b uint32, err error) {
	split := p.bound(d.nrange)
	if d.code >= split {
		// one bit: continue with the part of the range above split
		d.code -= split
		d.nrange -= split
		p.dec()
		b = 1
	} else {
		// zero bit: continue with the part of the range below split
		d.nrange = split
		p.inc()
		b = 0
	}

	// normalize, assuming d.code < d.nrange
	const top = 1 << 24
	if d.nrange < top {
		d.nrange <<= 8
		// d.code < d.nrange will be maintained
		err = d.updateCode()
	}
	return b, err
}
// updateCode reads one byte from the stream and shifts it into the low
// end of the code value. On a read error the code value is left
// unchanged.
func (d *rangeDecoder) updateCode() error {
	next, err := d.br.ReadByte()
	if err == nil {
		d.code = (d.code << 8) | uint32(next)
	}
	return err
}

@ -0,0 +1,100 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package lzma supports the decoding and encoding of LZMA streams.
// Reader and Writer support the classic LZMA format. Reader2 and
// Writer2 support the decoding and encoding of LZMA2 streams.
//
// The package is written completely in Go and doesn't rely on any external
// library.
package lzma
import (
"errors"
"io"
)
// ReaderConfig stores the parameters for the reader of the classic LZMA
// format.
type ReaderConfig struct {
	DictCap int
}

// fill replaces the zero values of the configuration by the default
// values. The default dictionary capacity is 8 MiB.
func (c *ReaderConfig) fill() {
	if c.DictCap != 0 {
		return
	}
	c.DictCap = 8 * 1024 * 1024
}

// Verify checks the reader configuration for errors. Zero values are
// replaced by default values before the check.
func (c *ReaderConfig) Verify() error {
	c.fill()
	if c.DictCap < MinDictCap || int64(c.DictCap) > MaxDictCap {
		return errors.New("lzma: dictionary capacity is out of range")
	}
	return nil
}
// Reader provides a reader for LZMA files or streams.
type Reader struct {
	// lzma is the underlying stream in the classic LZMA format.
	lzma io.Reader
	// h is the header parsed from the start of the stream.
	h header
	// d decodes the data that follows the header.
	d *decoder
}
// NewReader creates a new reader for an LZMA stream in the classic
// format using the default configuration. It reads and checks the header
// of the LZMA stream.
func NewReader(lzma io.Reader) (r *Reader, err error) {
	var cfg ReaderConfig
	return cfg.NewReader(lzma)
}
// NewReader creates a new reader for an LZMA stream in the classic
// format. The function reads and verifies the header of the LZMA stream.
// The dictionary is created with the larger of the capacity given in the
// header and the capacity of the configuration.
func (c ReaderConfig) NewReader(lzma io.Reader) (r *Reader, err error) {
	if err = c.Verify(); err != nil {
		return nil, err
	}

	hdr := make([]byte, HeaderLen)
	if _, rerr := io.ReadFull(lzma, hdr); rerr != nil {
		if rerr == io.EOF {
			// not a single header byte was available
			rerr = errors.New("lzma: unexpected EOF")
		}
		return nil, rerr
	}

	r = &Reader{lzma: lzma}
	if err = r.h.unmarshalBinary(hdr); err != nil {
		return nil, err
	}
	if r.h.dictCap < MinDictCap {
		return nil, errors.New("lzma: dictionary capacity too small")
	}

	// the configured capacity only ever enlarges the dictionary
	capacity := c.DictCap
	if capacity <= r.h.dictCap {
		capacity = r.h.dictCap
	}

	state := newState(r.h.properties)
	dict, err := newDecoderDict(capacity)
	if err != nil {
		return nil, err
	}
	if r.d, err = newDecoder(ByteReader(lzma), state, dict, r.h.size); err != nil {
		return nil, err
	}
	return r, nil
}
// EOSMarker reports the eosMarker flag of the decoder, which indicates
// that an EOS marker has been encountered.
func (r *Reader) EOSMarker() bool {
	dec := r.d
	return dec.eosMarker
}
// Read fills p with uncompressed data by reading from the decoder.
func (r *Reader) Read(p []byte) (n int, err error) {
	n, err = r.d.Read(p)
	return n, err
}

@ -0,0 +1,231 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"io"
"github.com/ulikunitz/xz/internal/xlog"
)
// Reader2Config stores the parameters for the LZMA2 reader.
type Reader2Config struct {
	DictCap int
}

// fill replaces the zero values of the configuration by the default
// values. The default dictionary capacity is 8 MiB.
func (c *Reader2Config) fill() {
	if c.DictCap != 0 {
		return
	}
	c.DictCap = 8 * 1024 * 1024
}

// Verify checks the reader configuration for errors. Zero configuration
// values are replaced by default values before the check.
func (c *Reader2Config) Verify() error {
	c.fill()
	if c.DictCap < MinDictCap || int64(c.DictCap) > MaxDictCap {
		return errors.New("lzma: dictionary capacity is out of range")
	}
	return nil
}
// Reader2 supports the reading of LZMA2 chunk sequences. Note that the
// first chunk should have a dictionary reset and the first compressed
// chunk a properties reset. The chunk sequence may not be terminated by
// an end-of-stream chunk.
type Reader2 struct {
	// r is the underlying LZMA2 chunk sequence.
	r io.Reader
	// err is a sticky error; once set, Read returns it without reading.
	err error

	// dict is the dictionary shared by the decoder and the reader for
	// uncompressed chunks.
	dict *decoderDict
	// ur reads uncompressed chunks; created on the first such chunk and
	// reopened for later ones.
	ur *uncompressedReader
	// decoder reads LZMA compressed chunks; created on the first such
	// chunk and reopened for later ones.
	decoder *decoder
	// chunkReader is the reader for the current chunk, either ur or
	// decoder.
	chunkReader io.Reader

	// cstate tracks the position in the chunk sequence.
	cstate chunkState
}
// NewReader2 creates a reader for an LZMA2 chunk sequence using the
// default configuration.
func NewReader2(lzma2 io.Reader) (r *Reader2, err error) {
	var cfg Reader2Config
	return cfg.NewReader2(lzma2)
}
// NewReader2 creates an LZMA2 reader using the given configuration. An
// error is returned only for an invalid configuration or a failure to
// create the dictionary. An error while parsing the first chunk header
// is stored in the reader and reported by the first call to Read.
func (c Reader2Config) NewReader2(lzma2 io.Reader) (r *Reader2, err error) {
	if err = c.Verify(); err != nil {
		return nil, err
	}
	dict, err := newDecoderDict(c.DictCap)
	if err != nil {
		return nil, err
	}
	r = &Reader2{r: lzma2, cstate: start, dict: dict}
	// a nil result leaves the sticky error unset
	r.err = r.startChunk()
	return r, nil
}
// uncompressed reports whether the chunk type specifies an uncompressed
// chunk (cU or cUD).
func uncompressed(ctype chunkType) bool {
	switch ctype {
	case cU, cUD:
		return true
	}
	return false
}
// startChunk reads the next chunk header and prepares chunkReader for
// the data of that chunk.
//
// It returns io.EOF after an end-of-stream chunk and
// io.ErrUnexpectedEOF if the underlying reader reports io.EOF while the
// header is read. Chunk types that are not allowed in the current chunk
// state are reported as errors. On any error chunkReader is nil.
func (r *Reader2) startChunk() error {
	r.chunkReader = nil

	header, err := readChunkHeader(r.r)
	switch {
	case err == io.EOF:
		return io.ErrUnexpectedEOF
	case err != nil:
		return err
	}
	xlog.Debugf("chunk header %v", header)

	if err = r.cstate.next(header.ctype); err != nil {
		return err
	}
	if r.cstate == stop {
		return io.EOF
	}

	// both chunk types with a trailing D request a dictionary reset
	switch header.ctype {
	case cUD, cLRND:
		r.dict.Reset()
	}

	// the header stores the size minus one
	size := int64(header.uncompressed) + 1

	if uncompressed(header.ctype) {
		if r.ur == nil {
			r.ur = newUncompressedReader(r.r, r.dict, size)
		} else {
			r.ur.Reopen(r.r, size)
		}
		r.chunkReader = r.ur
		return nil
	}

	// compressed chunk: limit the decoder to the compressed size, which
	// is stored minus one as well
	br := ByteReader(io.LimitReader(r.r, int64(header.compressed)+1))
	if r.decoder == nil {
		// first compressed chunk: create the decoder
		r.decoder, err = newDecoder(br, newState(header.props), r.dict, size)
	} else {
		switch header.ctype {
		case cLR:
			r.decoder.State.Reset()
		case cLRN, cLRND:
			r.decoder.State = newState(header.props)
		}
		err = r.decoder.Reopen(br, size)
	}
	if err != nil {
		return err
	}
	r.chunkReader = r.decoder
	return nil
}
// Read reads data from the LZMA2 chunk sequence into p. When the current
// chunk is exhausted, the next chunk is started transparently. The first
// error encountered is stored and returned by all subsequent calls.
func (r *Reader2) Read(p []byte) (n int, err error) {
	if r.err != nil {
		return 0, r.err
	}
	for n < len(p) {
		k, rerr := r.chunkReader.Read(p[n:])
		n += k
		switch {
		case rerr == io.EOF:
			// current chunk is done; continue with the next one
			if rerr = r.startChunk(); rerr != nil {
				r.err = rerr
				return n, rerr
			}
		case rerr != nil:
			r.err = rerr
			return n, rerr
		case k == 0:
			// no data and no error would let the loop spin forever
			r.err = errors.New("lzma: Reader2 doesn't get data")
			return n, r.err
		}
	}
	return n, nil
}
// EOS reports whether the LZMA2 stream has been terminated by an
// end-of-stream chunk, i.e. whether the chunk state is stop.
func (r *Reader2) EOS() bool {
	terminated := r.cstate == stop
	return terminated
}
// uncompressedReader is used to read uncompressed chunks.
type uncompressedReader struct {
	// lr limits reading to the bytes of the current chunk.
	lr io.LimitedReader
	// Dict is the dictionary the chunk data is copied into and read
	// back from.
	Dict *decoderDict
	// eof is set once copying from lr has returned io.EOF.
	eof bool
	// err is a sticky error; once set, Read returns it without reading.
	err error
}
// newUncompressedReader creates an uncompressedReader that reads at most
// size bytes from r through the dictionary dict.
func newUncompressedReader(r io.Reader, dict *decoderDict, size int64) *uncompressedReader {
	limited := io.LimitedReader{R: r, N: size}
	return &uncompressedReader{lr: limited, Dict: dict}
}
// Reopen prepares the uncompressed reader for a new chunk of size bytes
// read from r. The sticky error and the eof flag are cleared; the
// dictionary is kept.
func (ur *uncompressedReader) Reopen(r io.Reader, size int64) {
	ur.lr = io.LimitedReader{R: r, N: size}
	ur.eof = false
	ur.err = nil
}
// fill copies uncompressed chunk data into the dictionary, at most as
// many bytes as the dictionary has available.
//
// It returns nil if the copy succeeded or if it hit io.EOF after copying
// at least one byte. Once the source is exhausted it returns io.EOF if
// the whole chunk has been read and io.ErrUnexpectedEOF if bytes of the
// chunk are still missing. Any other copy error is returned unchanged.
func (ur *uncompressedReader) fill() error {
	if !ur.eof {
		room := int64(ur.Dict.Available())
		copied, err := io.CopyN(ur.Dict, &ur.lr, room)
		if err != io.EOF {
			// includes the successful case err == nil
			return err
		}
		ur.eof = true
		if copied > 0 {
			// hand out the copied data before reporting the end
			return nil
		}
	}
	if ur.lr.N == 0 {
		return io.EOF
	}
	return io.ErrUnexpectedEOF
}
// Read reads uncompressed data into p. Data is taken from the dictionary,
// which is refilled from the limited reader whenever it cannot satisfy
// the request. If p has been filled completely, a nil error is returned;
// otherwise the error that stopped the reading is stored and returned by
// all subsequent calls.
func (ur *uncompressedReader) Read(p []byte) (n int, err error) {
	if ur.err != nil {
		return 0, ur.err
	}
	for err == nil {
		var k int
		k, err = ur.Dict.Read(p[n:])
		n += k
		if n >= len(p) {
			// request satisfied; an error at this point is dropped
			return n, nil
		}
		if err == nil {
			err = ur.fill()
		}
	}
	ur.err = err
	return n, err
}

@ -0,0 +1,145 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
// states defines the overall state count
const states = 12

// state maintains the full state of the operation encoding or decoding
// process.
type state struct {
	// rep stores four uint32 repetition values.
	// NOTE(review): presumably the most recently used match distances;
	// confirm against the operation codec.
	rep [4]uint32
	// Probability tables. isMatch and isRepG0Long have one entry per
	// combination of state and posState; the others one entry per state.
	isMatch     [states << maxPosBits]prob
	isRepG0Long [states << maxPosBits]prob
	isRep       [states]prob
	isRepG0     [states]prob
	isRepG1     [states]prob
	isRepG2     [states]prob
	// codecs for literals, match lengths, repetition lengths and
	// distances
	litCodec    literalCodec
	lenCodec    lengthCodec
	repLenCodec lengthCodec
	distCodec   distCodec
	// state is the current state number, maintained by the updateState
	// methods.
	state uint32
	// posBitMask is (1 << Properties.PB) - 1; it is set by Reset.
	posBitMask uint32
	// Properties are the LZMA properties the state was created with.
	Properties Properties
}
// initProbSlice sets every probability of the slice to probInit.
func initProbSlice(p []prob) {
	for i := 0; i < len(p); i++ {
		p[i] = probInit
	}
}
// Reset sets all state information to the original values. Only the
// properties are kept: the position bit mask is derived from PB, all
// probabilities are set to probInit and all codecs are initialized.
func (s *state) Reset() {
	props := s.Properties
	*s = state{
		Properties: props,
		posBitMask: (uint32(1) << uint(props.PB)) - 1,
	}

	tables := [][]prob{
		s.isMatch[:],
		s.isRep[:],
		s.isRepG0[:],
		s.isRepG1[:],
		s.isRepG2[:],
		s.isRepG0Long[:],
	}
	for _, table := range tables {
		initProbSlice(table)
	}

	s.litCodec.init(props.LC, props.LP)
	s.lenCodec.init()
	s.repLenCodec.init()
	s.distCodec.init()
}
// newState allocates a state for the given Properties and resets it to its
// initial values.
func newState(p Properties) *state {
	s := new(state)
	s.Properties = p
	s.Reset()
	return s
}
// deepcopy makes s a deep copy of src. Copying a state onto itself is a no-op.
func (s *state) deepcopy(src *state) {
	if s == src {
		return
	}

	// Plain values and fixed-size arrays are copied by assignment.
	s.Properties = src.Properties
	s.posBitMask = src.posBitMask
	s.state = src.state
	s.rep = src.rep
	s.isMatch, s.isRepG0Long = src.isMatch, src.isRepG0Long
	s.isRep, s.isRepG0 = src.isRep, src.isRepG0
	s.isRepG1, s.isRepG2 = src.isRepG1, src.isRepG2

	// The codecs provide their own deepcopy methods.
	s.litCodec.deepcopy(&src.litCodec)
	s.lenCodec.deepcopy(&src.lenCodec)
	s.repLenCodec.deepcopy(&src.repLenCodec)
	s.distCodec.deepcopy(&src.distCodec)
}
// cloneState returns a newly allocated deep copy of the given state.
func cloneState(src *state) *state {
	dup := &state{}
	dup.deepcopy(src)
	return dup
}
// updateStateLiteral advances the state machine after a literal: states below
// 4 go to 0, states 4..9 drop by 3 and all higher states drop by 6.
func (s *state) updateStateLiteral() {
	if s.state < 4 {
		s.state = 0
	} else if s.state < 10 {
		s.state -= 3
	} else {
		s.state -= 6
	}
}
// updateStateMatch advances the state machine after a match: 7 when the
// current state is below 7, otherwise 10.
func (s *state) updateStateMatch() {
	next := uint32(10)
	if s.state < 7 {
		next = 7
	}
	s.state = next
}
// updateStateRep advances the state machine after a repetition: 8 when the
// current state is below 7, otherwise 11.
func (s *state) updateStateRep() {
	next := uint32(11)
	if s.state < 7 {
		next = 8
	}
	s.state = next
}
// updateStateShortRep advances the state machine after a short repetition:
// 9 when the current state is below 7, otherwise 11.
func (s *state) updateStateShortRep() {
	next := uint32(11)
	if s.state < 7 {
		next = 9
	}
	s.state = next
}
// states derives the values used by the operation codec from the current
// state and the dictionary head position: state1 is the raw state, posState
// the head masked with posBitMask, and state2 the state shifted left by
// maxPosBits combined with posState.
func (s *state) states(dictHead int64) (state1, state2, posState uint32) {
	posState = s.posBitMask & uint32(dictHead)
	return s.state, (s.state << maxPosBits) | posState, posState
}
// litState computes the literal state: the low LP bits of the dictionary head
// shifted left by LC, combined with the top LC bits of the previous byte.
func (s *state) litState(prev byte, dictHead int64) uint32 {
	lc := uint(s.Properties.LC)
	lp := uint(s.Properties.LP)
	posBits := uint32(dictHead) & ((1 << lp) - 1)
	prevBits := uint32(prev) >> (8 - lc)
	return (posBits << lc) | prevBits
}

@ -0,0 +1,133 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
// treeCodec encodes or decodes values with a fixed bit size. It uses a tree
// of probability values; the root of the tree corresponds to the
// most-significant bit of the value.
type treeCodec struct {
probTree
}
// makeTreeCodec returns a tree codec for values of the given bit size. The
// bits value must be inside the range [1,32]; makeProbTree panics otherwise.
func makeTreeCodec(bits int) treeCodec {
	return treeCodec{probTree: makeProbTree(bits)}
}
// deepcopy initializes tc as a deep copy of the source by deep-copying the
// embedded probability tree.
func (tc *treeCodec) deepcopy(src *treeCodec) {
tc.probTree.deepcopy(&src.probTree)
}
// Encode writes the low tc.bits bits of v with the range encoder, starting at
// the most-significant bit. Each bit is coded with the probability selected by
// the bits already emitted. The first encoder error is returned.
func (tc *treeCodec) Encode(e *rangeEncoder, v uint32) (err error) {
	node := uint32(1)
	for remaining := int(tc.bits); remaining > 0; remaining-- {
		bit := (v >> uint(remaining-1)) & 1
		if err = e.EncodeBit(bit, &tc.probs[node]); err != nil {
			return err
		}
		node = (node << 1) | bit
	}
	return nil
}
// Decode reads a value of tc.bits bits with the range decoder, starting at the
// most-significant bit. Errors reported by the range decoder are returned
// together with a zero value.
func (tc *treeCodec) Decode(d *rangeDecoder) (v uint32, err error) {
	width := uint(tc.bits)
	node := uint32(1)
	for remaining := width; remaining > 0; remaining-- {
		bit, derr := d.DecodeBit(&tc.probs[node])
		if derr != nil {
			return 0, derr
		}
		node = (node << 1) | bit
	}
	// node carries a leading marker bit above the decoded bits; remove it.
	return node - (1 << width), nil
}
// treeReverseCodec is a tree codec in which the least-significant bit of the
// value is coded first, i.e. it is the root of the probability tree.
type treeReverseCodec struct {
probTree
}
// deepcopy initializes the treeReverseCodec as a deep copy of the source by
// deep-copying the embedded probability tree.
func (tc *treeReverseCodec) deepcopy(src *treeReverseCodec) {
tc.probTree.deepcopy(&src.probTree)
}
// makeTreeReverseCodec returns a treeReverseCodec for values of the given bit
// size. The bits argument must be in the range [1,32]; makeProbTree panics
// otherwise.
func makeTreeReverseCodec(bits int) treeReverseCodec {
	return treeReverseCodec{probTree: makeProbTree(bits)}
}
// Encode writes the low tc.bits bits of v with the range encoder, starting at
// the least-significant bit. The first error reported by the range encoder is
// returned.
func (tc *treeReverseCodec) Encode(v uint32, e *rangeEncoder) (err error) {
	node := uint32(1)
	for pos, width := uint(0), uint(tc.bits); pos < width; pos++ {
		bit := (v >> pos) & 1
		if err = e.EncodeBit(bit, &tc.probs[node]); err != nil {
			return err
		}
		node = (node << 1) | bit
	}
	return nil
}
// Decode reads a value of tc.bits bits with the range decoder, starting at the
// least-significant bit. Errors returned by the range decoder are passed on
// together with a zero value.
func (tc *treeReverseCodec) Decode(d *rangeDecoder) (v uint32, err error) {
	node := uint32(1)
	for pos, width := uint(0), uint(tc.bits); pos < width; pos++ {
		bit, derr := d.DecodeBit(&tc.probs[node])
		if derr != nil {
			return 0, derr
		}
		v |= bit << pos
		node = (node << 1) | bit
	}
	return v, nil
}
// probTree stores the probability values used by the tree codecs
// (treeCodec and treeReverseCodec) together with the bit size of the values.
type probTree struct {
// probs has 1<<bits entries when created by makeProbTree.
probs []prob
// bits is the number of bits per coded value.
bits byte
}
// deepcopy makes t a deep copy of src: the probabilities are copied into a
// freshly allocated slice. Copying a tree onto itself is a no-op.
func (t *probTree) deepcopy(src *probTree) {
	if t == src {
		return
	}
	dup := make([]prob, len(src.probs))
	copy(dup, src.probs)
	t.probs, t.bits = dup, src.bits
}
// makeProbTree returns a probTree for values of the given bit size with all
// 1<<bits probabilities set to probInit. It panics if bits is outside of the
// range [1,32].
func makeProbTree(bits int) probTree {
	if bits < 1 || bits > 32 {
		panic("bits outside of range [1,32]")
	}
	probs := make([]prob, 1<<uint(bits))
	for i := 0; i < len(probs); i++ {
		probs[i] = probInit
	}
	return probTree{probs: probs, bits: byte(bits)}
}
// Bits returns the number of bits of the values to be encoded or decoded.
func (t *probTree) Bits() int {
return int(t.bits)
}

@ -0,0 +1,209 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"bufio"
"errors"
"io"
)
// MinDictCap and MaxDictCap provide the range of supported dictionary
// capacities: 4 KiB (1<<12) up to 4 GiB minus one byte (1<<32 - 1).
const (
MinDictCap = 1 << 12
MaxDictCap = 1<<32 - 1
)
// WriterConfig defines the configuration parameters for a writer.
type WriterConfig struct {
// Properties for the encoding. If it is nil, the value
// {LC: 3, LP: 0, PB: 2} will be chosen.
Properties *Properties
// The capacity of the dictionary. If DictCap is zero, the value
// 8 MiB will be chosen.
DictCap int
// Size of the lookahead buffer; value 0 indicates the default size
// 4096.
BufSize int
// Match algorithm
Matcher MatchAlgorithm
// SizeInHeader indicates that the header will contain an
// explicit size.
SizeInHeader bool
// Size of the data to be encoded. A positive value implies
// that an explicit size will be set in the header.
Size int64
// EOSMarker requests that the EOS marker is written.
// If no explicit size has been given, the EOS marker will be
// set automatically.
EOSMarker bool
}
// fill replaces zero-value fields with their explicit defaults. A positive
// Size turns SizeInHeader on, and a configuration without a size in the header
// always gets the EOS marker.
func (c *WriterConfig) fill() {
	if c.Properties == nil {
		c.Properties = &Properties{LC: 3, LP: 0, PB: 2}
	}
	if c.DictCap == 0 {
		c.DictCap = 8 * 1024 * 1024
	}
	if c.BufSize == 0 {
		c.BufSize = 4096
	}
	// Order matters: EOSMarker depends on the final value of SizeInHeader.
	c.SizeInHeader = c.SizeInHeader || c.Size > 0
	c.EOSMarker = c.EOSMarker || !c.SizeInHeader
}
// Verify checks WriterConfig for errors. Zero values are replaced with their
// defaults before the checks run, so Verify modifies the configuration.
//
// It returns an error if c is nil, the properties are invalid, the dictionary
// capacity is outside [MinDictCap, MaxDictCap], the lookahead buffer is
// smaller than maxMatchLen, the size is negative, neither a size nor an EOS
// marker is configured, or the matcher is invalid.
func (c *WriterConfig) Verify() error {
	// The nil check must come first: fill dereferences c.
	if c == nil {
		return errors.New("lzma: WriterConfig is nil")
	}
	c.fill()
	if c.Properties == nil {
		return errors.New("lzma: WriterConfig has no Properties set")
	}
	if err := c.Properties.verify(); err != nil {
		return err
	}
	if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) {
		return errors.New("lzma: dictionary capacity is out of range")
	}
	if !(maxMatchLen <= c.BufSize) {
		return errors.New("lzma: lookahead buffer size too small")
	}
	if c.SizeInHeader {
		if c.Size < 0 {
			return errors.New("lzma: negative size not supported")
		}
	} else if !c.EOSMarker {
		return errors.New("lzma: EOS marker is required")
	}
	if err := c.Matcher.verify(); err != nil {
		return err
	}
	return nil
}
// header builds the header structure for this configuration. The size is -1
// unless SizeInHeader is set, in which case it is Size.
func (c *WriterConfig) header() header {
	size := int64(-1)
	if c.SizeInHeader {
		size = c.Size
	}
	return header{
		properties: *c.Properties,
		dictCap:    c.DictCap,
		size:       size,
	}
}
// Writer writes an LZMA stream in the classic format.
type Writer struct {
// h is the header written at the start of the stream.
h header
// bw receives the encoded bytes.
bw io.ByteWriter
// buf is only set when the underlying writer had to be wrapped to obtain
// an io.ByteWriter; Close flushes it.
buf *bufio.Writer
e *encoder
}
// NewWriter creates a new LZMA writer for the classic format. The
// method will write the header to the underlying stream.
//
// The configuration is verified first (filling in defaults on the local copy
// c); any verification or construction error is returned with a nil writer.
func (c WriterConfig) NewWriter(lzma io.Writer) (w *Writer, err error) {
if err = c.Verify(); err != nil {
return nil, err
}
w = &Writer{h: c.header()}
// Use lzma directly if it can write single bytes, otherwise wrap it in a
// bufio.Writer that Close flushes.
var ok bool
w.bw, ok = lzma.(io.ByteWriter)
if !ok {
w.buf = bufio.NewWriter(lzma)
w.bw = w.buf
}
state := newState(w.h.properties)
m, err := c.Matcher.new(w.h.dictCap)
if err != nil {
return nil, err
}
dict, err := newEncoderDict(w.h.dictCap, c.BufSize, m)
if err != nil {
return nil, err
}
var flags encoderFlags
if c.EOSMarker {
flags = eosMarker
}
if w.e, err = newEncoder(w.bw, state, dict, flags); err != nil {
return nil, err
}
// The header is written only after all components were created.
if err = w.writeHeader(); err != nil {
return nil, err
}
return w, nil
}
// NewWriter creates a new LZMA writer using the classic format and the
// default configuration (a zero WriterConfig). The function writes the header
// to the underlying stream.
func NewWriter(lzma io.Writer) (w *Writer, err error) {
return WriterConfig{}.NewWriter(lzma)
}
// writeHeader marshals the LZMA header and writes it to the stream. It returns
// the marshalling error or the write error, whichever occurs first.
func (w *Writer) writeHeader() error {
	data, err := w.h.marshalBinary()
	if err == nil {
		_, err = w.bw.(io.Writer).Write(data)
	}
	return err
}
// Write puts data into the Writer. When the header declares a size, p is
// truncated to the bytes still allowed and ErrNoSpace is reported for the
// remainder. An error from the encoder takes precedence over ErrNoSpace.
func (w *Writer) Write(p []byte) (n int, err error) {
	if w.h.size >= 0 {
		remaining := w.h.size - w.e.Compressed() - int64(w.e.dict.Buffered())
		if remaining < 0 {
			remaining = 0
		}
		if int64(len(p)) > remaining {
			p, err = p[:remaining], ErrNoSpace
		}
	}
	written, werr := w.e.Write(p)
	if werr != nil {
		err = werr
	}
	return written, err
}
// Close finishes the LZMA stream: the encoder is closed and, if the writer
// wrapped the destination in a buffer, that buffer is flushed. When the header
// declares a size that differs from the amount of data written, errSize is
// returned and nothing is closed or flushed.
func (w *Writer) Close() error {
	if w.h.size >= 0 {
		total := w.e.Compressed() + int64(w.e.dict.Buffered())
		if total != w.h.size {
			return errSize
		}
	}
	err := w.e.Close()
	if w.buf == nil {
		return err
	}
	// Always flush; an encoder error takes precedence over a flush error.
	if ferr := w.buf.Flush(); err == nil {
		err = ferr
	}
	return err
}

@ -0,0 +1,305 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"bytes"
"errors"
"io"
)
// Writer2Config is used to create a Writer2 using parameters.
type Writer2Config struct {
// The properties for the encoding. If it is nil, the value
// {LC: 3, LP: 0, PB: 2} will be chosen.
Properties *Properties
// The capacity of the dictionary. If DictCap is zero, the value
// 8 MiB will be chosen.
DictCap int
// Size of the lookahead buffer; value 0 indicates the default size
// 4096.
BufSize int
// Match algorithm
Matcher MatchAlgorithm
}
// fill replaces zero values with default values: properties
// {LC: 3, LP: 0, PB: 2}, an 8 MiB dictionary and a 4096-byte lookahead buffer.
func (c *Writer2Config) fill() {
if c.Properties == nil {
c.Properties = &Properties{LC: 3, LP: 0, PB: 2}
}
if c.DictCap == 0 {
c.DictCap = 8 * 1024 * 1024
}
if c.BufSize == 0 {
c.BufSize = 4096
}
}
// Verify checks the Writer2Config for correctness. Zero values are replaced
// by default values before the checks run, so Verify modifies the
// configuration.
//
// It returns an error if c is nil, the properties are invalid, the dictionary
// capacity is outside [MinDictCap, MaxDictCap], the lookahead buffer is
// smaller than maxMatchLen, LC+LP exceeds 4, or the matcher is invalid.
func (c *Writer2Config) Verify() error {
	// The nil check must come first: fill dereferences c.
	if c == nil {
		return errors.New("lzma: Writer2Config is nil")
	}
	c.fill()
	if c.Properties == nil {
		return errors.New("lzma: Writer2Config has no Properties set")
	}
	if err := c.Properties.verify(); err != nil {
		return err
	}
	if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) {
		return errors.New("lzma: dictionary capacity is out of range")
	}
	if !(maxMatchLen <= c.BufSize) {
		return errors.New("lzma: lookahead buffer size too small")
	}
	if c.Properties.LC+c.Properties.LP > 4 {
		return errors.New("lzma: sum of lc and lp exceeds 4")
	}
	if err := c.Matcher.verify(); err != nil {
		return err
	}
	return nil
}
// Writer2 supports the creation of an LZMA2 stream. Note that written data
// is buffered, so call Flush or Close to write data to the underlying
// writer. The Close method writes the end-of-stream marker to the stream.
// You may therefore be able to concatenate the output of two writers as long
// as the output of the first writer has only been flushed but not closed.
//
// Any change to the fields Properties, DictCap must be done before the
// first call to Write, Flush or Close.
type Writer2 struct {
// w is the underlying writer that receives the chunks.
w io.Writer
// start is the encoder state at the beginning of the current chunk.
start *state
encoder *encoder
// cstate and ctype track the chunk sequence and the type of the current chunk.
cstate chunkState
ctype chunkType
// buf collects the compressed data of the current chunk; lbw limits how
// much the encoder may write into it.
buf bytes.Buffer
lbw LimitedByteWriter
}
// NewWriter2 creates an LZMA2 chunk sequence writer with the default
// parameters and options (a zero Writer2Config).
func NewWriter2(lzma2 io.Writer) (w *Writer2, err error) {
return Writer2Config{}.NewWriter2(lzma2)
}
// NewWriter2 creates a new LZMA2 writer using the given configuration. The
// configuration is verified first (filling in defaults on the local copy c);
// any verification or construction error is returned with a nil writer.
func (c Writer2Config) NewWriter2(lzma2 io.Writer) (w *Writer2, err error) {
if err = c.Verify(); err != nil {
return nil, err
}
// On the right-hand side, start is the package-level initial chunk state,
// not the struct field of the same name.
w = &Writer2{
w: lzma2,
start: newState(*c.Properties),
cstate: start,
ctype: start.defaultChunkType(),
}
// Reserve room for a full chunk of compressed data and cap the encoder
// output at that size.
w.buf.Grow(maxCompressed)
w.lbw = LimitedByteWriter{BW: &w.buf, N: maxCompressed}
m, err := c.Matcher.new(c.DictCap)
if err != nil {
return nil, err
}
d, err := newEncoderDict(c.DictCap, c.BufSize, m)
if err != nil {
return nil, err
}
// The encoder works on a clone so that w.start keeps the chunk's initial state.
w.encoder, err = newEncoder(&w.lbw, cloneState(w.start), d, 0)
if err != nil {
return nil, err
}
return w, nil
}
// written returns the number of bytes written to the current chunk: the
// encoder's Compressed count plus the bytes still buffered in its dictionary.
// It is zero when no encoder is set.
func (w *Writer2) written() int {
	enc := w.encoder
	if enc == nil {
		return 0
	}
	return int(enc.Compressed()) + enc.dict.Buffered()
}
// errClosed is returned by Write, Flush and Close once the writer has been
// closed.
var errClosed = errors.New("lzma: writer closed")
// Write writes data to the LZMA2 stream. Note that written data will be
// buffered. Use Flush or Close to ensure that data is written to the
// underlying writer.
//
// It returns errClosed after Close. The data is fed to the encoder in pieces
// that fit into the current chunk; a chunk is flushed whenever it is full.
func (w *Writer2) Write(p []byte) (n int, err error) {
if w.cstate == stop {
return 0, errClosed
}
for n < len(p) {
// m is the room left in the current chunk for uncompressed data.
m := maxUncompressed - w.written()
if m <= 0 {
panic("lzma: maxUncompressed reached")
}
// q is the next piece of p, at most m bytes long.
var q []byte
if n+m < len(p) {
q = p[n : n+m]
} else {
q = p[n:]
}
k, err := w.encoder.Write(q)
n += k
// ErrLimit is not fatal here; it triggers a chunk flush below.
if err != nil && err != ErrLimit {
return n, err
}
// Flush when the encoder hit its limit or the chunk has been filled up.
if err == ErrLimit || k == m {
if err = w.flushChunk(); err != nil {
return n, err
}
}
}
return n, nil
}
// writeUncompressedChunk writes an uncompressed chunk to the LZMA2
// stream: the chunk header followed by the chunk's data copied from the
// encoder dictionary. It returns an error for an empty chunk and panics if
// the chunk exceeds maxUncompressed.
func (w *Writer2) writeUncompressedChunk() error {
// u is the encoder's Compressed() count for this chunk; it is used below
// as the number of bytes to copy from the dictionary.
u := w.encoder.Compressed()
if u <= 0 {
return errors.New("lzma: can't write empty uncompressed chunk")
}
if u > maxUncompressed {
panic("overrun of uncompressed data limit")
}
// Map the planned chunk type to its uncompressed counterpart.
switch w.ctype {
case cLRND:
w.ctype = cUD
default:
w.ctype = cU
}
// The compressed output is discarded, so the encoder continues from the
// state it had at the start of the chunk.
w.encoder.state = w.start
// The header stores the size minus one.
header := chunkHeader{
ctype: w.ctype,
uncompressed: uint32(u - 1),
}
hdata, err := header.MarshalBinary()
if err != nil {
return err
}
if _, err = w.w.Write(hdata); err != nil {
return err
}
_, err = w.encoder.dict.CopyN(w.w, int(u))
return err
}
// writeCompressedChunk writes a compressed chunk to the underlying
// writer: the chunk header followed by the buffered compressed data. It
// returns an error for an empty chunk and panics if the chunk type is
// uncompressed, if no compressed data is buffered, or if a size limit is
// exceeded.
func (w *Writer2) writeCompressedChunk() error {
if w.ctype == cU || w.ctype == cUD {
panic("chunk type uncompressed")
}
// u is the encoder's Compressed() count for this chunk; it is stored in
// the header as the uncompressed size.
u := w.encoder.Compressed()
if u <= 0 {
return errors.New("writeCompressedChunk: empty chunk")
}
if u > maxUncompressed {
panic("overrun of uncompressed data limit")
}
// c is the number of compressed bytes collected in the buffer.
c := w.buf.Len()
if c <= 0 {
panic("no compressed data")
}
if c > maxCompressed {
panic("overrun of compressed data limit")
}
// The header stores both sizes minus one.
header := chunkHeader{
ctype: w.ctype,
uncompressed: uint32(u - 1),
compressed: uint16(c - 1),
props: w.encoder.state.Properties,
}
hdata, err := header.MarshalBinary()
if err != nil {
return err
}
if _, err = w.w.Write(hdata); err != nil {
return err
}
// Drain the buffered compressed data into the underlying writer.
_, err = io.Copy(w.w, &w.buf)
return err
}
// writeChunk writes a single chunk to the underlying writer. The chunk is
// written uncompressed only when that representation, including its header,
// is strictly smaller than the compressed one.
func (w *Writer2) writeChunk() error {
	rawLen := int(uncompressedHeaderLen + w.encoder.Compressed())
	packedLen := headerLen(w.ctype) + w.buf.Len()
	if rawLen >= packedLen {
		return w.writeCompressedChunk()
	}
	return w.writeUncompressedChunk()
}
// flushChunk terminates the current chunk and writes it out. The encoder is
// reset to support the next chunk. It does nothing if no data has been
// written to the current chunk.
func (w *Writer2) flushChunk() error {
if w.written() == 0 {
return nil
}
var err error
// Finish the encoder output for this chunk before it is written.
if err = w.encoder.Close(); err != nil {
return err
}
if err = w.writeChunk(); err != nil {
return err
}
// Start over with an empty buffer and the full compressed-size budget.
w.buf.Reset()
w.lbw.N = maxCompressed
if err = w.encoder.Reopen(&w.lbw); err != nil {
return err
}
// Advance the chunk sequence and pick the type of the next chunk.
if err = w.cstate.next(w.ctype); err != nil {
return err
}
w.ctype = w.cstate.defaultChunkType()
// Remember the encoder state at the start of the next chunk.
w.start = cloneState(w.encoder.state)
return nil
}
// Flush writes all buffered data out to the underlying stream, which may
// produce several chunks. It returns errClosed if the writer has been closed.
func (w *Writer2) Flush() error {
	if w.cstate == stop {
		return errClosed
	}
	for {
		if w.written() <= 0 {
			return nil
		}
		if err := w.flushChunk(); err != nil {
			return err
		}
	}
}
// Close terminates the LZMA2 stream: it flushes all buffered data and then
// writes the zero-byte end-of-stream chunk. It returns errClosed if the writer
// is already closed. A flush or write error is returned and leaves the writer
// open, so that a failed Close is never reported as success.
func (w *Writer2) Close() error {
	if w.cstate == stop {
		return errClosed
	}
	if err := w.Flush(); err != nil {
		return err
	}
	// write zero byte EOS chunk
	if _, err := w.w.Write([]byte{0}); err != nil {
		return err
	}
	w.cstate = stop
	return nil
}

@ -0,0 +1,17 @@
kind: pipeline
type: docker
name: clone
steps:
- name: Test
image: golang:1.18
commands:
- go env -w GO111MODULE=on
- go env -w GOPROXY=https://goproxy.cn,direct
- go test -v ./...
- name: Benchmark
image: golang:1.18
commands:
- go env -w GO111MODULE=on
- go env -w GOPROXY=https://goproxy.cn,direct
- go test -bench=. -benchmem

@ -0,0 +1,9 @@
.env
.git
.svn
.idea
.vscode
*.log
goinit.sh
gomod.sh
/vendor/

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 李光春
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,17 @@
<h1>
<a href="https://www.dtapp.net/">Golang Random</a>
</h1>
📦 Golang Random
[comment]: <> (go)
[![godoc](https://pkg.go.dev/badge/go.dtapp.net/gorandom?status.svg)](https://pkg.go.dev/go.dtapp.net/gorandom)
[![goproxy.cn](https://goproxy.cn/stats/go.dtapp.net/gorandom/badges/download-count.svg)](https://goproxy.cn/stats/go.dtapp.net/gorandom)
[![goreportcard.com](https://goreportcard.com/badge/go.dtapp.net/gorandom)](https://goreportcard.com/report/go.dtapp.net/gorandom)
[![deps.dev](https://img.shields.io/badge/deps-go-red.svg)](https://deps.dev/go/go.dtapp.net%2Fgorandom)
#### 安装
```shell
go get -v -u go.dtapp.net/gorandom
```

@ -0,0 +1,50 @@
package gorandom
import (
"math/rand"
"time"
)
// Character sets used by the random string helpers.
// numbers holds the decimal digits.
const numbers string = "0123456789"
// letters holds the upper- and lower-case ASCII letters.
const letters string = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
// specials holds the punctuation characters that are part of ascii.
const specials = "~!@#$%^*()_+-=[]{}|;:,./<>?"
// alphanumerics is letters followed by numbers.
const alphanumerics string = letters + numbers
// ascii is alphanumerics followed by specials.
const ascii string = alphanumerics + specials
// random returns a string of n bytes, each picked at random from chars.
//
// It returns the empty string when n is not positive or when chars is empty
// (there is nothing to pick from). The generator is seeded from the current
// time on every call, so the result is not suitable for secrets.
func random[T int | int64](n T, chars string) string {
	if n <= 0 || len(chars) == 0 {
		return ""
	}
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	out := make([]byte, n)
	for i := range out {
		out[i] = chars[r.Intn(len(chars))]
	}
	return string(out)
}
// Alphanumeric returns a random string of n ASCII letters and digits.
func Alphanumeric[T int | int64](n T) string {
return random(n, alphanumerics)
}
// Alphabetic returns a random string of n ASCII letters.
func Alphabetic[T int | int64](n T) string {
return random(n, letters)
}
// Numeric returns a random string of n decimal digits.
func Numeric[T int | int64](n T) string {
return random(n, numbers)
}
// Ascii returns a random string of n characters drawn from letters, digits
// and the punctuation characters in specials.
func Ascii[T int | int64](n T) string {
return random(n, ascii)
}

@ -0,0 +1,3 @@
package gorandom
// Version is the version string of the gorandom package.
const Version = "1.0.1"

@ -0,0 +1,17 @@
kind: pipeline
type: docker
name: clone
steps:
- name: Test
image: golang:1.18
commands:
- go env -w GO111MODULE=on
- go env -w GOPROXY=https://goproxy.cn,direct
- go test -v ./...
- name: Benchmark
image: golang:1.18
commands:
- go env -w GO111MODULE=on
- go env -w GOPROXY=https://goproxy.cn,direct
- go test -bench=. -benchmem

@ -0,0 +1,8 @@
.env
.git
.svn
.idea
.vscode
*.log
gomod.sh
/vendor/

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 李光春
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,27 @@
<h1>
<a href="https://www.dtapp.net/">Golang Request</a>
</h1>
📦 Golang Request
[comment]: <> (go)
[![godoc](https://pkg.go.dev/badge/go.dtapp.net/gorequest?status.svg)](https://pkg.go.dev/go.dtapp.net/gorequest)
[![goproxy.cn](https://goproxy.cn/stats/go.dtapp.net/gorequest/badges/download-count.svg)](https://goproxy.cn/stats/go.dtapp.net/gorequest)
[![goreportcard.com](https://goreportcard.com/badge/go.dtapp.net/gorequest)](https://goreportcard.com/report/go.dtapp.net/gorequest)
[![deps.dev](https://img.shields.io/badge/deps-go-red.svg)](https://deps.dev/go/go.dtapp.net%2Fgorequest)
#### 安装
```shell
go get -v -u go.dtapp.net/gorequest
```
#### 使用
```go
package main
func main() {
}
```

@ -0,0 +1,14 @@
package gorequest
const (
// userAgentFormat is the format of the Sdk-User-Agent header; request fills
// it with runtime.GOOS and runtime.Version().
userAgentFormat = "%s/GO/%s"
)
// Request body modes selected by the SetContentType* methods.
var (
httpParamsModeJson = "JSON"
httpParamsModeXml = "XML"
httpParamsModeForm = "FORM"
)
// Version is the version string of the gorequest package.
const Version = "1.0.24"

@ -0,0 +1,58 @@
package gorequest
import (
"net/url"
)
// Headers holds request header names and their values.
type Headers map[string]string
// NewHeaders returns a new, empty Headers map.
func NewHeaders() Headers {
	return Headers{}
}
// NewNewHeadersWith returns a new Headers map containing the entries of all
// given maps. When a key occurs more than once, the last map wins.
func NewNewHeadersWith(headers ...Headers) Headers {
	merged := Headers{}
	for i := range headers {
		for key, value := range headers[i] {
			merged[key] = value
		}
	}
	return merged
}
// Set stores value under key, replacing any previous value.
func (p Headers) Set(key, value string) {
p[key] = value
}
// SetHeaders copies every entry of headers into p, replacing existing keys.
func (p Headers) SetHeaders(headers Headers) {
	for name, val := range headers {
		p.Set(name, val)
	}
}
// GetQuery returns the headers encoded as a URL query string
// ("key=value&..." sorted by key, as produced by url.Values.Encode).
func (p Headers) GetQuery() string {
	values := url.Values{}
	for name, val := range p {
		values[name] = []string{val}
	}
	return values.Encode()
}
// DeepCopy returns a copy of the headers and then replaces *p with a new,
// empty map. After the call the receiver therefore holds no entries.
func (p *Headers) DeepCopy() map[string]string {
	snapshot := map[string]string{}
	for name, val := range *p {
		snapshot[name] = val
	}
	// Hand the receiver a fresh map so that it starts empty again.
	*p = make(map[string]string)
	return snapshot
}

@ -0,0 +1,292 @@
package gorequest
import (
"bytes"
"crypto/tls"
"encoding/json"
"errors"
"fmt"
"go.dtapp.net/gotime"
"io"
"io/ioutil"
"net/http"
"net/url"
"runtime"
"strings"
"time"
)
// Response records one request together with the response it produced.
type Response struct {
RequestUri string // [request] URI
RequestParams Params // [request] parameters
RequestMethod string // [request] method
RequestHeader Headers // [request] headers
RequestTime time.Time // [request] time
ResponseHeader http.Header // [response] headers
ResponseStatus string // [response] status line
ResponseStatusCode int // [response] status code
ResponseBody []byte // [response] body
ResponseContentLength int64 // [response] content length
ResponseTime time.Time // [response] time
}
// App is a request builder: it collects the URI, method, headers and
// parameters of a request and executes it.
type App struct {
Uri string // global request URI, used only when no per-request URI is set
Error error // last error recorded by a request
httpUri string // request URI
httpMethod string // request method
httpHeader Headers // request headers
httpParams Params // request parameters
responseContent Response // response content
httpContentType string // request content type
debug bool // whether debug mode is enabled
p12Cert *tls.Certificate // p12 certificate
}
// NewHttp returns a new App with empty header and parameter maps.
func NewHttp() *App {
return &App{
httpHeader: NewHeaders(),
httpParams: NewParams(),
}
}
// SetDebug enables debug mode, in which request prints the response.
func (app *App) SetDebug() {
app.debug = true
}
// SetUri sets the request URI.
func (app *App) SetUri(uri string) {
app.httpUri = uri
}
// SetMethod sets the request method.
func (app *App) SetMethod(method string) {
app.httpMethod = method
}
// SetHeader sets a single request header, replacing any previous value.
// It panics if key is empty.
func (app *App) SetHeader(key, value string) {
	if key == "" {
		// The message names the header key; it used to say "url".
		panic("header key is empty")
	}
	app.httpHeader.Set(key, value)
}
// SetHeaders sets several request headers at once, replacing existing keys.
func (app *App) SetHeaders(headers Headers) {
	app.httpHeader.SetHeaders(headers)
}
// SetAuthToken sets the Authorization header to "Bearer <token>".
func (app *App) SetAuthToken(token string) {
	bearer := "Bearer " + token
	app.httpHeader.Set("Authorization", bearer)
}
// SetUserAgent sets the User-Agent header. An empty ua is replaced by the
// result of GetRandomUserAgent.
func (app *App) SetUserAgent(ua string) {
	agent := ua
	if agent == "" {
		agent = GetRandomUserAgent()
	}
	app.httpHeader.Set("User-Agent", agent)
}
// SetContentTypeJson selects the JSON request body mode.
func (app *App) SetContentTypeJson() {
app.httpContentType = httpParamsModeJson
}
// SetContentTypeForm selects the form request body mode.
func (app *App) SetContentTypeForm() {
app.httpContentType = httpParamsModeForm
}
// SetContentTypeXml selects the XML request body mode.
func (app *App) SetContentTypeXml() {
app.httpContentType = httpParamsModeXml
}
// SetParam sets a single request parameter, replacing any previous value.
// It panics if key is empty.
func (app *App) SetParam(key string, value interface{}) {
	if key == "" {
		// The message names the parameter key; it used to say "url".
		panic("param key is empty")
	}
	app.httpParams.Set(key, value)
}
// SetParams sets several request parameters at once, replacing existing keys.
func (app *App) SetParams(params Params) {
	app.httpParams.SetParams(params)
}
// SetP12Cert sets the client certificate used for the TLS connection.
func (app *App) SetP12Cert(content *tls.Certificate) {
app.p12Cert = content
}
// Get sends a GET request. When exactly one uri is given it is stored as the
// global URI (app.Uri), which is used only if no URI was set with SetUri.
func (app *App) Get(uri ...string) (httpResponse Response, err error) {
	app.httpMethod = http.MethodGet
	if len(uri) == 1 {
		app.Uri = uri[0]
	}
	return request(app)
}
// Post sends a POST request. When exactly one uri is given it is stored as the
// global URI (app.Uri), which is used only if no URI was set with SetUri.
func (app *App) Post(uri ...string) (httpResponse Response, err error) {
	app.httpMethod = http.MethodPost
	if len(uri) == 1 {
		app.Uri = uri[0]
	}
	return request(app)
}
// Request sends the request using the method previously set with SetMethod.
func (app *App) Request() (httpResponse Response, err error) {
return request(app)
}
// request performs the HTTP request described by app and records both the
// request and the response in the returned Response.
//
// The pending parameters and headers are moved out of app (DeepCopy resets
// them), so they apply to this request only. The target is app.httpUri, or
// app.Uri when no per-request URI was set. POST bodies are built from the
// parameters for the JSON and form content types, an XML body is built for
// any method, and GET parameters are appended to the query string.
//
// Every failure is returned and also stored in app.Error; underlying errors
// are wrapped so that the cause stays reachable through errors.Is/errors.As.
//
// NOTE(review): the client is created without a timeout, so a stalled server
// blocks the caller indefinitely.
func request(app *App) (httpResponse Response, err error) {
	// Snapshot the request description.
	httpResponse.RequestTime = gotime.Current().Time
	httpResponse.RequestUri = app.httpUri
	httpResponse.RequestMethod = app.httpMethod
	httpResponse.RequestParams = app.httpParams.DeepCopy()
	httpResponse.RequestHeader = app.httpHeader.DeepCopy()

	// Fall back to the global URI when no per-request URI was set.
	if httpResponse.RequestUri == "" {
		httpResponse.RequestUri = app.Uri
	}
	if httpResponse.RequestUri == "" {
		app.Error = errors.New("没有设置Uri")
		return httpResponse, app.Error
	}

	// Create the HTTP client; a configured certificate needs its own transport.
	client := &http.Client{}
	if app.p12Cert != nil {
		transport := &http.Transport{
			TLSClientConfig: &tls.Config{
				Certificates: []tls.Certificate{*app.p12Cert},
			},
			DisableCompression: true,
		}
		client = &http.Client{
			Transport: transport,
		}
	}

	httpResponse.RequestHeader.Set("Sdk-User-Agent", fmt.Sprintf(userAgentFormat, runtime.GOOS, runtime.Version()))
	switch app.httpContentType {
	case httpParamsModeJson:
		httpResponse.RequestHeader.Set("Content-Type", "application/json")
	case httpParamsModeForm:
		httpResponse.RequestHeader.Set("Content-Type", "application/x-www-form-urlencoded")
	case httpParamsModeXml:
		httpResponse.RequestHeader.Set("Content-Type", "text/xml")
	}

	// Build the request body.
	var reqBody io.Reader
	if httpResponse.RequestMethod == http.MethodPost && app.httpContentType == httpParamsModeJson {
		jsonStr, marshalErr := json.Marshal(httpResponse.RequestParams)
		if marshalErr != nil {
			app.Error = fmt.Errorf("解析出错 %w", marshalErr)
			return httpResponse, app.Error
		}
		reqBody = bytes.NewBuffer(jsonStr)
	}
	if httpResponse.RequestMethod == http.MethodPost && app.httpContentType == httpParamsModeForm {
		// Send the parameters as form values.
		form := url.Values{}
		for k, v := range httpResponse.RequestParams {
			form.Add(k, GetParamsString(v))
		}
		reqBody = strings.NewReader(form.Encode())
	}
	if app.httpContentType == httpParamsModeXml {
		reqBody, err = httpResponse.RequestParams.MarshalXML()
		if err != nil {
			app.Error = fmt.Errorf("解析XML出错 %w", err)
			return httpResponse, app.Error
		}
	}

	// Create the request.
	req, err := http.NewRequest(httpResponse.RequestMethod, httpResponse.RequestUri, reqBody)
	if err != nil {
		app.Error = fmt.Errorf("创建请求出错 %w", err)
		return httpResponse, app.Error
	}

	// GET requests carry the parameters in the query string.
	if httpResponse.RequestMethod == http.MethodGet && len(httpResponse.RequestParams) > 0 {
		q := req.URL.Query()
		for k, v := range httpResponse.RequestParams {
			q.Add(k, GetParamsString(v))
		}
		req.URL.RawQuery = q.Encode()
	}

	// Apply the request headers.
	for key, value := range httpResponse.RequestHeader {
		req.Header.Set(key, value)
	}

	// Send the request.
	resp, err := client.Do(req)
	if err != nil {
		app.Error = fmt.Errorf("请求出错 %w", err)
		return httpResponse, app.Error
	}
	// Close the body when done.
	defer resp.Body.Close()

	// Read the whole response body.
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		app.Error = fmt.Errorf("解析内容出错 %w", err)
		return httpResponse, app.Error
	}

	// Record the response.
	httpResponse.ResponseTime = gotime.Current().Time
	httpResponse.ResponseStatus = resp.Status
	httpResponse.ResponseStatusCode = resp.StatusCode
	httpResponse.ResponseHeader = resp.Header
	httpResponse.ResponseBody = body
	httpResponse.ResponseContentLength = resp.ContentLength

	if app.debug {
		fmt.Printf("gorequest%+v\n", httpResponse)
		fmt.Printf("gorequest.body%s\n", httpResponse.ResponseBody)
	}

	return httpResponse, nil
}

@ -0,0 +1,50 @@
package gorequest
import (
"net"
"net/http"
"strings"
)
// ClientIp makes a best-effort attempt to determine the client IP address.
//
// It checks, in this order, the headers Cf-Connecting-Ip, X-Forwarded-For,
// X-Real-Ip, HTTP_CLIENT_IP and HTTP_X_FORWARDED_FOR and returns the first
// non-empty value (for the comma-separated headers only the first element is
// considered). If none is set it returns the host part of r.RemoteAddr, or
// the empty string when that cannot be split into host and port.
func ClientIp(r *http.Request) string {
	// whole returns the trimmed header value.
	whole := func(name string) string {
		return strings.TrimSpace(r.Header.Get(name))
	}
	// first returns the trimmed first element of a comma-separated header.
	first := func(name string) string {
		return strings.TrimSpace(strings.Split(r.Header.Get(name), ",")[0])
	}

	// CloudFlare
	if ip := whole("Cf-Connecting-Ip"); ip != "" {
		return ip
	}
	// Forwarded address set by proxies
	if ip := first("X-Forwarded-For"); ip != "" {
		return ip
	}
	// Real address set by a reverse proxy
	if ip := whole("X-Real-Ip"); ip != "" {
		return ip
	}
	// HTTP client address
	if ip := first("HTTP_CLIENT_IP"); ip != "" {
		return ip
	}
	// HTTP forwarded address
	if ip := first("HTTP_X_FORWARDED_FOR"); ip != "" {
		return ip
	}

	// Fall back to the address of the connection.
	host, _, err := net.SplitHostPort(strings.TrimSpace(r.RemoteAddr))
	if err != nil {
		return ""
	}
	return host
}

@ -0,0 +1,68 @@
package gorequest
import (
"encoding/json"
"go.dtapp.net/gostring"
"log"
)
// Params holds request parameter names and their values.
type Params map[string]interface{}
// NewParams returns a new, empty Params map.
func NewParams() Params {
	return Params{}
}
// NewParamsWith returns a new Params map containing the entries of all given
// maps. When a key occurs more than once, the last map wins.
func NewParamsWith(params ...Params) Params {
	merged := Params{}
	for i := range params {
		for key, value := range params[i] {
			merged[key] = value
		}
	}
	return merged
}
// Set stores value under key, replacing any previous value.
func (p Params) Set(key string, value interface{}) {
p[key] = value
}
// SetParams copies every entry of params into p, replacing existing keys.
func (p Params) SetParams(params Params) {
	for name, val := range params {
		p.Set(name, val)
	}
}
// GetParamsString converts a parameter value to its string form.
//
// Strings are returned unchanged and floats are converted with
// gostring.ToString. Every other value, integers included, is rendered as
// JSON. The integer case arms of the previous version were empty and Go case
// arms do not fall through, so integers already took the JSON path; that
// behavior is kept.
//
// If the value cannot be marshalled the error is logged and the empty string
// is returned; a library helper must not terminate the process.
func GetParamsString(src interface{}) string {
	switch v := src.(type) {
	case string:
		return v
	case float32, float64:
		return gostring.ToString(src)
	}
	data, err := json.Marshal(src)
	if err != nil {
		log.Printf("gorequest: GetParamsString: %v", err)
		return ""
	}
	return string(data)
}
// DeepCopy returns a copy of the parameters and then replaces *p with a new,
// empty map. After the call the receiver therefore holds no entries. Values
// are copied by assignment, not recursively.
func (p *Params) DeepCopy() map[string]interface{} {
	snapshot := map[string]interface{}{}
	for name, val := range *p {
		snapshot[name] = val
	}
	// Hand the receiver a fresh map so that it starts empty again.
	*p = make(map[string]interface{})
	return snapshot
}

@ -0,0 +1,47 @@
package gorequest
import (
"bytes"
"encoding/xml"
"fmt"
"io"
)
// MarshalXML renders p as a flat XML document wrapped in <xml>...</xml>.
//
// Each entry becomes <key>value</key>, with the value formatted via fmt and
// XML-escaped. The key "detail" is special-cased: its value is emitted
// unescaped inside a CDATA section. Keys are written verbatim, and entries
// appear in map iteration order, which is not stable between calls.
func (p Params) MarshalXML() (reader io.Reader, err error) {
	var buffer bytes.Buffer
	buffer.WriteString("<xml>")
	for key, value := range p {
		text := fmt.Sprint(value)
		if key == "detail" {
			buffer.WriteString("<detail><![CDATA[" + text + "]]></detail>")
			continue
		}
		buffer.WriteString("<" + key + ">")
		if err = xml.EscapeText(&buffer, []byte(text)); err != nil {
			return nil, err
		}
		buffer.WriteString("</" + key + ">")
	}
	buffer.WriteString("</xml>")
	return &buffer, nil
}

@ -0,0 +1,70 @@
package gorequest
import (
"net/url"
"strings"
)
// ResponseUrlParse holds the components extracted from a URL by UriParse.
type ResponseUrlParse struct {
	Uri      string `json:"uri"`       // the input exactly as given
	Urn      string `json:"urn"`       // host followed by path
	Url      string `json:"url"`       // scheme://host followed by path
	Scheme   string `json:"scheme"`    // scheme (protocol)
	Host     string `json:"host"`      // host, including the port when present
	Hostname string `json:"hostname"`  // host without the port
	Port     string `json:"port"`      // port
	Path     string `json:"path"`      // path
	RawQuery string `json:"raw_query"` // query string, without the leading '?'
	Fragment string `json:"fragment"`  // fragment, without the leading '#'
}

// UriParse splits input into its URL components. When input cannot be parsed
// the zero ResponseUrlParse is returned.
func UriParse(input string) (resp ResponseUrlParse) {
	u, err := url.Parse(input)
	if err != nil {
		return resp
	}
	hostAndPath := u.Host + u.Path
	return ResponseUrlParse{
		Uri:      input,
		Urn:      hostAndPath,
		Url:      u.Scheme + "://" + hostAndPath,
		Scheme:   u.Scheme,
		Host:     u.Host,
		Hostname: u.Hostname(),
		Port:     u.Port(),
		Path:     u.Path,
		RawQuery: u.RawQuery,
		Fragment: u.Fragment,
	}
}
// UriFilterExcludeQueryString returns uri with its query string removed and
// any trailing '?' and '/' characters trimmed.
//
// Only the query component itself is removed; identical text elsewhere in the
// URI (for example in the path) is left alone. When uri cannot be parsed,
// everything from the first '?' onward is dropped instead of dereferencing a
// nil *url.URL.
func UriFilterExcludeQueryString(uri string) string {
	clearUri := uri
	parsed, err := url.Parse(uri)
	switch {
	case err != nil:
		// Best effort for unparsable input.
		if i := strings.IndexByte(uri, '?'); i >= 0 {
			clearUri = uri[:i]
		}
	case parsed.RawQuery != "":
		// The query starts at the first '?', so the first occurrence of
		// "?"+RawQuery is exactly the query component.
		clearUri = strings.Replace(uri, "?"+parsed.RawQuery, "", 1)
	}
	clearUri = strings.TrimRight(clearUri, "?")
	return strings.TrimRight(clearUri, "/")
}
// LenCode escapes s with url.QueryEscape so it can be placed inside a URL query.
func LenCode(s string) string {
	return url.QueryEscape(s)
}
// DeCode reverses LenCode using url.QueryUnescape. Malformed input yields "".
func DeCode(s string) string {
	decoded, err := url.QueryUnescape(s)
	if err != nil {
		// QueryUnescape already returns "" together with the error.
		return decoded
	}
	return decoded
}
// ParseQuery returns the query parameters of the URL s, keyed by name.
// It returns nil when s is not a parsable URL; malformed query pairs are
// skipped. See https://studygolang.com/articles/2876
func ParseQuery(s string) map[string][]string {
	parsed, err := url.Parse(s)
	if err != nil {
		return nil
	}
	// URL.Query parses RawQuery and discards the parse error, like the
	// explicit url.ParseQuery call it replaces.
	return parsed.Query()
}

@ -0,0 +1,73 @@
package gorequest
import (
"fmt"
"math/rand"
"runtime"
"time"
)
// GetRandomUserAgent returns a randomly chosen entry of userAgentList.
// A new generator seeded with the current time is created on every call.
func GetRandomUserAgent() string {
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
	index := rng.Intn(len(userAgentList))
	return userAgentList[index]
}
// userAgentList is the pool of browser User-Agent strings that
// GetRandomUserAgent picks from. The trailing comment on each entry names the
// browser and the date recorded for that entry.
var userAgentList = []string{
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36", // Chrome 2022-02-14
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0", // Firefox 2022-02-14
"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko", // IE 2022-02-14
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.80 Safari/537.36 HeyTapBrowser/40.7.35.1", // Chrome 2022-02-14
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36 OnePlusBrowser/30.5.0.8", // Chrome 2022-02-14
"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9a1) Gecko/20060814 Firefox/51.0", // Firefox 2022-02-14
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Safari/605.1.15", // Safari 2022-02-14
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15", // Safari 2022-04-18
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36 Edg/98.0.1108.51", // Edge 2022-02-14
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 Edg/100.0.1185.44", // Chrome 2022-04-18
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36", // Chrome 2022-02-14
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36", // Chrome 2022-04-18
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:97.0) Gecko/20100101 Firefox/97.0", // Firefox 2022-02-14
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:99.0) Gecko/20100101 Firefox/99.0", // Firefox 2022-04-18
}
// GetRandomUserAgentSystem 获取系统随机UA
func GetRandomUserAgentSystem() string {
r := rand.New(rand.NewSource(time.Now().UnixNano()))
switch runtime.GOOS {
case "linux":
return userAgentListLinux[r.Intn(len(userAgentListLinux))]
case "windows":
return userAgentListWindows[r.Intn(len(userAgentListWindows))]
default:
return userAgentListMac[r.Intn(len(userAgentListMac))]
}
}
// userAgentListWindows holds the User-Agent strings GetRandomUserAgentSystem
// picks from when runtime.GOOS is "windows".
var userAgentListWindows = []string{
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36", // Chrome 2022-02-14
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0", // Firefox 2022-02-14
"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko", // IE 2022-02-14
}
// userAgentListLinux holds the User-Agent strings GetRandomUserAgentSystem
// picks from when runtime.GOOS is "linux".
var userAgentListLinux = []string{
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.80 Safari/537.36 HeyTapBrowser/40.7.35.1", // Chrome 2022-02-14
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Safari/537.36 OnePlusBrowser/30.5.0.8", // Chrome 2022-02-14
"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9a1) Gecko/20060814 Firefox/51.0", // Firefox 2022-02-14
}
// userAgentListMac holds the User-Agent strings GetRandomUserAgentSystem
// picks from on every other operating system.
var userAgentListMac = []string{
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Safari/605.1.15", // Safari 2022-02-14
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15", // Safari 2022-04-18
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.82 Safari/537.36 Edg/98.0.1108.51", // Edge 2022-02-14
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 Edg/100.0.1185.44", // Chrome 2022-04-18
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36", // Chrome 2022-02-14
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36", // Chrome 2022-04-18
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:97.0) Gecko/20100101 Firefox/97.0", // Firefox 2022-02-14
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:99.0) Gecko/20100101 Firefox/99.0", // Firefox 2022-04-18
}
// DtaUa returns the User-Agent string "Go-dta-request/<Version>/<go version>",
// where the Go version is runtime.Version() with its first two characters
// (the "go" prefix of release builds) removed.
func DtaUa() string {
	goVersion := runtime.Version()[2:]
	return fmt.Sprintf("Go-dta-request/%s/%v", Version, goVersion)
}

@ -0,0 +1,17 @@
kind: pipeline
type: docker
name: clone
steps:
- name: Test
image: golang:1.18
commands:
- go env -w GO111MODULE=on
- go env -w GOPROXY=https://goproxy.cn,direct
- go test -v ./...
- name: Benchmark
image: golang:1.18
commands:
- go env -w GO111MODULE=on
- go env -w GOPROXY=https://goproxy.cn,direct
- go test -bench=. -benchmem

@ -0,0 +1,8 @@
.env
.git
.svn
.idea
.vscode
*.log
gomod.sh
/vendor/

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 李光春
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,17 @@
<h1>
<a href="https://www.dtapp.net/">Golang String</a>
</h1>
📦 Golang String
[comment]: <> (go)
[![godoc](https://pkg.go.dev/badge/go.dtapp.net/gostring?status.svg)](https://pkg.go.dev/go.dtapp.net/gostring)
[![goproxy.cn](https://goproxy.cn/stats/go.dtapp.net/gostring/badges/download-count.svg)](https://goproxy.cn/stats/go.dtapp.net/gostring)
[![goreportcard.com](https://goreportcard.com/badge/go.dtapp.net/gostring)](https://goreportcard.com/report/go.dtapp.net/gostring)
[![deps.dev](https://img.shields.io/badge/deps-go-red.svg)](https://deps.dev/go/go.dtapp.net%2Fgostring)
#### 安装
```go
go get -v -u go.dtapp.net/gostring
```

@ -0,0 +1,203 @@
package gostring
import (
"errors"
"fmt"
"go.dtapp.net/gorandom"
"go.dtapp.net/gotime"
)
// GenerateId 生成18一编号
func GenerateId(customId string) string {
currentTime := gotime.Current().Format()
one, err := generateIdOne(customId, currentTime)
if err == nil {
return one
}
two, err := generateIdTwo(customId, currentTime)
if err == nil {
return two
}
three, err := generateIdThree(customId, currentTime)
if err == nil {
return three
}
four, err := generateIdFour(customId, currentTime)
if err == nil {
return four
}
five, err := generateIdFive(customId, currentTime)
if err == nil {
return five
}
six, err := generateIdSix(customId)
return six
}
// GenerateIdAndTime builds an identifier (intended to be 18 characters long)
// that starts with customId and embeds customTime.
//
// The time-based generators are tried from the most to the least precise time
// layout and the first one that succeeds wins. If all of them fail the
// time-less generateIdSix is used; its error is ignored, so whatever string it
// returns alongside the error is passed on.
func GenerateIdAndTime(customId, customTime string) string {
	timed := []func(string, string) (string, error){
		generateIdOne,
		generateIdTwo,
		generateIdThree,
		generateIdFour,
		generateIdFive,
	}
	for _, generate := range timed {
		if id, err := generate(customId, customTime); err == nil {
			return id
		}
	}
	id, _ := generateIdSix(customId)
	return id
}
// generateIdWithLayout is the shared implementation of the time-based
// generators. The result is customId, followed by setTime re-formatted with
// dateFormat, followed by gorandom.Numeric output (presumably random digits)
// filling the remainder of the 18-character budget.
//
// It returns an error when fewer than five characters would be left for the
// random part.
func generateIdWithLayout(customId, setTime, dateFormat string) (string, error) {
	const (
		randomLength = 5  // minimum length of the random part
		dataLength   = 18 // target length of the whole identifier
	)
	// Space left for the random part once the prefix and time are accounted for.
	newRandomLength := dataLength - (len(dateFormat) + len(customId))
	if newRandomLength < randomLength {
		return "", errors.New("没有满足条件")
	}
	return fmt.Sprintf("%v%s%s", customId, gotime.SetCurrentParse(setTime).SetFormat(dateFormat), gorandom.Numeric(newRandomLength)), nil
}

// generateIdOne builds an identifier whose time part is year, month, day,
// hour and minute.
func generateIdOne(customId, setTime string) (string, error) {
	return generateIdWithLayout(customId, setTime, "200601021504")
}

// generateIdTwo builds an identifier whose time part is year, month, day and hour.
func generateIdTwo(customId, setTime string) (string, error) {
	return generateIdWithLayout(customId, setTime, "2006010215")
}

// generateIdThree builds an identifier whose time part is year, month and day.
func generateIdThree(customId, setTime string) (string, error) {
	return generateIdWithLayout(customId, setTime, "20060102")
}

// generateIdFour builds an identifier whose time part is year and month.
func generateIdFour(customId, setTime string) (string, error) {
	return generateIdWithLayout(customId, setTime, "200601")
}

// generateIdFive builds an identifier whose time part is the year only.
func generateIdFive(customId, setTime string) (string, error) {
	return generateIdWithLayout(customId, setTime, "2006")
}
// generateIdSix builds an identifier without a time part: customId followed by
// gorandom.Numeric output (presumably random digits) filling the remainder of
// the 18-character budget.
//
// It returns an error when fewer than five characters would be left for the
// random part.
func generateIdSix(customId string) (string, error) {
	const (
		randomLength = 5  // minimum length of the random part
		dataLength   = 18 // target length of the whole identifier
	)
	newRandomLength := dataLength - len(customId)
	if newRandomLength < randomLength {
		return "", errors.New("没有满足条件")
	}
	return fmt.Sprintf("%v%s", customId, gorandom.Numeric(newRandomLength)), nil
}

@ -0,0 +1,8 @@
package gostring
// Only is an empty struct type; it declares no fields.
type Only struct{}

// NewOnly returns a pointer to a new Only value.
func NewOnly() *Only {
	return new(Only)
}

@ -0,0 +1,152 @@
package gostring
import (
"crypto/hmac"
"crypto/sha256"
"encoding/hex"
"fmt"
"strconv"
"strings"
"unicode/utf8"
)
// ToString formats value with fmt.Sprint. An untyped nil yields "".
func ToString(value interface{}) string {
	if value != nil {
		return fmt.Sprint(value)
	}
	return ""
}
// ToFloat64 parses s as a float64. The parse error is deliberately ignored:
// the function returns whatever strconv.ParseFloat produced, which is 0 for
// input that is not a number.
func ToFloat64(s string) float64 {
	parsed, _ := strconv.ParseFloat(s, 64)
	return parsed
}

// ToInt parses s as a base-10 int. The parse error is deliberately ignored:
// the function returns whatever strconv.Atoi produced, which is 0 for input
// that is not an integer.
func ToInt(s string) int {
	parsed, _ := strconv.Atoi(s)
	return parsed
}

// ToInt64 parses s as a base-10 int64. When that fails, s is parsed as a
// float via ToFloat64 and converted to int64, so "1.9" yields 1 and
// non-numeric input yields 0.
func ToInt64(s string) int64 {
	if parsed, err := strconv.ParseInt(s, 10, 64); err == nil {
		return parsed
	}
	return int64(ToFloat64(s))
}
// ToUint parses s as a base-10 uint and returns 0 when s is not a valid
// unsigned integer.
//
// The value is parsed with the platform's uint width (strconv.IntSize), so a
// number that does not fit in uint yields 0 on 32-bit platforms instead of
// being silently truncated by the conversion.
func ToUint(s string) uint {
	parsed, err := strconv.ParseUint(s, 10, strconv.IntSize)
	if err != nil {
		return 0
	}
	return uint(parsed)
}
// ToUint64 parses s as a base-10 uint64 and returns 0 when s is not a valid
// unsigned integer or does not fit in 64 bits.
func ToUint64(s string) uint64 {
	parsed, err := strconv.ParseUint(s, 10, 64)
	if err != nil {
		return 0
	}
	return parsed
}
// Replace returns str with every occurrence of old replaced by new.
func Replace(str, old, new string) string {
	return strings.ReplaceAll(str, old, new)
}
// HmacSha256Hex computes the HMAC-SHA256 of strToSign keyed with key and
// returns the digest as a lowercase hexadecimal string.
func HmacSha256Hex(key, strToSign string) string {
	mac := hmac.New(sha256.New, []byte(key))
	mac.Write([]byte(strToSign))
	digest := mac.Sum(nil)
	return hex.EncodeToString(digest)
}
// Space returns str with every space character (" ") removed.
func Space(str string) string {
	return strings.ReplaceAll(str, " ", "")
}

// LineBreak returns str with every line feed ("\n") removed.
func LineBreak(str string) string {
	return strings.ReplaceAll(str, "\n", "")
}

// SpaceAndLineBreak returns str with both spaces and line feeds removed.
func SpaceAndLineBreak(str string) string {
	withoutSpaces := Space(str)
	return LineBreak(withoutSpaces)
}
// TrimLastChar returns s without its last UTF-8 encoded character.
// An empty string, or a string ending in an invalid UTF-8 byte, is returned
// unchanged.
func TrimLastChar(s string) string {
	last, size := utf8.DecodeLastRuneInString(s)
	// RuneError with a size of 0 or 1 means "empty" or "invalid encoding".
	if last == utf8.RuneError && size <= 1 {
		return s
	}
	return s[:len(s)-size]
}
// Split slices s into the substrings separated by sep. Unlike strings.Split,
// an empty s yields an empty (non-nil) slice rather than a slice holding one
// empty string.
func Split(s string, sep string) []string {
	if s == "" {
		return []string{}
	}
	return strings.Split(s, sep)
}

// Contains reports whether sep occurs within s.
func Contains(s, sep string) bool {
	return strings.Index(s, sep) >= 0
}
// NumericalToString formats a built-in integer or floating-point value as a
// decimal string and reports true. For any other type, including nil, it
// returns "0" and false.
//
// Floats are formatted with the 'f' format and the shortest representation
// that round-trips for their bit size.
func NumericalToString(value interface{}) (string, bool) {
	switch n := value.(type) {
	case int:
		return strconv.FormatInt(int64(n), 10), true
	case int8:
		return strconv.FormatInt(int64(n), 10), true
	case int16:
		return strconv.FormatInt(int64(n), 10), true
	case int32:
		return strconv.FormatInt(int64(n), 10), true
	case int64:
		return strconv.FormatInt(n, 10), true
	case uint:
		return strconv.FormatUint(uint64(n), 10), true
	case uint8:
		return strconv.FormatUint(uint64(n), 10), true
	case uint16:
		return strconv.FormatUint(uint64(n), 10), true
	case uint32:
		return strconv.FormatUint(uint64(n), 10), true
	case uint64:
		return strconv.FormatUint(n, 10), true
	case float32:
		return strconv.FormatFloat(float64(n), 'f', -1, 32), true
	case float64:
		return strconv.FormatFloat(n, 'f', -1, 64), true
	}
	return "0", false
}

@ -0,0 +1,18 @@
package gostring
import (
"crypto/rand"
"fmt"
"time"
)
// GetUuId returns an identifier made of six hyphen-separated hexadecimal
// groups: the current Unix time truncated to 32 bits, followed by 12 bytes
// from crypto/rand split into groups of 2, 2, 2, 2 and 4 bytes.
//
// It returns "" when the random bytes cannot be read.
func GetUuId() string {
	random := make([]byte, 12)
	if n, err := rand.Read(random); err != nil || n != len(random) {
		return ""
	}
	seconds := uint32(time.Now().UTC().Unix())
	return fmt.Sprintf("%x-%x-%x-%x-%x-%x", seconds, random[0:2], random[2:4], random[4:6], random[6:8], random[8:])
}

@ -0,0 +1,3 @@
package gostring
// Version is the version string of the gostring package.
const Version = "1.0.6"

@ -0,0 +1,17 @@
kind: pipeline
type: docker
name: clone
steps:
- name: Test
image: golang:1.18
commands:
- go env -w GO111MODULE=on
- go env -w GOPROXY=https://goproxy.cn,direct
- go test -v ./...
- name: Benchmark
image: golang:1.18
commands:
- go env -w GO111MODULE=on
- go env -w GOPROXY=https://goproxy.cn,direct
- go test -bench=. -benchmem

@ -0,0 +1,7 @@
.env
.git
.svn
.idea
.vscode
*.log
gomod.sh

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 李光春
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,33 @@
<h1>
<a href="https://www.dtapp.net/">Golang Time</a>
</h1>
📦 Golang 时间
[comment]: <> (go)
[![godoc](https://pkg.go.dev/badge/go.dtapp.net/gotime?status.svg)](https://pkg.go.dev/go.dtapp.net/gotime)
[![goproxy.cn](https://goproxy.cn/stats/go.dtapp.net/gotime/badges/download-count.svg)](https://goproxy.cn/stats/go.dtapp.net/gotime)
[![goreportcard.com](https://goreportcard.com/badge/go.dtapp.net/gotime)](https://goreportcard.com/report/go.dtapp.net/gotime)
[![deps.dev](https://img.shields.io/badge/deps-go-red.svg)](https://deps.dev/go/go.dtapp.net%2Fgotime)
#### 安装
```go
go get -v -u go.dtapp.net/gotime
```
#### 使用
```go
package main
import (
"go.dtapp.net/gotime"
"testing"
)
// TestVerification 验证字符串是否为时间
func TestVerification(t *testing.T) {
t.Log(gotime.Verification("2022-02-05 00:00:00", gotime.DateTimeFormat))
}
```

@ -0,0 +1,65 @@
package gotime
import "time"
// Gt reports whether p's time is after t2.
func (p Pro) Gt(t2 time.Time) bool {
	return t2.Before(p.Time)
}

// Lt reports whether p's time is before t2.
func (p Pro) Lt(t2 time.Time) bool {
	return t2.After(p.Time)
}

// Eq reports whether p's time and t2 are the same instant.
func (p Pro) Eq(t2 time.Time) bool {
	return t2.Equal(p.Time)
}

// Ne reports whether p's time and t2 are different instants.
func (p Pro) Ne(t2 time.Time) bool {
	return !p.Time.Equal(t2)
}

// Gte reports whether p's time is after or equal to t2.
func (p Pro) Gte(t2 time.Time) bool {
	return !p.Time.Before(t2)
}

// Lte reports whether p's time is before or equal to t2.
func (p Pro) Lte(t2 time.Time) bool {
	return !p.Time.After(t2)
}

// Between reports whether p's time lies strictly between start and end.
func (p Pro) Between(start time.Time, end time.Time) bool {
	return p.Gt(start) && p.Lt(end)
}

// BetweenIncludedStart reports whether p's time lies in [start, end).
func (p Pro) BetweenIncludedStart(start time.Time, end time.Time) bool {
	return p.Gte(start) && p.Lt(end)
}

// BetweenIncludedEnd reports whether p's time lies in (start, end].
func (p Pro) BetweenIncludedEnd(start time.Time, end time.Time) bool {
	return p.Gt(start) && p.Lte(end)
}

// BetweenIncludedBoth reports whether p's time lies in [start, end].
func (p Pro) BetweenIncludedBoth(start time.Time, end time.Time) bool {
	return p.Gte(start) && p.Lte(end)
}

@ -0,0 +1,69 @@
package gotime
import "time"
// DiffInHour returns the number of whole hours from t2 to p's time, computed
// from Unix seconds and truncated toward zero. It is negative when t2 is
// later than p's time.
func (p Pro) DiffInHour(t2 time.Time) (hour int64) {
	return (p.Time.Unix() - t2.Unix()) / 3600
}

// DiffInHourWithAbs returns the absolute number of whole hours between p's
// time and t2.
func (p Pro) DiffInHourWithAbs(t2 time.Time) (hour int64) {
	hour = (t2.Unix() - p.Time.Unix()) / 3600
	if hour < 0 {
		hour = -hour
	}
	return hour
}

// DiffInMinutes returns the number of whole minutes from t2 to p's time,
// truncated toward zero. The result is named hour for historical reasons.
func (p Pro) DiffInMinutes(t2 time.Time) (hour int64) {
	return (p.Time.Unix() - t2.Unix()) / 60
}

// DiffInMinutesWithAbs returns the absolute number of whole minutes between
// p's time and t2.
func (p Pro) DiffInMinutesWithAbs(t2 time.Time) (hour int64) {
	hour = (t2.Unix() - p.Time.Unix()) / 60
	if hour < 0 {
		hour = -hour
	}
	return hour
}

// DiffInSecond returns the number of seconds from t2 to p's time.
// The result is named hour for historical reasons.
func (p Pro) DiffInSecond(t2 time.Time) (hour int64) {
	return p.Time.Unix() - t2.Unix()
}

// DiffInSecondWithAbs returns the absolute number of seconds between p's time
// and t2.
func (p Pro) DiffInSecondWithAbs(t2 time.Time) (hour int64) {
	hour = t2.Unix() - p.Time.Unix()
	if hour < 0 {
		hour = -hour
	}
	return hour
}

@ -0,0 +1,8 @@
package gotime
import "fmt"
// invalidTimezoneError builds the error reported for a timezone name that
// could not be loaded; the message points at Go's bundled zoneinfo archive.
var invalidTimezoneError = func(timezone string) error {
	const zoneFile = "$GOROOT/lib/time/zoneinfo.zip"
	return fmt.Errorf("invalid timezone %q, please see the file %q for all valid timezones", timezone, zoneFile)
}

@ -0,0 +1,105 @@
package gotime
import (
"fmt"
"time"
)
// Time layout constants, written in Go's reference-time notation for use with
// time.Format and time.Parse.
const (
RFC3339Format = time.RFC3339
Iso8601Format = "2006-01-02T15:04:05-07:00"
CookieFormat = "Monday, 02-Jan-2006 15:04:05 MST"
RFC1036Format = "Mon, 02 Jan 06 15:04:05 -0700"
RFC7231Format = "Mon, 02 Jan 2006 15:04:05 GMT"
DayDateTimeFormat = "Mon, Jan 2, 2006 3:04 PM"
DateTimeFormat = "2006-01-02 15:04:05"
DateFormat = "2006-01-02"
TimeFormat = "15:04:05"
ShortDateTimeFormat = "20060102150405"
ShortDateFormat = "20060102"
ShortTimeFormat = "150405"
)
// Pro wraps a time.Time together with a location and an error that some
// constructors record while building the value.
type Pro struct {
	Time  time.Time      // the wrapped time value
	loc   *time.Location // location recorded by constructors that load one; nil otherwise
	Error error          // error recorded by the constructor, if any
}

// NewPro returns a Pro holding the current local time. Its location and
// error are left unset.
func NewPro() Pro {
	var p Pro
	p.Time = time.Now()
	return p
}
// BeforeSeconds returns p moved back by the given number of seconds.
// A negative count moves the time forward.
func (p Pro) BeforeSeconds(seconds int64) Pro {
	// The sign is folded into the number itself: prefixing "-" to an already
	// negative count would produce "--5s", which ParseDuration rejects.
	// The only remaining error is overflow, in which case st is 0 and the
	// time is left unchanged.
	st, _ := time.ParseDuration(fmt.Sprintf("%ds", -seconds))
	p.Time = p.Time.Add(st)
	return p
}

// AfterSeconds returns p moved forward by the given number of seconds.
// A negative count moves the time back.
func (p Pro) AfterSeconds(seconds int64) Pro {
	// Overflow is the only possible parse error; st is then 0.
	st, _ := time.ParseDuration(fmt.Sprintf("%ds", seconds))
	p.Time = p.Time.Add(st)
	return p
}

// BeforeMinute returns p moved back by the given number of minutes.
// Despite its name, the seconds parameter is a count of minutes.
func (p Pro) BeforeMinute(seconds int64) Pro {
	// Overflow is the only possible parse error; st is then 0.
	st, _ := time.ParseDuration(fmt.Sprintf("%dm", -seconds))
	p.Time = p.Time.Add(st)
	return p
}

// AfterMinute returns p moved forward by the given number of minutes.
// Despite its name, the seconds parameter is a count of minutes.
func (p Pro) AfterMinute(seconds int64) Pro {
	// Overflow is the only possible parse error; st is then 0.
	st, _ := time.ParseDuration(fmt.Sprintf("%dm", seconds))
	p.Time = p.Time.Add(st)
	return p
}

// BeforeHour returns p moved back by the given number of hours.
func (p Pro) BeforeHour(hour int64) Pro {
	// Overflow is the only possible parse error; st is then 0.
	st, _ := time.ParseDuration(fmt.Sprintf("%dh", -hour))
	p.Time = p.Time.Add(st)
	return p
}

// AfterHour returns p moved forward by the given number of hours.
func (p Pro) AfterHour(hour int64) Pro {
	// Overflow is the only possible parse error; st is then 0.
	st, _ := time.ParseDuration(fmt.Sprintf("%dh", hour))
	p.Time = p.Time.Add(st)
	return p
}
// BeforeDay returns p moved back by the given number of calendar days.
func (p Pro) BeforeDay(day int) Pro {
	shifted := p
	shifted.Time = p.Time.AddDate(0, 0, -day)
	return shifted
}

// AfterDay returns p moved forward by the given number of calendar days.
func (p Pro) AfterDay(day int) Pro {
	shifted := p
	shifted.Time = p.Time.AddDate(0, 0, day)
	return shifted
}

// SetFormat formats p's time according to layout (Go reference-time notation).
func (p Pro) SetFormat(layout string) string {
	formatted := p.Time.Format(layout)
	return formatted
}
// Month returns the month of p's time as a number from 1 to 12.
// It is an alias of MonthOfYear.
func (p Pro) Month() int64 {
	return p.MonthOfYear()
}

// MonthOfYear returns the month of p's time as a number from 1 to 12.
//
// When p carries a location the month is evaluated in that location;
// otherwise it is taken from the time value as it stands. Calling Time.In
// with a nil location panics, so the nil case must be handled here.
func (p Pro) MonthOfYear() int64 {
	if p.loc == nil {
		return int64(p.Time.Month())
	}
	return int64(p.Time.In(p.loc).Month())
}

@ -0,0 +1,14 @@
package gotime
import (
"time"
)
// getLocationByTimezone loads the *time.Location named by timezone.
// When loading fails, the underlying error is replaced by
// invalidTimezoneError(timezone).
func getLocationByTimezone(timezone string) (*time.Location, error) {
	loc, err := time.LoadLocation(timezone)
	if err == nil {
		return loc, nil
	}
	return loc, invalidTimezoneError(timezone)
}

@ -0,0 +1,6 @@
package gotime
// Bson returns p's time rendered with time.Time's String method; per the
// original comment it is meant for use with MongoDB.
func (p Pro) Bson() string {
	return p.Time.String()
}

@ -0,0 +1,124 @@
package gotime
import "time"
// Numeric calendar and clock constants.
const (
YearsPerMillennium = 1000 // 1000 years per millennium
YearsPerCentury = 100 // 100 years per century
YearsPerDecade = 10 // 10 years per decade
QuartersPerYear = 4 // 4 quarters per year
MonthsPerYear = 12 // 12 months per year
MonthsPerQuarter = 3 // 3 months per quarter
WeeksPerNormalYear = 52 // 52 weeks per normal year
weeksPerLongYear = 53 // 53 weeks per long year
WeeksPerMonth = 4 // 4 weeks per month
DaysPerLeapYear = 366 // 366 days per leap year
DaysPerNormalYear = 365 // 365 days per normal year
DaysPerWeek = 7 // 7 days per week
HoursPerWeek = 168 // 168 hours per week
HoursPerDay = 24 // 24 hours per day
MinutesPerDay = 1440 // 1440 minutes per day
MinutesPerHour = 60 // 60 minutes per hour
SecondsPerWeek = 604800 // 604800 seconds per week
SecondsPerDay = 86400 // 86400 seconds per day
SecondsPerHour = 3600 // 3600 seconds per hour
SecondsPerMinute = 60 // 60 seconds per minute
MillisecondsPerSecond = 1000 // 1000 milliseconds per second
MicrosecondsPerMillisecond = 1000 // 1000 microseconds per millisecond
MicrosecondsPerSecond = 1000000 // 1000000 microseconds per second
)
// StartOfCentury returns p set to January 1st 00:00:00 of the first year of
// its century (the year rounded down to a multiple of 100), in the time's own
// location.
func (p Pro) StartOfCentury() Pro {
	firstYear := p.Time.Year() / YearsPerCentury * YearsPerCentury
	p.Time = time.Date(firstYear, time.January, 1, 0, 0, 0, 0, p.Time.Location())
	return p
}

// EndOfCentury returns p set to December 31st 23:59:59.999999999 of the last
// year of its century.
func (p Pro) EndOfCentury() Pro {
	lastYear := p.Time.Year()/YearsPerCentury*YearsPerCentury + 99
	p.Time = time.Date(lastYear, time.December, 31, 23, 59, 59, 999999999, p.Time.Location())
	return p
}

// StartOfDecade returns p set to January 1st 00:00:00 of the first year of
// its decade (the year rounded down to a multiple of 10).
func (p Pro) StartOfDecade() Pro {
	firstYear := p.Time.Year() / YearsPerDecade * YearsPerDecade
	p.Time = time.Date(firstYear, time.January, 1, 0, 0, 0, 0, p.Time.Location())
	return p
}

// EndOfDecade returns p set to December 31st 23:59:59.999999999 of the last
// year of its decade.
func (p Pro) EndOfDecade() Pro {
	lastYear := p.Time.Year()/YearsPerDecade*YearsPerDecade + 9
	p.Time = time.Date(lastYear, time.December, 31, 23, 59, 59, 999999999, p.Time.Location())
	return p
}

// StartOfYear returns p set to January 1st 00:00:00 of its year.
func (p Pro) StartOfYear() Pro {
	year, loc := p.Time.Year(), p.Time.Location()
	p.Time = time.Date(year, time.January, 1, 0, 0, 0, 0, loc)
	return p
}

// EndOfYear returns p set to December 31st 23:59:59.999999999 of its year.
func (p Pro) EndOfYear() Pro {
	year, loc := p.Time.Year(), p.Time.Location()
	p.Time = time.Date(year, time.December, 31, 23, 59, 59, 999999999, loc)
	return p
}
// Quarter returns the quarter (1 to 4) that p's month falls in.
func (p Pro) Quarter() (quarter int) {
	// Months 1-3 map to 1, 4-6 to 2, 7-9 to 3 and 10-12 to 4.
	return (int(p.Time.Month())-1)/3 + 1
}

// StartOfQuarter returns p set to 00:00:00 on the first day of the first
// month of its quarter.
func (p Pro) StartOfQuarter() Pro {
	firstMonth := time.Month(3*p.Quarter() - 2)
	p.Time = time.Date(p.Time.Year(), firstMonth, 1, 0, 0, 0, 0, p.Time.Location())
	return p
}

// EndOfQuarter returns p set to 23:59:59.999999999 on the last day of the
// last month of its quarter.
func (p Pro) EndOfQuarter() Pro {
	quarter := p.Quarter()
	// The quarters end in March and December (31 days) or June and
	// September (30 days).
	lastDay := 30
	if quarter == 1 || quarter == 4 {
		lastDay = 31
	}
	p.Time = time.Date(p.Time.Year(), time.Month(3*quarter), lastDay, 23, 59, 59, 999999999, p.Time.Location())
	return p
}
// StartOfMonth returns p set to 00:00:00 on the first day of its month.
//
// Year, month and location are all read from p.Time, so the result is
// consistent and no loaded location is required.
func (p Pro) StartOfMonth() Pro {
	p.Time = time.Date(p.Time.Year(), p.Time.Month(), 1, 0, 0, 0, 0, p.Time.Location())
	return p
}

// EndOfMonth returns p set to 23:59:59.999999999 on the last day of its month.
func (p Pro) EndOfMonth() Pro {
	// Day 0 of the following month is normalised by time.Date to the last
	// day of this month, which handles month lengths and leap years.
	p.Time = time.Date(p.Time.Year(), p.Time.Month()+1, 0, 23, 59, 59, 999999999, p.Time.Location())
	return p
}
// StartOfDay returns p set to 00:00:00 of its day.
func (p Pro) StartOfDay() Pro {
	year, month, day := p.Time.Date()
	p.Time = time.Date(year, month, day, 0, 0, 0, 0, p.Time.Location())
	return p
}

// EndOfDay returns p set to 23:59:59 of its day.
//
// NOTE(review): the nanosecond field is 0 here, whereas the other End*
// methods in this file use 999999999 - confirm whether that is intended.
func (p Pro) EndOfDay() Pro {
	year, month, day := p.Time.Date()
	p.Time = time.Date(year, month, day, 23, 59, 59, 0, p.Time.Location())
	return p
}

112
vendor/go.dtapp.net/gotime/today.go generated vendored

@ -0,0 +1,112 @@
package gotime
import (
"log"
"strconv"
"strings"
"time"
)
// Current returns the current time in the Asia/Shanghai time zone.
//
// If the zone cannot be loaded (for example in a container without a tz
// database, see https://www.ddhigh.com/2018/03/01/golang-docker-timezone.html)
// the failure is logged and kept in Error, and a fixed UTC+8 zone is used
// instead. The returned instant is always the real current instant; only its
// presentation zone differs.
func Current() Pro {
	p := NewPro()
	p.loc, p.Error = time.LoadLocation("Asia/Shanghai")
	if p.Error != nil {
		log.Printf("【gotime】时区错误%v\n", p.Error)
		// A fixed offset keeps both the wall clock and the Unix timestamp
		// correct, unlike adding eight hours to the instant.
		p.loc = time.FixedZone("CST", 8*60*60)
	}
	p.Time = time.Now().In(p.loc)
	return p
}
// SetCurrent returns a Pro wrapping sTime. Its location and error are unset.
func SetCurrent(sTime time.Time) Pro {
	return Pro{Time: sTime}
}
// SetCurrentParse parses str as a time in the Asia/Shanghai zone and returns
// it wrapped in a Pro.
//
// The layout is chosen from the shape of str:
//   - "2006-01-02" (10 characters with two dashes) uses DateFormat
//   - a "T" at index 10 uses RFC3339Format
//   - 8 digits use ShortDateFormat, 14 digits use ShortDateTimeFormat
//   - anything else uses DateTimeFormat
//
// An empty or all-zero placeholder value leaves the time from NewPro (the
// current time) untouched. If str cannot be parsed, Time is the zero time and
// Error holds the parse error. If the zone cannot be loaded, a fixed UTC+8
// zone is used so that parsing does not panic on a nil location; the load
// error stays in Error unless a parse error replaces it.
func SetCurrentParse(str string) Pro {
	p := NewPro()
	p.loc, p.Error = time.LoadLocation("Asia/Shanghai")
	if p.Error != nil {
		p.loc = time.FixedZone("CST", 8*60*60)
	}

	switch str {
	case "", "0", "0000-00-00 00:00:00", "0000-00-00", "00:00:00":
		return p
	}

	layout := DateTimeFormat
	if len(str) == 10 && strings.Count(str, "-") == 2 {
		layout = DateFormat
	}
	if strings.Index(str, "T") == 10 {
		layout = RFC3339Format
	}
	if _, err := strconv.ParseInt(str, 10, 64); err == nil {
		switch len(str) {
		case 8:
			layout = ShortDateFormat
		case 14:
			layout = ShortDateTimeFormat
		}
	}

	parsed, err := time.ParseInLocation(layout, str, p.loc)
	if err != nil {
		p.Error = err
	}
	p.Time = parsed
	return p
}
// SetCurrentUnix returns a Pro wrapping the time ts seconds after the Unix
// epoch. Its location and error are unset.
func SetCurrentUnix(ts int64) Pro {
	return Pro{Time: time.Unix(ts, 0)}
}
// Now returns the wrapped time value.
func (p Pro) Now() time.Time {
	return p.Time
}

// Format renders the wrapped time using DateTimeFormat.
func (p Pro) Format() string {
	layout := DateTimeFormat
	return p.Time.Format(layout)
}

// ToDateFormat renders the date part of the wrapped time using DateFormat.
func (p Pro) ToDateFormat() string {
	layout := DateFormat
	return p.Time.Format(layout)
}

// ToTimeFormat renders the clock part of the wrapped time using TimeFormat.
func (p Pro) ToTimeFormat() string {
	layout := TimeFormat
	return p.Time.Format(layout)
}

// Timestamp returns the wrapped time as Unix seconds.
func (p Pro) Timestamp() int64 {
	return p.Time.Unix()
}

// TimestampWithSecond returns the wrapped time as Unix seconds.
// It is equivalent to Timestamp.
func (p Pro) TimestampWithSecond() int64 {
	return p.Timestamp()
}

// TimestampWithMillisecond returns the wrapped time as Unix milliseconds,
// derived from the nanosecond timestamp.
func (p Pro) TimestampWithMillisecond() int64 {
	const nanosPerMilli = int64(time.Millisecond)
	return p.TimestampWithNanosecond() / nanosPerMilli
}

// TimestampWithMicrosecond returns the wrapped time as Unix microseconds,
// derived from the nanosecond timestamp.
func (p Pro) TimestampWithMicrosecond() int64 {
	const nanosPerMicro = int64(time.Microsecond)
	return p.TimestampWithNanosecond() / nanosPerMicro
}

// TimestampWithNanosecond returns the wrapped time as Unix nanoseconds.
func (p Pro) TimestampWithNanosecond() int64 {
	return p.Time.UnixNano()
}

@ -0,0 +1,15 @@
package gotime
import "time"
// Tomorrow returns the current time in the Asia/Shanghai zone advanced by one
// calendar day.
//
// If the zone cannot be loaded a fixed UTC+8 zone is used instead, so the
// result is the same instant with the same wall clock, rather than an instant
// shifted by eight hours.
func Tomorrow() Pro {
	p := NewPro()
	location, err := time.LoadLocation("Asia/Shanghai")
	if err != nil {
		location = time.FixedZone("CST", 8*60*60)
	}
	p.Time = time.Now().In(location).AddDate(0, 0, +1)
	return p
}

@ -0,0 +1,16 @@
package gotime
import "time"
// Verification checks that str is a valid time in the given layout by parsing
// it in the Asia/Shanghai zone. It returns the parsed time, or the zero time
// and the parse error when str does not match layout.
//
// If the zone cannot be loaded a fixed UTC+8 zone is used instead, so a
// missing tz database no longer makes every input fail validation.
func Verification(str, layout string) (resp time.Time, err error) {
	loc, locErr := time.LoadLocation("Asia/Shanghai")
	if locErr != nil {
		loc = time.FixedZone("CST", 8*60*60)
	}
	parsed, err := time.ParseInLocation(layout, str, loc)
	if err != nil {
		return time.Time{}, err
	}
	return parsed, nil
}

@ -0,0 +1,3 @@
package gotime
// Version is the version string of the gotime package.
const Version = "1.0.5"

@ -0,0 +1,17 @@
package gotime
import (
"time"
)
// Yesterday returns the current time in the Asia/Shanghai zone moved back by
// one calendar day.
//
// If the zone cannot be loaded a fixed UTC+8 zone is used instead, so the
// result is the same instant with the same wall clock, rather than an instant
// shifted by eight hours.
func Yesterday() Pro {
	p := NewPro()
	location, err := time.LoadLocation("Asia/Shanghai")
	if err != nil {
		location = time.FixedZone("CST", 8*60*60)
	}
	p.Time = time.Now().In(location).AddDate(0, 0, -1)
	return p
}

3
vendor/golang.org/x/text/AUTHORS generated vendored

@ -0,0 +1,3 @@
# This source code refers to The Go Authors for copyright purposes.
# The master list of authors is in the main Go distribution,
# visible at http://tip.golang.org/AUTHORS.

@ -0,0 +1,3 @@
# This source code was written by the Go contributors.
# The master list of contributors is in the main Go distribution,
# visible at http://tip.golang.org/CONTRIBUTORS.

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save