master v1.0.0
李光春 2 years ago
commit 795000a48e

8
.gitignore vendored

@ -0,0 +1,8 @@
.env
.git
.svn
.idea
.vscode
*.log
gitmod.sh
*_test.go

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 茂名聚合科技有限公司
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,25 @@
<h1>
<a href="https://www.dtapp.net/">Golang Ip</a>
</h1>
📦 Golang Ip库组件
[comment]: <> (go)
[![godoc](https://pkg.go.dev/badge/github.com/dtapps/goip?status.svg)](https://pkg.go.dev/github.com/dtapps/goip)
[![goproxy.cn](https://goproxy.cn/stats/github.com/dtapps/goip/badges/download-count.svg)](https://goproxy.cn/stats/github.com/dtapps/goip)
[![goreportcard.com](https://goreportcard.com/badge/github.com/dtapps/goip)](https://goreportcard.com/report/github.com/dtapps/goip)
[![deps.dev](https://img.shields.io/badge/deps-go-red.svg)](https://deps.dev/go/github.com%2Fdtapps%2Fgoip)
#### 安装使用
```go
go get -v -u github.com/dtapps/goip
```
#### 导入
```go
import (
"github.com/dtapps/goip"
)
```

@ -0,0 +1,89 @@
package goip
import (
"strconv"
)
var (
ipv4 = "IPV4"
ipv6 = "IPV6"
)
type AnalyseResult struct {
IP string `json:"ip,omitempty"` // 输入的ip地址
Country string `json:"country,omitempty"` // 国家或地区
Province string `json:"province,omitempty"` // 省份
City string `json:"city,omitempty"` // 城市
Area string `json:"area,omitempty"` // 区域
Isp string `json:"isp,omitempty"` // 运营商
}
func (app *App) Analyse(item string) AnalyseResult {
isIp := app.isIpv4OrIpv6(item)
switch isIp {
case ipv4:
info := app.V4db.Find(item)
search, err := app.V4Region.MemorySearch(item)
if err != nil {
return AnalyseResult{
IP: info.IP,
Country: info.Country,
Area: info.Area,
}
} else {
return AnalyseResult{
IP: search.IP,
Country: search.Country,
Province: search.Province,
City: search.City,
Isp: info.Area,
}
}
case ipv6:
info := app.V6db.Find(item)
return AnalyseResult{
IP: info.IP,
Country: info.Country,
Province: info.Province,
City: info.City,
Area: info.Area,
Isp: info.Isp,
}
default:
return AnalyseResult{}
}
}
// CheckIpv4 检查数据是不是IPV4
func (app *App) CheckIpv4(ips string) bool {
if len(ips) > 3 {
return false
}
nums, err := strconv.Atoi(ips)
if err != nil {
return false
}
if nums < 0 || nums > 255 {
return false
}
if len(ips) > 1 && ips[0] == '0' {
return false
}
return true
}
// CheckIpv6 检测是不是IPV6
func (app *App) CheckIpv6(ips string) bool {
if ips == "" {
return true
}
if len(ips) > 4 {
return false
}
for _, val := range ips {
if !((val >= '0' && val <= '9') || (val >= 'a' && val <= 'f') || (val >= 'A' && val <= 'F')) {
return false
}
}
return true
}

@ -0,0 +1,60 @@
package goip
import (
"github.com/dtapps/goip/ip2region"
v4 "github.com/dtapps/goip/v4"
v6 "github.com/dtapps/goip/v6"
"log"
"strings"
)
type App struct {
V4Region ip2region.Ip2Region // IPV4
V4db v4.Pointer // IPV4
V6db v6.Pointer // IPV6
}
func (app *App) InitIPData() {
v4Num := app.V4db.InitIPV4Data()
log.Printf("IPV4 库加载完成 共加载:%d 条 IP 记录\n", v4Num)
v6Num := app.V6db.InitIPV4Data()
log.Printf("IPV6 库加载完成 共加载:%d 条 IP 记录\n", v6Num)
}
func (app *App) Ipv4(ip string) (res v4.Result, resInfo ip2region.IpInfo) {
res = app.V4db.Find(ip)
resInfo, _ = app.V4Region.MemorySearch(ip)
return res, resInfo
}
func (app *App) Ipv6(ip string) (res v6.Result) {
res = app.V6db.Find(ip)
return res
}
func (app *App) isIpv4OrIpv6(ip string) string {
if len(ip) < 7 {
return ""
}
arrIpv4 := strings.Split(ip, ".")
if len(arrIpv4) == 4 {
//. 判断IPv4
for _, val := range arrIpv4 {
if !app.CheckIpv4(val) {
return ""
}
}
return ipv4
}
arrIpv6 := strings.Split(ip, ":")
if len(arrIpv6) == 8 {
// 判断Ipv6
for _, val := range arrIpv6 {
if !app.CheckIpv6(val) {
return "Neither"
}
}
return ipv6
}
return ""
}

@ -0,0 +1,15 @@
module github.com/dtapps/goip
go 1.18
require (
github.com/dtapps/gostring v1.0.0
github.com/saracen/go7z v0.0.0-20191010121135-9c09b6bd7fda
golang.org/x/text v0.3.7
)
require (
github.com/saracen/go7z-fixtures v0.0.0-20190623165746-aa6b8fba1d2f // indirect
github.com/saracen/solidblock v0.0.0-20190426153529-45df20abab6f // indirect
github.com/ulikunitz/xz v0.5.10 // indirect
)

@ -0,0 +1,13 @@
github.com/dtapps/gostring v1.0.0 h1:UQyCCfpz+395yTzuKAa0SrmQ3ky/rCv1FkX9t6GyOx4=
github.com/dtapps/gostring v1.0.0/go.mod h1:BYYnZHrmwpFXkLpd9rQyV5YcChovVreacfcjyMKEwoU=
github.com/saracen/go7z v0.0.0-20191010121135-9c09b6bd7fda h1:h+YpzUB/bGVJcLqW+d5GghcCmE/A25KbzjXvWJQi/+o=
github.com/saracen/go7z v0.0.0-20191010121135-9c09b6bd7fda/go.mod h1:MSotTrCv1PwoR8QgU1JurEx+lNNbtr25I+m0zbLyAGw=
github.com/saracen/go7z-fixtures v0.0.0-20190623165746-aa6b8fba1d2f h1:PF9WV5j/x6MT+x/sauUHd4objCvJbZb0wdxZkHSdd5A=
github.com/saracen/go7z-fixtures v0.0.0-20190623165746-aa6b8fba1d2f/go.mod h1:6Ff0ADODZ6S3gYepgZ2w7OqFrTqtFcfwDUhmm8jsUhs=
github.com/saracen/solidblock v0.0.0-20190426153529-45df20abab6f h1:1cJITU3JUI8qNS5T0BlXwANsVdyoJQHQ4hvOxbunPCw=
github.com/saracen/solidblock v0.0.0-20190426153529-45df20abab6f/go.mod h1:LyBTue+RWeyIfN3ZJ4wVxvDuvlGJtDgCLgCb6HCPgps=
github.com/ulikunitz/xz v0.5.10 h1:t92gobL9l3HE202wg3rlk19F6X+JOxl9BBrCCMYEYd8=
github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e h1:FDhOuMEY4JVRztM/gsbk+IKUQ8kj74bxZrgw87eMMVc=

Binary file not shown.

@ -0,0 +1,162 @@
package ip2region
import (
_ "embed"
"errors"
"github.com/dtapps/gostring"
"os"
"strconv"
"strings"
)
const (
IndexBlockLength = 12
)
type Ip2Region struct {
// db file handler
dbFileHandler *os.File
//header block info
headerSip []int64
headerPtr []int64
headerLen int64
// super block index info
firstIndexPtr int64
lastIndexPtr int64
totalBlocks int64
// for memory mode only
// the original db binary string
dbFile string
}
//go:embed ip2region.db
var dbBinStr []byte
type IpInfo struct {
IP string `json:"ip,omitempty"` // 输入的ip地址
CityID int64 `json:"city_id,omitempty"` // 城市ID
Country string `json:"country,omitempty"` // 国家
Region string `json:"region,omitempty"` // 区域
Province string `json:"province,omitempty"` // 省份
City string `json:"city,omitempty"` // 城市
ISP string `json:"isp,omitempty"` // 运营商
}
func (ip IpInfo) String() string {
return ip.IP + "|" + strconv.FormatInt(ip.CityID, 10) + "|" + ip.Country + "|" + ip.Region + "|" + ip.Province + "|" + ip.City + "|" + ip.ISP
}
// 获取Ip信息
func getIpInfo(ipStr string, cityId int64, line []byte) (ipInfo IpInfo) {
lineSlice := strings.Split(string(line), "|")
length := len(lineSlice)
ipInfo.CityID = cityId
if length < 5 {
for i := 0; i <= 5-length; i++ {
lineSlice = append(lineSlice, "")
}
}
if lineSlice[0] != "0" {
ipInfo.Country = gostring.SpaceAndLineBreak(lineSlice[0])
}
if lineSlice[1] != "0" {
ipInfo.Region = gostring.SpaceAndLineBreak(lineSlice[1])
}
if lineSlice[2] != "0" {
ipInfo.Province = gostring.SpaceAndLineBreak(lineSlice[2])
}
if lineSlice[3] != "0" {
ipInfo.City = gostring.SpaceAndLineBreak(lineSlice[3])
}
if lineSlice[4] != "0" {
ipInfo.ISP = gostring.SpaceAndLineBreak(lineSlice[4])
}
ipInfo.IP = ipStr
return ipInfo
}
// MemorySearch memory算法整个数据库全部载入内存单次查询都在0.1x毫秒内
func (r *Ip2Region) MemorySearch(ipStr string) (ipInfo IpInfo, err error) {
ipInfo.IP = ipStr
if r.totalBlocks == 0 {
if err != nil {
return ipInfo, err
}
r.firstIndexPtr = getLong(dbBinStr, 0)
r.lastIndexPtr = getLong(dbBinStr, 4)
r.totalBlocks = (r.lastIndexPtr-r.firstIndexPtr)/IndexBlockLength + 1
}
ip, err := ip2long(ipStr)
if err != nil {
return ipInfo, err
}
h := r.totalBlocks
var dataPtr, l int64
for l <= h {
m := (l + h) >> 1
p := r.firstIndexPtr + m*IndexBlockLength
sip := getLong(dbBinStr, p)
if ip < sip {
h = m - 1
} else {
eip := getLong(dbBinStr, p+4)
if ip > eip {
l = m + 1
} else {
dataPtr = getLong(dbBinStr, p+8)
break
}
}
}
if dataPtr == 0 {
return ipInfo, errors.New("not found")
}
dataLen := (dataPtr >> 24) & 0xFF
dataPtr = dataPtr & 0x00FFFFFF
ipInfo = getIpInfo(ipStr, getLong(dbBinStr, dataPtr), dbBinStr[(dataPtr)+4:dataPtr+dataLen])
return ipInfo, nil
}
func getLong(b []byte, offset int64) int64 {
val := int64(b[offset]) |
int64(b[offset+1])<<8 |
int64(b[offset+2])<<16 |
int64(b[offset+3])<<24
return val
}
func ip2long(IpStr string) (int64, error) {
bits := strings.Split(IpStr, ".")
if len(bits) != 4 {
return 0, errors.New("ip format error")
}
var sum int64
for i, n := range bits {
bit, _ := strconv.ParseInt(n, 10, 64)
sum += bit << uint(24-8*i)
}
return sum, nil
}

@ -0,0 +1,58 @@
package v4
import (
"bytes"
"compress/zlib"
"encoding/binary"
"io/ioutil"
"net/http"
)
// 解密key
// https://zhangzifan.com/update-qqwry-dat.html
func getKey() (uint32, error) {
resp, err := http.Get("https://update.cz88.net/ip/copywrite.rar")
if err != nil {
return 0, err
}
defer resp.Body.Close()
if body, err := ioutil.ReadAll(resp.Body); err != nil {
return 0, err
} else {
// @see https://stackoverflow.com/questions/34078427/how-to-read-packed-binary-data-in-go
return binary.LittleEndian.Uint32(body[5*4:]), nil
}
}
// 在线获取内容
func getOnline() ([]byte, error) {
resp, err := http.Get("https://update.cz88.net/ip/qqwry.rar")
if err != nil {
return nil, err
}
defer resp.Body.Close()
if body, err := ioutil.ReadAll(resp.Body); err != nil {
return nil, err
} else {
if key, err := getKey(); err != nil {
return nil, err
} else {
for i := 0; i < 0x200; i++ {
key = key * 0x805
key++
key = key & 0xff
body[i] = byte(uint32(body[i]) ^ key)
}
reader, err := zlib.NewReader(bytes.NewReader(body))
if err != nil {
return nil, err
}
return ioutil.ReadAll(reader)
}
}
}

@ -0,0 +1,201 @@
package v4
import (
_ "embed"
"encoding/binary"
"errors"
"github.com/dtapps/gostring"
"golang.org/x/text/encoding/simplifiedchinese"
"io/ioutil"
"log"
"net"
)
var (
header []byte
country []byte
area []byte
offset uint32
start uint32
end uint32
)
//go:embed qqwry.dat
var dat []byte
type Pointer struct {
Offset uint32
ItemLen uint32
IndexLen uint32
}
// Result 返回
type Result struct {
IP string `json:"ip,omitempty"` // 输入的ip地址
Country string `json:"country,omitempty"` // 国家或地区
Area string `json:"area,omitempty"` // 区域
}
// InitIPV4Data 加载
func (q *Pointer) InitIPV4Data() int64 {
buf := dat[0:8]
start := binary.LittleEndian.Uint32(buf[:4])
end := binary.LittleEndian.Uint32(buf[4:])
return int64((end-start)/7 + 1)
}
// ReadData 从文件中读取数据
func (q *Pointer) readData(length uint32) (rs []byte) {
end := q.Offset + length
dataNum := uint32(len(dat))
if q.Offset > dataNum {
return nil
}
if end > dataNum {
end = dataNum
}
rs = dat[q.Offset:end]
q.Offset = end
return rs
}
// Find ip地址查询对应归属地信息
func (q *Pointer) Find(ip string) (res Result) {
// 赋值
res.IP = ip
if net.ParseIP("61.241.55.180").To4() == nil {
// 不是ip地址
return res
}
q.Offset = 0
// 偏移
offset = q.searchIndex(binary.BigEndian.Uint32(net.ParseIP(ip).To4()))
if offset <= 0 {
return
}
q.Offset = offset + q.ItemLen
country, area = q.getAddr()
enc := simplifiedchinese.GBK.NewDecoder()
res.Country, _ = enc.String(string(country))
res.Country = gostring.SpaceAndLineBreak(res.Country)
res.Area, _ = enc.String(string(area))
// Delete CZ88.NET (防止不相关的信息产生干扰)
if res.Area == " CZ88.NET" || res.Area == "" {
res.Area = ""
} else {
res.Area = " " + res.Area
}
res.Area = gostring.SpaceAndLineBreak(res.Area)
return
}
// 获取地址信息
func (q *Pointer) getAddr() ([]byte, []byte) {
mode := q.readData(1)[0]
if mode == 0x01 {
// [IP][0x01][国家和地区信息的绝对偏移地址]
q.Offset = byteToUInt32(q.readData(3))
return q.getAddr()
}
// [IP][0x02][信息的绝对偏移][...] or [IP][国家][...]
_offset := q.Offset - 1
c1 := q.readArea(_offset)
if mode == 0x02 {
q.Offset = 4 + _offset
} else {
q.Offset = _offset + uint32(1+len(c1))
}
c2 := q.readArea(q.Offset)
return c1, c2
}
// 读取区
func (q *Pointer) readArea(offset uint32) []byte {
q.Offset = offset
mode := q.readData(1)[0]
if mode == 0x01 || mode == 0x02 {
return q.readArea(byteToUInt32(q.readData(3)))
}
q.Offset = offset
return q.readString()
}
// 读取字符串
func (q *Pointer) readString() []byte {
data := make([]byte, 0)
for {
buf := q.readData(1)
if buf[0] == 0 {
break
}
data = append(data, buf[0])
}
return data
}
// 搜索索引
func (q *Pointer) searchIndex(ip uint32) uint32 {
q.ItemLen = 4
q.IndexLen = 7
header = dat[0:8]
start = binary.LittleEndian.Uint32(header[:4])
end = binary.LittleEndian.Uint32(header[4:])
buf := make([]byte, q.IndexLen)
for {
mid := start + q.IndexLen*(((end-start)/q.IndexLen)>>1)
buf = dat[mid : mid+q.IndexLen]
_ip := binary.LittleEndian.Uint32(buf[:q.ItemLen])
if end-start == q.IndexLen {
if ip >= binary.LittleEndian.Uint32(dat[end:end+q.ItemLen]) {
buf = dat[end : end+q.IndexLen]
}
return byteToUInt32(buf[q.ItemLen:])
}
if _ip > ip {
end = mid
} else if _ip < ip {
start = mid
} else if _ip == ip {
return byteToUInt32(buf[q.ItemLen:])
}
}
}
// 字节转UInt32
func byteToUInt32(data []byte) uint32 {
i := uint32(data[0]) & 0xff
i |= (uint32(data[1]) << 8) & 0xff00
i |= (uint32(data[2]) << 16) & 0xff0000
return i
}
// OnlineDownload 在线下载
func (q *Pointer) OnlineDownload() (err error) {
tmpData, err := getOnline()
if err != nil {
return errors.New("下载失败")
}
if err := ioutil.WriteFile("./qqwry.dat", tmpData, 0644); err == nil {
log.Printf("已下载最新 纯真 IPv4数据库 %s ", "./qqwry.dat")
} else {
return errors.New("保存失败")
}
return nil
}

Binary file not shown.

@ -0,0 +1,81 @@
package v6
import (
"github.com/saracen/go7z"
"io"
"io/ioutil"
"log"
"net/http"
"os"
)
func getOnline() (data []byte, err error) {
resp, err := http.Get("https://ip.zxinc.org/ip.7z")
if err != nil {
return nil, err
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return nil, err
}
file7z, err := ioutil.TempFile("", "*")
if err != nil {
return nil, err
}
defer os.Remove(file7z.Name())
if err := ioutil.WriteFile(file7z.Name(), body, 0644); err == nil {
return Un7z(file7z.Name())
}
return
}
func Un7z(filePath string) (data []byte, err error) {
sz, err := go7z.OpenReader(filePath)
if err != nil {
return nil, err
}
defer sz.Close()
fileNoNeed, err := ioutil.TempFile("", "*")
if err != nil {
return nil, err
}
fileNeed, err := ioutil.TempFile("", "*")
if err != nil {
return nil, err
}
if err != nil {
return nil, err
}
for {
hdr, err := sz.Next()
if err == io.EOF {
break // End of archive
}
if err != nil {
return nil, err
}
if hdr.Name == "ipv6wry.db" {
if _, err := io.Copy(fileNeed, sz); err != nil {
log.Fatalln("ZX ipv6数据库解压出错", err.Error())
}
} else {
if _, err := io.Copy(fileNoNeed, sz); err != nil {
log.Fatalln("ZX ipv6数据库解压出错", err.Error())
}
}
}
err = fileNoNeed.Close()
if err != nil {
return nil, err
}
defer os.Remove(fileNoNeed.Name())
defer os.Remove(fileNeed.Name())
return ioutil.ReadFile(fileNeed.Name())
}

@ -0,0 +1,226 @@
package v6
import (
_ "embed"
"encoding/binary"
"errors"
"github.com/dtapps/gostring"
"io/ioutil"
"log"
"math/big"
"net"
"strings"
)
var (
header []byte
country []byte
area []byte
v6ip uint64
offset uint32
start uint32
end uint32
)
type Result struct {
IP string `json:"ip,omitempty"` // 输入的ip地址
Country string `json:"country,omitempty"` // 国家
Province string `json:"province,omitempty"` // 省份
City string `json:"city,omitempty"` // 城市
Area string `json:"area,omitempty"` // 区域
Isp string `json:"isp,omitempty"` // 运营商
}
//go:embed ipv6wry.db
var dat []byte
type Pointer struct {
Offset uint32
ItemLen uint32
IndexLen uint32
}
// InitIPV4Data 加载
func (q *Pointer) InitIPV4Data() int64 {
buf := dat[0:8]
start := binary.LittleEndian.Uint32(buf[:4])
end := binary.LittleEndian.Uint32(buf[4:])
return int64((end-start)/7 + 1)
}
// ReadData 从文件中读取数据
func (q *Pointer) readData(length uint32) (rs []byte) {
end := q.Offset + length
dataNum := uint32(len(dat))
if q.Offset > dataNum {
return nil
}
if end > dataNum {
end = dataNum
}
rs = dat[q.Offset:end]
q.Offset = end
return rs
}
// Find ip地址查询对应归属地信息
func (q *Pointer) Find(ip string) (res Result) {
res = Result{}
res.IP = ip
q.Offset = 0
tp := big.NewInt(0)
op := big.NewInt(0)
tp.SetBytes(net.ParseIP(ip).To16())
op.SetString("18446744073709551616", 10)
op.Div(tp, op)
tp.SetString("FFFFFFFFFFFFFFFF", 16)
op.And(op, tp)
v6ip = op.Uint64()
offset = q.searchIndex(v6ip)
q.Offset = offset
country, area = q.getAddr()
// 解析地区数据
info := strings.Split(string(country), "\t")
if len(info) > 0 {
i := 1
for {
if i > len(info) {
break
}
switch i {
case 1:
res.Country = info[i-1]
res.Country = gostring.SpaceAndLineBreak(res.Country)
case 2:
res.Province = info[i-1]
res.Province = gostring.SpaceAndLineBreak(res.Province)
case 3:
res.City = info[i-1]
res.City = gostring.SpaceAndLineBreak(res.City)
case 4:
res.Area = info[i-1]
res.Area = gostring.SpaceAndLineBreak(res.Area)
}
i++ // 自增
}
} else {
res.Country = string(country)
res.Country = gostring.SpaceAndLineBreak(res.Country)
}
// 运营商
res.Isp = string(area)
// Delete ZX (防止不相关的信息产生干扰)
if res.Isp == "ZX" || res.Isp == "" {
res.Isp = ""
} else {
res.Isp = " " + res.Isp
}
res.Isp = gostring.SpaceAndLineBreak(res.Isp)
return
}
func (q *Pointer) getAddr() ([]byte, []byte) {
mode := q.readData(1)[0]
if mode == 0x01 {
// [IP][0x01][国家和地区信息的绝对偏移地址]
q.Offset = byteToUInt32(q.readData(3))
return q.getAddr()
}
// [IP][0x02][信息的绝对偏移][...] or [IP][国家][...]
_offset := q.Offset - 1
c1 := q.readArea(_offset)
if mode == 0x02 {
q.Offset = 4 + _offset
} else {
q.Offset = _offset + uint32(1+len(c1))
}
c2 := q.readArea(q.Offset)
return c1, c2
}
func (q *Pointer) readArea(offset uint32) []byte {
q.Offset = offset
mode := q.readData(1)[0]
if mode == 0x01 || mode == 0x02 {
return q.readArea(byteToUInt32(q.readData(3)))
}
q.Offset = offset
return q.readString()
}
func (q *Pointer) readString() []byte {
data := make([]byte, 0)
for {
buf := q.readData(1)
if buf[0] == 0 {
break
}
data = append(data, buf[0])
}
return data
}
func (q *Pointer) searchIndex(ip uint64) uint32 {
q.ItemLen = 8
q.IndexLen = 11
header = dat[8:24]
start = binary.LittleEndian.Uint32(header[8:])
counts := binary.LittleEndian.Uint32(header[:8])
end = start + counts*q.IndexLen
buf := make([]byte, q.IndexLen)
for {
mid := start + q.IndexLen*(((end-start)/q.IndexLen)>>1)
buf = dat[mid : mid+q.IndexLen]
_ip := binary.LittleEndian.Uint64(buf[:q.ItemLen])
if end-start == q.IndexLen {
if ip >= binary.LittleEndian.Uint64(dat[end:end+q.ItemLen]) {
buf = dat[end : end+q.IndexLen]
}
return byteToUInt32(buf[q.ItemLen:])
}
if _ip > ip {
end = mid
} else if _ip < ip {
start = mid
} else if _ip == ip {
return byteToUInt32(buf[q.ItemLen:])
}
}
}
func byteToUInt32(data []byte) uint32 {
i := uint32(data[0]) & 0xff
i |= (uint32(data[1]) << 8) & 0xff00
i |= (uint32(data[2]) << 16) & 0xff0000
return i
}
// OnlineDownload 在线下载
func (q *Pointer) OnlineDownload() (err error) {
tmpData, err := getOnline()
if err != nil {
return errors.New("下载失败")
}
if err := ioutil.WriteFile("./ipv6wry.db", tmpData, 0644); err == nil {
log.Printf("已下载最新 ZX IPv6数据库 %s ", "./ipv6wry.db")
} else {
return errors.New("保存失败")
}
return nil
}

Binary file not shown.

@ -0,0 +1,8 @@
.env
.git
.svn
.idea
.vscode
*.log
gitmod.sh
*_test.go

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 茂名聚合科技有限公司
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,25 @@
<h1>
<a href="https://www.dtapp.net/">Golang String</a>
</h1>
📦 Golang 字符串组件
[comment]: <> (go)
[![godoc](https://pkg.go.dev/badge/github.com/dtapps/gostring?status.svg)](https://pkg.go.dev/github.com/dtapps/gostring)
[![goproxy.cn](https://goproxy.cn/stats/github.com/dtapps/gostring/badges/download-count.svg)](https://goproxy.cn/stats/github.com/dtapps/gostring)
[![goreportcard.com](https://goreportcard.com/badge/github.com/dtapps/gostring)](https://goreportcard.com/report/github.com/dtapps/gostring)
[![deps.dev](https://img.shields.io/badge/deps-go-red.svg)](https://deps.dev/go/github.com%2Fdtapps%2Fgostring)
#### 安装使用
```go
go get -v -u github.com/dtapps/gostring
```
#### 导入
```go
import (
"github.com/dtapps/gostring"
)
```

@ -0,0 +1,140 @@
package gostring
import (
"crypto/hmac"
"crypto/sha256"
"encoding/hex"
"strconv"
"strings"
"unicode/utf8"
)
// ToFloat64 string到float64
func ToFloat64(s string) float64 {
i, _ := strconv.ParseFloat(s, 64)
return i
}
// ToInt string到int
func ToInt(s string) int {
i, _ := strconv.Atoi(s)
return i
}
// ToInt64 string到int64
func ToInt64(s string) int64 {
i, err := strconv.ParseInt(s, 10, 64)
if err == nil {
return i
}
return int64(ToFloat64(s))
}
// ToUint string到uint64
func ToUint(s string) uint {
i, err := strconv.ParseUint(s, 10, 64)
if err == nil {
return uint(i)
}
return 0
}
// ToUint64 string到uint64
func ToUint64(s string) uint64 {
i, err := strconv.ParseUint(s, 10, 64)
if err == nil {
return i
}
return 0
}
// Replace 字符串替换
func Replace(str, old, new string) string {
return strings.Replace(str, old, new, -1)
}
func HmacSha256Hex(key, strToSign string) string {
hasHer := hmac.New(sha256.New, []byte(key))
hasHer.Write([]byte(strToSign))
return hex.EncodeToString(hasHer.Sum(nil))
}
// Space 去除空格
func Space(str string) string {
return strings.Replace(str, " ", "", -1)
}
// LineBreak 去除换行符
func LineBreak(str string) string {
return strings.Replace(str, "\n", "", -1)
}
// SpaceAndLineBreak 去除空格和去除换行符
func SpaceAndLineBreak(str string) string {
return LineBreak(Space(str))
}
// TrimLastChar 删除字符串中的最后一个
func TrimLastChar(s string) string {
r, size := utf8.DecodeLastRuneInString(s)
if r == utf8.RuneError && (size == 0 || size == 1) {
size = 0
}
return s[:len(s)-size]
}
// Split 字符串分隔
func Split(s string, sep string) []string {
return strings.Split(s, sep)
}
// Contains 判断字符串是否包含某个字符
func Contains(s, substr string) bool {
return strings.Contains(s, substr)
}
func NumericalToString(value interface{}) (string, bool) {
var val string
switch value.(type) {
default:
return "0", false
case int:
intVal, _ := value.(int)
val = strconv.FormatInt(int64(intVal), 10)
case int8:
intVal, _ := value.(int8)
val = strconv.FormatInt(int64(intVal), 10)
case int16:
intVal, _ := value.(int16)
val = strconv.FormatInt(int64(intVal), 10)
case int32:
intVal, _ := value.(int32)
val = strconv.FormatInt(int64(intVal), 10)
case int64:
intVal, _ := value.(int64)
val = strconv.FormatInt(int64(intVal), 10)
case uint:
intVal, _ := value.(uint)
val = strconv.FormatUint(uint64(intVal), 10)
case uint8:
intVal, _ := value.(uint8)
val = strconv.FormatUint(uint64(intVal), 10)
case uint16:
intVal, _ := value.(uint16)
val = strconv.FormatUint(uint64(intVal), 10)
case uint32:
intVal, _ := value.(uint32)
val = strconv.FormatUint(uint64(intVal), 10)
case uint64:
intVal, _ := value.(uint64)
val = strconv.FormatUint(intVal, 10)
case float32:
floatVal, _ := value.(float32)
val = strconv.FormatFloat(float64(floatVal), 'f', -1, 32)
case float64:
floatVal, _ := value.(float64)
val = strconv.FormatFloat(floatVal, 'f', -1, 64)
}
return val, true
}

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 Arran Walker
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,68 @@
# go7z
A native Go 7z archive reader.
Features:
- Development in early stages.
- Very little tests.
- Medium probability of crashes.
- Medium probability of using all memory.
- Decompresses:
- [LZMA](https://github.com/ulikunitz/xz)
- [LZMA2](https://github.com/ulikunitz/xz)
- Delta
- BCJ2
- bzip2
- deflate
## Usage
Extracting an archive:
```
package main
import (
"io"
"os"
"github.com/saracen/go7z"
)
func main() {
sz, err := go7z.OpenReader("hello.7z")
if err != nil {
panic(err)
}
defer sz.Close()
for {
hdr, err := sz.Next()
if err == io.EOF {
break // End of archive
}
if err != nil {
panic(err)
}
// If empty stream (no contents) and isn't specifically an empty file...
// then it's a directory.
if hdr.IsEmptyStream && !hdr.IsEmptyFile {
if err := os.MkdirAll(hdr.Name, os.ModePerm); err != nil {
panic(err)
}
continue
}
// Create file
f, err := os.Create(hdr.Name)
if err != nil {
panic(err)
}
defer f.Close()
if _, err := io.Copy(f, sz); err != nil {
panic(err)
}
}
}
```

@ -0,0 +1,132 @@
package filters
import (
"bytes"
"crypto/aes"
"crypto/cipher"
"crypto/sha256"
"encoding/binary"
"hash"
"io"
"strings"
"unicode/utf16"
)
var km keyManager
func init() {
km.cache = make(map[string][]byte)
km.hasher = sha256.New()
}
// AESDecrypter is an AES-256 decryptor.
type AESDecrypter struct {
r io.Reader
rbuf bytes.Buffer
cbc cipher.BlockMode
buf [aes.BlockSize]byte
}
type keyManager struct {
hasher hash.Hash
cache map[string][]byte
}
func (km *keyManager) Key(power int, salt []byte, password string) []byte {
var cacheKey strings.Builder
cacheKey.WriteString(password)
cacheKey.Write(salt)
cacheKey.WriteByte(byte(power))
key, ok := km.cache[cacheKey.String()]
if ok {
return key
}
b := bytes.NewBuffer(nil)
for _, p := range utf16.Encode([]rune(password)) {
binary.Write(b, binary.LittleEndian, p)
}
if power == 0x3f {
key = km.stretch(salt, b.Bytes())
} else {
key = km.sha256Stretch(power, salt, b.Bytes())
}
km.cache[cacheKey.String()] = key
return key
}
func (km *keyManager) stretch(salt, password []byte) []byte {
var key [aes.BlockSize]byte
var pos int
for pos = 0; pos < len(salt); pos++ {
key[pos] = salt[pos]
}
for i := 0; i < len(password) && pos < len(key); i++ {
key[pos] = password[i]
pos++
}
for ; pos < len(key); pos++ {
key[pos] = 0
}
return key[:]
}
func (km *keyManager) sha256Stretch(power int, salt, password []byte) []byte {
var temp [8]byte
for round := 0; round < 1<<power; round++ {
km.hasher.Write(salt)
km.hasher.Write(password)
km.hasher.Write(temp[:])
for i := 0; i < 8; i++ {
temp[i]++
if temp[i] != 0 {
break
}
}
}
defer km.hasher.Reset()
return km.hasher.Sum(nil)
}
// NewAESDecrypter returns a new AES-256 decryptor.
func NewAESDecrypter(r io.Reader, power int, salt, iv []byte, password string) (*AESDecrypter, error) {
key := km.Key(power, salt, password)
cb, err := aes.NewCipher(key)
if err != nil {
return nil, err
}
var aesiv [aes.BlockSize]byte
copy(aesiv[:], iv)
return &AESDecrypter{
r: r,
cbc: cipher.NewCBCDecrypter(cb, aesiv[:]),
}, nil
}
func (d *AESDecrypter) Read(p []byte) (int, error) {
for d.rbuf.Len() < len(p) {
_, err := d.r.Read(d.buf[:])
if err != nil {
return 0, err
}
d.cbc.CryptBlocks(d.buf[:], d.buf[:])
_, err = d.rbuf.Write(d.buf[:])
if err != nil {
return 0, err
}
}
n, err := d.rbuf.Read(p)
return n, err
}

@ -0,0 +1,210 @@
package filters
import (
"bufio"
"bytes"
"encoding/binary"
"io"
)
type rangeDecoder struct {
r io.Reader
nrange uint
code uint
}
func newRangeDecoder(r io.Reader) (*rangeDecoder, error) {
rd := &rangeDecoder{
r: r,
nrange: 0xffffffff,
}
for i := 0; i < 5; i++ {
b, err := rd.ReadByte()
if err != nil {
return nil, err
}
rd.code = (rd.code << 8) | uint(b)
}
return rd, nil
}
func (rd *rangeDecoder) ReadByte() (byte, error) {
var b [1]byte
_, err := rd.r.Read(b[:])
return b[0], err
}
const (
numMoveBits = 5
numbitModelTotalBits = 11
bitModelTotal = uint(1) << numbitModelTotalBits
numTopBits = 24
topValue = uint(1 << numTopBits)
)
type statusDecoder struct {
prob uint
}
func newStatusDecoder() *statusDecoder {
return &statusDecoder{prob: bitModelTotal / 2}
}
func (sd *statusDecoder) Decode(decoder *rangeDecoder) (uint, error) {
var err error
var b byte
newBound := (decoder.nrange >> numbitModelTotalBits) * sd.prob
if decoder.code < newBound {
decoder.nrange = newBound
sd.prob += (bitModelTotal - sd.prob) >> numMoveBits
if decoder.nrange < topValue {
if b, err = decoder.ReadByte(); err != nil {
return 0, err
}
decoder.code = (decoder.code << 8) | uint(b)
decoder.nrange <<= 8
}
return 0, nil
}
decoder.nrange -= newBound
decoder.code -= newBound
sd.prob -= sd.prob >> numMoveBits
if decoder.nrange < topValue {
if b, err = decoder.ReadByte(); err != nil {
return 0, err
}
decoder.code = (decoder.code << 8) | uint(b)
decoder.nrange <<= 8
}
return 1, nil
}
// BCJ2Decoder is a BCJ2 decoder.
type BCJ2Decoder struct {
main *bufio.Reader
call io.Reader
jump io.Reader
rangeDecoder *rangeDecoder
statusDecoder []*statusDecoder
written int64
finished bool
prevByte byte
buf *bytes.Buffer
}
// NewBCJ2Decoder returns a new BCJ2 decoder.
func NewBCJ2Decoder(main, call, jump, rangedecoder io.Reader, limit int64) (*BCJ2Decoder, error) {
rd, err := newRangeDecoder(rangedecoder)
if err != nil {
return nil, err
}
decoder := &BCJ2Decoder{
main: bufio.NewReader(main),
call: call,
jump: jump,
rangeDecoder: rd,
statusDecoder: make([]*statusDecoder, 256+2),
buf: new(bytes.Buffer),
}
decoder.buf.Grow(1 << 16)
for i := range decoder.statusDecoder {
decoder.statusDecoder[i] = newStatusDecoder()
}
return decoder, nil
}
func (d *BCJ2Decoder) isJcc(b0, b1 byte) bool {
return b0 == 0x0f && (b1&0xf0) == 0x80
}
func (d *BCJ2Decoder) isJ(b0, b1 byte) bool {
return (b1&0xfe) == 0xe8 || d.isJcc(b0, b1)
}
func (d *BCJ2Decoder) index(b0, b1 byte) int {
switch b1 {
case 0xe8:
return int(b0)
case 0xe9:
return 256
}
return 257
}
func (d *BCJ2Decoder) Read(p []byte) (int, error) {
err := d.read()
if err != nil && err != io.EOF {
return 0, err
}
return d.buf.Read(p)
}
func (d *BCJ2Decoder) read() error {
b := byte(0)
var err error
for i := 0; i < d.buf.Cap(); i++ {
b, err = d.main.ReadByte()
if err != nil {
return err
}
d.written++
if err = d.buf.WriteByte(b); err != nil {
return err
}
if d.isJ(d.prevByte, b) {
break
}
d.prevByte = b
}
if d.buf.Len() == d.buf.Cap() {
return nil
}
bit, err := d.statusDecoder[d.index(d.prevByte, b)].Decode(d.rangeDecoder)
if err != nil {
return err
}
if bit == 1 {
var r io.Reader
if b == 0xe8 {
r = d.call
} else {
r = d.jump
}
var dest uint32
if err = binary.Read(r, binary.BigEndian, &dest); err != nil {
return err
}
dest -= uint32(d.written + 4)
if err = binary.Write(d.buf, binary.LittleEndian, dest); err != nil {
return err
}
d.prevByte = byte(dest >> 24)
d.written += 4
} else {
d.prevByte = b
}
return nil
}

@ -0,0 +1,45 @@
package filters
import "io"
const deltaStateSize = 256
// DeltaDecoder is a Delta decoder.
type DeltaDecoder struct {
state [deltaStateSize]byte
r io.Reader
delta uint
}
// NewDeltaDecoder returns a new Delta decoder.
func NewDeltaDecoder(r io.Reader, delta uint, limit int64) (*DeltaDecoder, error) {
return &DeltaDecoder{r: r, delta: delta}, nil
}
func (d *DeltaDecoder) Read(p []byte) (int, error) {
n, err := d.r.Read(p)
if err != nil {
return n, err
}
var buf [deltaStateSize]byte
copy(buf[:], d.state[:d.delta])
var i, j uint
for i = 0; i < uint(n); {
for j = 0; j < d.delta && i < uint(n); i++ {
p[i] = buf[j] + p[i]
buf[j] = p[i]
j++
}
}
if j == d.delta {
j = 0
}
copy(d.state[:], buf[j:d.delta])
copy(d.state[d.delta-j:], buf[:j])
return n, err
}

@ -0,0 +1,32 @@
// +build gofuzz
package go7z
import (
"bytes"
"io"
"io/ioutil"
)
func Fuzz(data []byte) int {
sz := new(Reader)
if err := sz.init(bytes.NewReader(data), int64(len(data)), true); err != nil {
return 0
}
for {
_, err := sz.Next()
if err == io.EOF {
return 0
}
if err != nil {
return 0
}
if _, err = io.Copy(ioutil.Discard, sz); err != nil {
return 0
}
}
return 1
}

@ -0,0 +1,25 @@
package headers
import (
"encoding/binary"
"io"
)
// ReadDigests reads an array of uint32 CRCs.
func ReadDigests(r io.Reader, length int) ([]uint32, error) {
defined, _, err := ReadOptionalBoolVector(r, length)
if err != nil {
return nil, err
}
crcs := make([]uint32, length)
for i := range defined {
if defined[i] {
if err := binary.Read(r, binary.LittleEndian, &crcs[i]); err != nil {
return nil, err
}
}
}
return crcs, nil
}

@ -0,0 +1,159 @@
package headers
import (
"encoding/binary"
"errors"
"io"
"time"
"unicode/utf16"
)
// ErrInvalidFileCount is returned when the file count read from the stream
// exceeds the caller supplied maxFileCount.
var ErrInvalidFileCount = errors.New("invalid file count")
// FileInfo is a structure containing the information of an archived file.
type FileInfo struct {
Name string
Attrib uint32
IsEmptyStream bool
IsEmptyFile bool
// Flag indicating a file should be removed upon extraction.
IsAntiFile bool
CreatedAt time.Time
AccessedAt time.Time
ModifiedAt time.Time
}
// ReadFilesInfo reads the files info structure.
func ReadFilesInfo(r io.Reader, maxFileCount int) ([]*FileInfo, error) {
numFiles, err := ReadNumberInt(r)
if err != nil {
return nil, err
}
if numFiles > maxFileCount {
return nil, ErrInvalidFileCount
}
fileInfo := make([]*FileInfo, numFiles)
for i := range fileInfo {
fileInfo[i] = &FileInfo{}
}
var numEmptyStreams int
for {
id, err := ReadByte(r)
if err != nil {
return nil, err
}
if id == k7zEnd {
return fileInfo, nil
}
size, err := ReadNumber(r)
if err != nil {
return nil, err
}
switch id {
case k7zEmptyStream:
var emptyStreams []bool
emptyStreams, numEmptyStreams, err = ReadBoolVector(r, numFiles)
if err != nil {
return nil, err
}
for i, fi := range fileInfo {
fi.IsEmptyStream = emptyStreams[i]
}
case k7zEmptyFile, k7zAnti:
files, _, err := ReadBoolVector(r, numEmptyStreams)
if err != nil {
return nil, err
}
idx := 0
for _, fi := range fileInfo {
if fi.IsEmptyStream {
switch id {
case k7zEmptyFile:
fi.IsEmptyFile = files[idx]
case k7zAnti:
fi.IsAntiFile = files[idx]
}
idx++
}
}
case k7zStartPos:
return nil, ErrUnexpectedPropertyID
case k7zCTime, k7zATime, k7zMTime:
times, err := ReadDateTimeVector(r, numFiles)
if err != nil {
return nil, err
}
for i, fi := range fileInfo {
switch id {
case k7zCTime:
fi.CreatedAt = times[i]
case k7zATime:
fi.AccessedAt = times[i]
case k7zMTime:
fi.ModifiedAt = times[i]
}
}
case k7zName:
external, err := ReadByte(r)
if err != nil {
return nil, err
}
switch external {
case 0:
for _, fi := range fileInfo {
var rune uint16
var name []uint16
for {
if err = binary.Read(r, binary.LittleEndian, &rune); err != nil {
return nil, err
}
if rune == 0 {
break
}
name = append(name, rune)
}
fi.Name = string(utf16.Decode(name))
}
default:
return nil, ErrAdditionalStreamsNotImplemented
}
case k7zWinAttributes:
attributes, err := ReadAttributeVector(r, numFiles)
if err != nil {
return nil, err
}
for i, fi := range fileInfo {
fi.Attrib = attributes[i]
}
case k7zDummy:
for i := uint64(0); i < size; i++ {
if _, err = ReadByte(r); err != nil {
return nil, err
}
}
default:
return nil, ErrUnexpectedPropertyID
}
}
}

@ -0,0 +1,240 @@
package headers
import (
"errors"
"io"
)
const (
// MaxInOutStreams is the maximum allowed stream inputs/outputs into/out
// of a coder.
MaxInOutStreams = 4
// MaxPropertyDataSize is the size in bytes supported for coder property data.
MaxPropertyDataSize = 128
// MaxCodersInFolder is the maximum number of coders allowed to be
// specified in a folder.
MaxCodersInFolder = 4
// MaxPackedStreamsInFolder is the maximum number of packed streams allowed
// to be in a folder.
MaxPackedStreamsInFolder = 4
)
var (
// ErrInvalidStreamCount is the error returned when the input/output stream
// count for a coder is <= 0 || > MaxInOutStreams.
ErrInvalidStreamCount = errors.New("invalid in/out stream count")
// ErrInvalidPropertyDataSize is the error returned when the property data
// size is <= 0 || > MaxInOutStreams.
ErrInvalidPropertyDataSize = errors.New("invalid property data size")
// ErrInvalidCoderInFolderCount is the error returned when the number of
// coders in a folder is <= 0 || > MaxCodersInFolder.
ErrInvalidCoderInFolderCount = errors.New("invalid coder in folder count")
// ErrInvalidPackedStreamsCount is the error returned when the number of
// packed streams exceeds MaxPackedStreamsInFolder
ErrInvalidPackedStreamsCount = errors.New("invalid packed streams count")
)
// Folder is a structure containing information on how a solid block was
// constructed.
type Folder struct {
CoderInfo []*CoderInfo
BindPairsInfo []*BindPairsInfo
PackedIndices []int
UnpackSizes []uint64
UnpackCRC uint32
}
// NumInStreamsTotal is the sum of inputs required by all codecs.
func (f *Folder) NumInStreamsTotal() int {
var count int
for i := range f.CoderInfo {
count += f.CoderInfo[i].NumInStreams
}
return count
}
// NumOutStreamsTotal is the sum of outputs required by all codecs.
func (f *Folder) NumOutStreamsTotal() int {
var count int
for i := range f.CoderInfo {
count += f.CoderInfo[i].NumOutStreams
}
return count
}
// FindBindPairForInStream returns the index of a bindpair by an in index.
func (f *Folder) FindBindPairForInStream(inStreamIndex int) int {
for i := range f.BindPairsInfo {
if f.BindPairsInfo[i].InIndex == inStreamIndex {
return i
}
}
return -1
}
// FindBindPairForOutStream returns the index of a bindpair by an out index.
func (f *Folder) FindBindPairForOutStream(outStreamIndex int) int {
for i := range f.BindPairsInfo {
if f.BindPairsInfo[i].OutIndex == outStreamIndex {
return i
}
}
return -1
}
// UnpackSize returns the final unpacked size of the folder.
func (f *Folder) UnpackSize() uint64 {
for i := range f.UnpackSizes {
if f.FindBindPairForOutStream(i) < 0 {
return f.UnpackSizes[i]
}
}
return 0
}
// ReadFolder reads a folder structure.
func ReadFolder(r io.Reader) (*Folder, error) {
var err error
folder := &Folder{}
numCoders, err := ReadNumberInt(r)
if err != nil {
return nil, err
}
if numCoders == 0 || numCoders > MaxCodersInFolder {
return nil, ErrInvalidCoderInFolderCount
}
folder.CoderInfo = make([]*CoderInfo, numCoders)
for i := range folder.CoderInfo {
if folder.CoderInfo[i], err = ReadCoderInfo(r); err != nil {
return nil, err
}
}
folder.BindPairsInfo = make([]*BindPairsInfo, numCoders-1)
for i := range folder.BindPairsInfo {
if folder.BindPairsInfo[i], err = ReadBindPairsInfo(r); err != nil {
return nil, err
}
}
numInStreamsTotal := folder.NumInStreamsTotal()
numPackedStreams := numInStreamsTotal - len(folder.BindPairsInfo)
if numPackedStreams > 1 {
if numPackedStreams > MaxPackedStreamsInFolder {
return nil, ErrInvalidPackedStreamsCount
}
folder.PackedIndices = make([]int, numPackedStreams)
for i := range folder.PackedIndices {
if folder.PackedIndices[i], err = ReadNumberInt(r); err != nil {
return nil, err
}
}
} else if numPackedStreams == 1 {
for i := 0; i < numInStreamsTotal; i++ {
if folder.FindBindPairForInStream(i) < 0 {
folder.PackedIndices = []int{i}
break
}
}
}
return folder, nil
}
// CoderInfo is a structure holding information about a codec.
type CoderInfo struct {
CodecID uint32
Properties []byte
NumInStreams int
NumOutStreams int
}
// ReadCoderInfo reads a coder info structure.
func ReadCoderInfo(r io.Reader) (*CoderInfo, error) {
attributes, err := ReadByte(r)
if err != nil {
return nil, err
}
coderInfo := &CoderInfo{}
codecIDSize := attributes & 0x0f
isComplexCoder := attributes&0x10 > 0
hasAttributes := attributes&0x20 > 0
if codecIDSize > 0 {
b := make([]byte, codecIDSize)
if _, err = r.Read(b); err != nil {
return nil, err
}
for i := codecIDSize; i > 0; i-- {
coderInfo.CodecID |= uint32(b[i-1]) << ((codecIDSize - i) * 8)
}
}
coderInfo.NumInStreams = 1
coderInfo.NumOutStreams = 1
if isComplexCoder {
if coderInfo.NumInStreams, err = ReadNumberInt(r); err != nil {
return nil, err
}
if coderInfo.NumInStreams == 0 || coderInfo.NumInStreams > MaxInOutStreams {
return nil, ErrInvalidStreamCount
}
if coderInfo.NumOutStreams, err = ReadNumberInt(r); err != nil {
return nil, err
}
if coderInfo.NumOutStreams == 0 || coderInfo.NumOutStreams > MaxInOutStreams {
return nil, ErrInvalidStreamCount
}
}
if hasAttributes {
size, err := ReadNumberInt(r)
if err != nil {
return nil, err
}
if size <= 0 || size > MaxPropertyDataSize {
return nil, ErrInvalidPropertyDataSize
}
coderInfo.Properties = make([]byte, size)
if _, err = r.Read(coderInfo.Properties); err != nil {
return nil, err
}
}
return coderInfo, nil
}
// BindPairsInfo is a structure that binds the in and out indexes of a codec.
type BindPairsInfo struct {
InIndex int
OutIndex int
}
// ReadBindPairsInfo reads a bindpairs info structure.
func ReadBindPairsInfo(r io.Reader) (*BindPairsInfo, error) {
bindPairsInfo := &BindPairsInfo{}
var err error
if bindPairsInfo.InIndex, err = ReadNumberInt(r); err != nil {
return nil, err
}
if bindPairsInfo.OutIndex, err = ReadNumberInt(r); err != nil {
return nil, err
}
return bindPairsInfo, nil
}

@ -0,0 +1,152 @@
package headers
import (
"bytes"
"encoding/binary"
"errors"
"hash/crc32"
"io"
)
const (
// SignatureHeader size is the size of the signature header.
SignatureHeaderSize = 32
// MaxHeaderSize is the maximum header size.
MaxHeaderSize = int64(1 << 62) // 4 exbibyte
)
var (
// MagicBytes is the magic bytes used in the 7z signature.
MagicBytes = [6]byte{0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C}
// ErrInvalidSignatureHeader is returned when signature header is invalid.
ErrInvalidSignatureHeader = errors.New("invalid signature header")
)
// SignatureHeader is the structure found at the top of 7z files.
type SignatureHeader struct {
Signature [6]byte
ArchiveVersion struct {
Major byte
Minor byte
}
StartHeaderCRC uint32
StartHeader struct {
NextHeaderOffset int64
NextHeaderSize int64
NextHeaderCRC uint32
}
}
// ReadSignatureHeader reads the signature header.
func ReadSignatureHeader(r io.Reader) (*SignatureHeader, error) {
var raw [SignatureHeaderSize]byte
_, err := r.Read(raw[:])
if err != nil {
return nil, err
}
var header SignatureHeader
copy(header.Signature[:], raw[:6])
if bytes.Compare(header.Signature[:], MagicBytes[:]) != 0 {
return nil, ErrInvalidSignatureHeader
}
header.ArchiveVersion.Major = raw[6]
header.ArchiveVersion.Minor = raw[7]
header.StartHeaderCRC = binary.LittleEndian.Uint32(raw[8:])
header.StartHeader.NextHeaderOffset = int64(binary.LittleEndian.Uint64(raw[12:]))
header.StartHeader.NextHeaderSize = int64(binary.LittleEndian.Uint64(raw[20:]))
header.StartHeader.NextHeaderCRC = binary.LittleEndian.Uint32(raw[28:])
if header.StartHeader.NextHeaderSize < 0 || header.StartHeader.NextHeaderSize > MaxHeaderSize {
return &header, ErrInvalidSignatureHeader
}
if crc32.ChecksumIEEE(raw[12:]) != header.StartHeaderCRC {
err = ErrChecksumMismatch
}
return &header, err
}
// Header is structure containing file and stream information.
type Header struct {
MainStreamsInfo *StreamsInfo
FilesInfo []*FileInfo
}
// ReadPackedStreamsForHeaders reads either a header or encoded header structure.
func ReadPackedStreamsForHeaders(r *io.LimitedReader) (header *Header, encodedHeader *StreamsInfo, err error) {
id, err := ReadByte(r)
if err != nil {
return nil, nil, err
}
switch id {
case k7zHeader:
if header, err = ReadHeader(r); err != nil && err != io.EOF {
return nil, nil, err
}
case k7zEncodedHeader:
if encodedHeader, err = ReadStreamsInfo(r); err != nil {
return nil, nil, err
}
case k7zEnd:
if header == nil && encodedHeader == nil {
return nil, nil, ErrUnexpectedPropertyID
}
break
default:
return nil, nil, ErrUnexpectedPropertyID
}
return header, encodedHeader, nil
}
// ReadHeader reads a header structure.
func ReadHeader(r *io.LimitedReader) (*Header, error) {
header := &Header{}
for {
id, err := ReadByte(r)
if err != nil {
return nil, err
}
switch id {
case k7zArchiveProperties:
return nil, ErrArchivePropertiesNotImplemented
case k7zAdditionalStreamsInfo:
return nil, ErrAdditionalStreamsNotImplemented
case k7zMainStreamsInfo:
if header.MainStreamsInfo, err = ReadStreamsInfo(r); err != nil {
return nil, err
}
case k7zFilesInfo:
// Limit the maximum amount of FileInfos that get allocated to size
// of the remaining header / 3
if header.FilesInfo, err = ReadFilesInfo(r, int(r.N)/3); err != nil {
return nil, err
}
case k7zEnd:
if header.MainStreamsInfo == nil {
return nil, ErrUnexpectedPropertyID
}
return header, nil
default:
return nil, ErrUnexpectedPropertyID
}
}
}

@ -0,0 +1,51 @@
package headers
import "io"
// PackInfo contains the pack stream sizes of the folders.
type PackInfo struct {
PackPos uint64
PackSizes []uint64
}
// ReadPackInfo reads a pack info structure.
func ReadPackInfo(r io.Reader) (*PackInfo, error) {
packInfo := &PackInfo{}
var err error
if packInfo.PackPos, err = ReadNumber(r); err != nil {
return nil, err
}
numPackStreams, err := ReadNumberInt(r)
if err != nil {
return nil, err
}
for {
id, err := ReadByte(r)
if err != nil {
return nil, err
}
switch id {
case k7zSize:
packInfo.PackSizes = make([]uint64, numPackStreams+1)
for i := 0; i < numPackStreams; i++ {
packInfo.PackSizes[i], err = ReadNumber(r)
if err != nil {
return nil, err
}
}
case k7zCRC:
return nil, ErrPackInfoCRCsNotImplemented
case k7zEnd:
return packInfo, nil
default:
return nil, ErrUnexpectedPropertyID
}
}
}

@ -0,0 +1,262 @@
package headers
import (
"encoding/binary"
"errors"
"io"
"time"
)
const (
k7zEnd = iota
k7zHeader
k7zArchiveProperties
k7zAdditionalStreamsInfo
k7zMainStreamsInfo
k7zFilesInfo
k7zPackInfo
k7zUnpackInfo
k7zSubStreamsInfo
k7zSize
k7zCRC
k7zFolder
k7zCodersUnpackSize
k7zNumUnpackStream
k7zEmptyStream
k7zEmptyFile
k7zAnti
k7zName
k7zCTime
k7zATime
k7zMTime
k7zWinAttributes
k7zComment
k7zEncodedHeader
k7zStartPos
k7zDummy
)
const MaxNumber = 0x7FFFFFFF
var (
// ErrUnexpectedPropertyID is returned when we read a property id that was
// either unexpected, or we don't support.
ErrUnexpectedPropertyID = errors.New("unexpected property id")
// ErrAdditionalStreamsNotImplemented is returned for archives using
// additional streams. These were apparently used in older versions of 7zip.
ErrAdditionalStreamsNotImplemented = errors.New("additional streams are not implemented")
// ErrArchivePropertiesNotImplemented is returned if archive properties
// structure is found. So far, this hasn't been used in any verison of 7zip.
ErrArchivePropertiesNotImplemented = errors.New("archive properties are not implemented")
// ErrChecksumMismatch is returned when a CRC check fails.
ErrChecksumMismatch = errors.New("checksum mismatch")
// ErrPackInfoCRCsNotImplemented is returned if a CRC property id is
// encountered whilst reading packinfo.
ErrPackInfoCRCsNotImplemented = errors.New("packinfo crcs are not implemented")
// ErrInvalidNumber is returned when a number read exceeds 0x7FFFFFFF
ErrInvalidNumber = errors.New("invalid number")
)
// ReadByte reads a single byte.
func ReadByte(r io.Reader) (byte, error) {
var val [1]byte
_, err := r.Read(val[:])
return val[0], err
}
// ReadByteExpect reads a byte to be expected, errors if unexpected.
func ReadByteExpect(r io.Reader, val byte) error {
value, err := ReadByte(r)
if err != nil {
return err
}
if value != val {
return ErrUnexpectedPropertyID
}
return nil
}
// ReadNumber reads a 7z encoded uint64.
func ReadNumber(r io.Reader) (uint64, error) {
first, err := ReadByte(r)
if err != nil {
return 0, err
}
var value uint64
mask := byte(0x80)
for i := uint64(0); i < 8; i++ {
if first&mask == 0 {
hp := uint64(first) & (uint64(mask) - 1)
value += hp << (i * 8)
return value, nil
}
val, err := ReadByte(r)
if err != nil {
return 0, err
}
value |= uint64(val) << (8 * i)
mask >>= 1
}
return value, nil
}
// ReadNumberInt is the same as ReadNumber, but cast to int.
func ReadNumberInt(r io.Reader) (int, error) {
u64, err := ReadNumber(r)
if u64 > MaxNumber {
return 0, ErrInvalidNumber
}
return int(u64), err
}
// ReadUint32 reads a uint32.
func ReadUint32(r io.Reader) (uint32, error) {
var v uint32
return v, binary.Read(r, binary.LittleEndian, &v)
}
// ReadUint64 reads a uint64.
func ReadUint64(r io.Reader) (uint64, error) {
var v uint64
return v, binary.Read(r, binary.LittleEndian, &v)
}
// ReadBoolVector reads a vector of boolean values.
func ReadBoolVector(r io.Reader, length int) ([]bool, int, error) {
var b byte
var mask byte
var err error
v := make([]bool, length)
count := 0
for i := range v {
if mask == 0 {
b, err = ReadByte(r)
if err != nil {
return nil, 0, err
}
mask = 0x80
}
v[i] = (b & mask) != 0
mask >>= 1
if v[i] {
count++
}
}
return v, count, nil
}
// ReadOptionalBoolVector reads a vector of boolean values if they're available,
// otherwise it returns an array of booleans all being true.
func ReadOptionalBoolVector(r io.Reader, length int) ([]bool, int, error) {
allDefined, err := ReadByte(r)
if err != nil {
return nil, 0, err
}
if allDefined == 0 {
return ReadBoolVector(r, length)
}
defined := make([]bool, length)
for i := range defined {
defined[i] = true
}
return defined, length, nil
}
// ReadNumberVector returns a vector of 7z encoded int64s.
func ReadNumberVector(r io.Reader, numFiles int) ([]*int64, error) {
defined, _, err := ReadOptionalBoolVector(r, numFiles)
if err != nil {
return nil, err
}
external, err := ReadByte(r)
if err != nil {
return nil, err
}
if external != 0 {
return nil, ErrAdditionalStreamsNotImplemented
}
numbers := make([]*int64, numFiles)
for i := 0; i < numFiles; i++ {
if defined[i] {
num, err := ReadUint64(r)
if err != nil {
return nil, err
}
val := int64(num)
numbers[i] = &val
} else {
numbers[i] = nil
}
}
return numbers, err
}
// ReadDateTimeVector reads a vector of datetime values.
func ReadDateTimeVector(r io.Reader, numFiles int) ([]time.Time, error) {
timestamps, err := ReadNumberVector(r, numFiles)
if err != nil {
return nil, err
}
times := make([]time.Time, len(timestamps))
for i := range times {
if timestamps[i] != nil {
nsec := *timestamps[i]
nsec -= 116444736000000000
nsec *= 100
times[i] = time.Unix(0, nsec)
}
}
return times, nil
}
// ReadAttributeVector reads a vector of uint32s.
func ReadAttributeVector(r io.Reader, numFiles int) ([]uint32, error) {
defined, _, err := ReadOptionalBoolVector(r, numFiles)
if err != nil {
return nil, err
}
external, err := ReadByte(r)
if err != nil {
return nil, err
}
if external != 0 {
return nil, ErrAdditionalStreamsNotImplemented
}
attributes := make([]uint32, numFiles)
for i := range attributes {
if defined[i] {
val, err := ReadUint32(r)
if err != nil {
return nil, err
}
attributes[i] = val
}
}
return attributes, nil
}

@ -0,0 +1,143 @@
package headers
import (
"io"
)
// StreamsInfo is a top-level structure of the 7z format.
type StreamsInfo struct {
PackInfo *PackInfo
UnpackInfo *UnpackInfo
SubStreamsInfo *SubStreamsInfo
}
// ReadStreamsInfo reads the streams info structure.
func ReadStreamsInfo(r io.Reader) (*StreamsInfo, error) {
streamsInfo := &StreamsInfo{}
for {
id, err := ReadByte(r)
if err != nil {
return nil, err
}
switch id {
case k7zPackInfo:
if streamsInfo.PackInfo, err = ReadPackInfo(r); err != nil {
return nil, err
}
case k7zUnpackInfo:
if streamsInfo.UnpackInfo, err = ReadUnpackInfo(r); err != nil {
return nil, err
}
case k7zSubStreamsInfo:
if streamsInfo.UnpackInfo == nil {
return nil, ErrUnexpectedPropertyID
}
if streamsInfo.SubStreamsInfo, err = ReadSubStreamsInfo(r, streamsInfo.UnpackInfo); err != nil {
return nil, err
}
case k7zEnd:
if streamsInfo.PackInfo == nil || streamsInfo.UnpackInfo == nil {
return nil, ErrUnexpectedPropertyID
}
return streamsInfo, nil
default:
return nil, ErrUnexpectedPropertyID
}
}
}
// SubStreamsInfo is a structure found within the StreamsInfo structure.
type SubStreamsInfo struct {
NumUnpackStreamsInFolders []int
UnpackSizes []uint64
Digests []uint32
}
// ReadSubStreamsInfo reads the substreams info structure.
func ReadSubStreamsInfo(r io.Reader, unpackInfo *UnpackInfo) (*SubStreamsInfo, error) {
id, err := ReadByte(r)
if err != nil {
return nil, err
}
subStreamInfo := &SubStreamsInfo{}
subStreamInfo.NumUnpackStreamsInFolders = make([]int, len(unpackInfo.Folders))
for i := range subStreamInfo.NumUnpackStreamsInFolders {
subStreamInfo.NumUnpackStreamsInFolders[i] = 1
}
if id == k7zNumUnpackStream {
for i := range subStreamInfo.NumUnpackStreamsInFolders {
if subStreamInfo.NumUnpackStreamsInFolders[i], err = ReadNumberInt(r); err != nil {
return nil, err
}
}
id, err = ReadByte(r)
if err != nil {
return nil, err
}
}
for i := range unpackInfo.Folders {
if subStreamInfo.NumUnpackStreamsInFolders[i] == 0 {
continue
}
var sum uint64
if id == k7zSize {
for j := 1; j < subStreamInfo.NumUnpackStreamsInFolders[i]; j++ {
size, err := ReadNumber(r)
if err != nil {
return nil, err
}
sum += size
subStreamInfo.UnpackSizes = append(subStreamInfo.UnpackSizes, size)
}
}
subStreamInfo.UnpackSizes = append(subStreamInfo.UnpackSizes, unpackInfo.Folders[i].UnpackSize()-uint64(sum))
}
if id == k7zSize {
id, err = ReadByte(r)
if err != nil {
return nil, err
}
}
numDigests := 0
for i := range unpackInfo.Folders {
numSubStreams := subStreamInfo.NumUnpackStreamsInFolders[i]
if numSubStreams > 1 || unpackInfo.Folders[i].UnpackCRC == 0 {
numDigests += int(numSubStreams)
}
}
if id == k7zCRC {
subStreamInfo.Digests, err = ReadDigests(r, numDigests)
if err != nil {
return nil, err
}
id, err = ReadByte(r)
if err != nil {
return nil, err
}
}
if id != k7zEnd {
return nil, ErrUnexpectedPropertyID
}
return subStreamInfo, nil
}

@ -0,0 +1,89 @@
package headers
import (
"errors"
"io"
)
const MaxFolderCount = 1 << 30
// ErrInvalidCountExceeded is returned when the folder count is
// < 0 || > MaxFolderCount
var ErrInvalidCountExceeded = errors.New("invalid folder count")
// UnpackInfo is a structure containing folders.
type UnpackInfo struct {
Folders []*Folder
}
// ReadUnpackInfo reads unpack info structures.
func ReadUnpackInfo(r io.Reader) (*UnpackInfo, error) {
err := ReadByteExpect(r, k7zFolder)
if err != nil {
return nil, err
}
numFolders, err := ReadNumberInt(r)
if err != nil {
return nil, err
}
if numFolders > MaxFolderCount {
return nil, ErrInvalidCountExceeded
}
unpackInfo := &UnpackInfo{}
external, err := ReadByte(r)
if err != nil {
return nil, err
}
switch external {
case 0:
unpackInfo.Folders = make([]*Folder, numFolders)
for i := range unpackInfo.Folders {
if unpackInfo.Folders[i], err = ReadFolder(r); err != nil {
return nil, err
}
}
default:
return nil, ErrAdditionalStreamsNotImplemented
}
if err = ReadByteExpect(r, k7zCodersUnpackSize); err != nil {
return nil, err
}
for _, folder := range unpackInfo.Folders {
folder.UnpackSizes = make([]uint64, folder.NumOutStreamsTotal())
for i := range folder.UnpackSizes {
if folder.UnpackSizes[i], err = ReadNumber(r); err != nil {
return nil, err
}
}
}
id, err := ReadByte(r)
if err != nil {
return nil, err
}
if id == k7zCRC {
crcs, err := ReadDigests(r, len(unpackInfo.Folders))
if err != nil {
return nil, err
}
for i := range unpackInfo.Folders {
unpackInfo.Folders[i].UnpackCRC = crcs[i]
}
id, err = ReadByte(r)
if err != nil {
return nil, err
}
}
if id != k7zEnd {
return nil, ErrUnexpectedPropertyID
}
return unpackInfo, nil
}

@ -0,0 +1,377 @@
package go7z
import (
"bufio"
"errors"
"fmt"
"hash/crc32"
"io"
"os"
"sync"
"github.com/saracen/go7z/headers"
"github.com/saracen/solidblock"
)
var (
// ErrNotSupported is returned when an unrecognized archive format is
// encountered.
ErrNotSupported = errors.New("not supported")
// ErrDecompressorNotFound is returned when a requested decompressor has not
// been registered.
ErrDecompressorNotFound = errors.New("decompressor not found")
)
// Reader is a 7z archive reader.
type Reader struct {
r *io.SectionReader
err error
header *headers.Header
folderIndex int
fileIndex int
emptyStream bool
folders []*folderReader
Options ReaderOptions
}
// ReaderOptions are optional options to configure a 7z archive reader.
type ReaderOptions struct {
password string
cb func() string
}
// SetPassword sets the password used for extraction.
func (o *ReaderOptions) SetPassword(password string) {
o.password = password
}
// SetPasswordCallback sets the callback thats used if a password is required,
// but wasn't supplied with SetPassword()
func (o *ReaderOptions) SetPasswordCallback(cb func() string) {
o.cb = cb
}
// Password returns the set password. This will call the password callback
// supplied to SetPasswordCallback() if no password is set.
func (o *ReaderOptions) Password() string {
if o.password != "" {
return o.password
}
if o.cb != nil {
o.password = o.cb()
}
return o.password
}
// ReadCloser provides an io.ReadCloser for the archive when opened with
// OpenReader.
type ReadCloser struct {
f *os.File
Reader
}
// Close closes the 7z file, rendering it unusable for I/O.
func (rc *ReadCloser) Close() error {
return rc.f.Close()
}
// OpenReader will open the 7z file specified by name and return a ReadCloser.
func OpenReader(name string) (*ReadCloser, error) {
f, err := os.Open(name)
if err != nil {
return nil, err
}
fi, err := f.Stat()
if err != nil {
f.Close()
return nil, err
}
r := new(ReadCloser)
if err := r.init(f, fi.Size(), false); err != nil {
f.Close()
return nil, err
}
r.f = f
return r, nil
}
// NewReader returns a new Reader reading from r, which is assumed to
// have the given size in bytes.
func NewReader(r io.ReaderAt, size int64) (*Reader, error) {
szr := new(Reader)
if err := szr.init(r, size, false); err != nil {
return nil, err
}
return szr, nil
}
func (sz *Reader) init(r io.ReaderAt, size int64, ignoreChecksumError bool) error {
sz.r = io.NewSectionReader(r, 0, size)
signatureHeader, err := headers.ReadSignatureHeader(sz.r)
if err != nil {
if !(ignoreChecksumError && err == headers.ErrChecksumMismatch) {
return err
}
}
if _, err := sz.r.Seek(signatureHeader.StartHeader.NextHeaderOffset, io.SeekCurrent); err != nil {
return err
}
if signatureHeader.StartHeader.NextHeaderSize > size-headers.SignatureHeaderSize {
return io.ErrUnexpectedEOF
}
crc := crc32.NewIEEE()
tee := io.TeeReader(bufio.NewReader(io.LimitReader(sz.r, signatureHeader.StartHeader.NextHeaderSize)), crc)
header, encoded, err := headers.ReadPackedStreamsForHeaders(&io.LimitedReader{tee, signatureHeader.StartHeader.NextHeaderSize})
if err != nil {
return err
}
if crc.Sum32() != signatureHeader.StartHeader.NextHeaderCRC {
if !ignoreChecksumError {
return headers.ErrChecksumMismatch
}
}
if encoded != nil {
folders, err := sz.extract(encoded)
if err != nil {
return err
}
if len(folders) != 1 {
return ErrNotSupported
}
if err = folders[0].Next(); err != nil {
return err
}
header, _, err = headers.ReadPackedStreamsForHeaders(&io.LimitedReader{folders[0].sb, folders[0].sb.Size()})
if err != nil {
return err
}
if err = folders[0].Next(); err != io.EOF {
return ErrNotSupported
}
}
if header == nil {
return ErrNotSupported
}
sz.header = header
sz.folders, err = sz.extract(sz.header.MainStreamsInfo)
return err
}
// Next advances to the next entry in the 7z archive.
//
// io.EOF is returned at the end of the input.
func (sz *Reader) Next() (*headers.FileInfo, error) {
if sz.err != nil {
return nil, sz.err
}
hdr, err := sz.next()
sz.err = err
return hdr, err
}
func (sz *Reader) nextFileInfo() *headers.FileInfo {
var fileInfo *headers.FileInfo
if sz.fileIndex < len(sz.header.FilesInfo) {
fileInfo = sz.header.FilesInfo[sz.fileIndex]
sz.fileIndex++
return fileInfo
}
return nil
}
func (sz *Reader) extract(streamsInfo *headers.StreamsInfo) ([]*folderReader, error) {
var sizes []uint64
var crcs []uint32
if streamsInfo.SubStreamsInfo != nil {
sizes = streamsInfo.SubStreamsInfo.UnpackSizes
crcs = streamsInfo.SubStreamsInfo.Digests
}
offset := int64(headers.SignatureHeaderSize)
offset += int64(streamsInfo.PackInfo.PackPos)
packedIndicesOffset := 0
var folders []*folderReader
for i, folder := range streamsInfo.UnpackInfo.Folders {
if len(folder.PackedIndices) == 0 {
folder.PackedIndices = []int{0}
}
fr := &folderReader{}
fr.inputs = make(map[int]io.Reader)
fr.binder = solidblock.Binder{}
// setup codecs
for j := range folder.CoderInfo {
coderInfo := folder.CoderInfo[j]
size := folder.UnpackSizes[j]
d := decompressor(coderInfo.CodecID)
if d == nil {
return folders, ErrDecompressorNotFound
}
fn := func(in []io.Reader) ([]io.Reader, error) {
r, err := d(in, coderInfo.Properties, size, &sz.Options)
return []io.Reader{r}, err
}
fr.binder.AddCodec(fn, coderInfo.NumInStreams, coderInfo.NumOutStreams)
}
// setup initial inputs
for index, input := range folder.PackedIndices {
if packedIndicesOffset+index >= len(streamsInfo.PackInfo.PackSizes) {
return nil, fmt.Errorf("folder references invalid packinfo")
}
size := int64(streamsInfo.PackInfo.PackSizes[packedIndicesOffset+index])
fr.inputs[input] = io.NewSectionReader(sz.r, offset, size)
offset += size
}
packedIndicesOffset += len(folder.PackedIndices)
// setup pairs
for _, bindPairsInfo := range folder.BindPairsInfo {
fr.binder.Pair(bindPairsInfo.InIndex, bindPairsInfo.OutIndex)
}
if streamsInfo.SubStreamsInfo != nil {
numUnpackStreamsInFolders := streamsInfo.SubStreamsInfo.NumUnpackStreamsInFolders
if i >= len(numUnpackStreamsInFolders) {
return nil, fmt.Errorf("folder references invalid unpack stream")
}
off := numUnpackStreamsInFolders[i]
if off > len(sizes) || off > len(crcs) {
return nil, fmt.Errorf("folder references invalid unpack size or digest")
}
fr.sizes = sizes[:off]
fr.crcs = crcs[:off]
sizes = sizes[len(fr.sizes):]
crcs = crcs[len(fr.crcs):]
} else {
fr.sizes = []uint64{folder.UnpackSize()}
fr.crcs = []uint32{folder.UnpackCRC}
}
folders = append(folders, fr)
}
return folders, nil
}
type folderReader struct {
binder solidblock.Binder
sizes []uint64
crcs []uint32
inputs map[int]io.Reader
bufs []*bufio.Reader
sb *solidblock.Solidblock
}
var bufioReaderPool = sync.Pool{
New: func() interface{} {
return bufio.NewReaderSize(nil, 32*1024)
},
}
func (fr *folderReader) Next() error {
if fr.sb == nil {
fr.bufs = make([]*bufio.Reader, 0, len(fr.inputs))
for in, r := range fr.inputs {
br := bufioReaderPool.Get().(*bufio.Reader)
br.Reset(r)
fr.bufs = append(fr.bufs, br)
fr.binder.Reader(br, in)
}
outputs, err := fr.binder.Outputs()
if err != nil {
return err
}
if len(outputs) != 1 {
return ErrNotSupported
}
if outputs[0] == nil {
return ErrNotSupported
}
fr.sb = solidblock.New(outputs[0], fr.sizes, fr.crcs)
}
return fr.sb.Next()
}
func (fr *folderReader) Close() error {
for _, buf := range fr.bufs {
bufioReaderPool.Put(buf)
}
fr.bufs = nil
return nil
}
func (sz *Reader) next() (*headers.FileInfo, error) {
fileInfo := sz.nextFileInfo()
if fileInfo == nil {
return nil, io.EOF
}
sz.emptyStream = fileInfo.IsEmptyStream
if sz.emptyStream {
return fileInfo, nil
}
if sz.folders[sz.folderIndex].Next() == io.EOF {
sz.folders[sz.folderIndex].Close()
sz.folderIndex++
if sz.folderIndex >= len(sz.folders) {
return nil, io.EOF
}
sz.folders[sz.folderIndex].Next()
}
return fileInfo, nil
}
// Read reads from the current file in the 7z archive.
// It returns (0, io.EOF) when it reaches the end of that file,
// until Next is called to advance to the next file.
func (sz *Reader) Read(p []byte) (int, error) {
if sz.err != nil {
return 0, sz.err
}
if sz.emptyStream {
return 0, io.EOF
}
n, err := sz.folders[sz.folderIndex].sb.Read(p)
if err != nil && err != io.EOF {
sz.err = err
}
return n, err
}

@ -0,0 +1,129 @@
package go7z
import (
"bytes"
"compress/bzip2"
"compress/flate"
"encoding/binary"
"io"
"sync"
"github.com/saracen/go7z/filters"
"github.com/ulikunitz/xz/lzma"
)
// Decompressor is a handler function called when a registered decompressor is
// initialized.
type Decompressor func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error)
var (
decompressors sync.Map // map[uint32]Decompressor
)
func init() {
// copy
RegisterDecompressor(0x00, Decompressor(func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
if len(r) != 1 {
return nil, ErrNotSupported
}
return r[0], nil
}))
// delta
RegisterDecompressor(0x03, Decompressor(func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
if len(r) != 1 || len(options) == 0 || len(options) > 1 {
return nil, ErrNotSupported
}
return filters.NewDeltaDecoder(r[0], uint(options[0])+1, int64(unpackSize))
}))
// lzma
RegisterDecompressor(0x030101, Decompressor(func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
if len(r) != 1 {
return nil, ErrNotSupported
}
// We can't set options in the lzma decoder library, so instead we add
// a fake header
header := bytes.NewBuffer(options)
binary.Write(header, binary.LittleEndian, unpackSize)
return lzma.NewReader(io.MultiReader(header, r[0]))
}))
// lzma2
RegisterDecompressor(0x21, Decompressor(func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
if len(r) != 1 {
return nil, ErrNotSupported
}
config := lzma.Reader2Config{}
if len(options) > 0 {
config.DictCap = int(2 | (options[0] & 1))
config.DictCap <<= (options[0] >> 1) + 11
}
return config.NewReader2(r[0])
}))
// bcj2
RegisterDecompressor(0x303011b, Decompressor(func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
if len(r) != 4 {
return nil, ErrNotSupported
}
return filters.NewBCJ2Decoder(r[0], r[1], r[2], r[3], int64(unpackSize))
}))
// deflate
RegisterDecompressor(0x40108, Decompressor(func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
if len(r) != 1 {
return nil, ErrNotSupported
}
return flate.NewReader(r[0]), nil
}))
// bzip2
RegisterDecompressor(0x40202, Decompressor(func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
if len(r) != 1 {
return nil, ErrNotSupported
}
return bzip2.NewReader(r[0]), nil
}))
// AES
RegisterDecompressor(0x6f10701, Decompressor(func(r []io.Reader, options []byte, unpackSize uint64, ro *ReaderOptions) (io.Reader, error) {
if len(r) != 1 {
return nil, ErrNotSupported
}
if len(options) < 2 {
return nil, ErrNotSupported
}
saltSize := ((options[0] >> 7) & 1) + (options[1] >> 4)
ivSize := ((options[0] >> 6) & 1) + (options[1] & 0x0F)
power := int(options[0]) & 0x3f
options = options[2:]
salt := options[:saltSize]
iv := options[saltSize : saltSize+ivSize]
return filters.NewAESDecrypter(r[0], power, salt, iv, ro.Password())
}))
}
// RegisterDecompressor registers a decompressor.
func RegisterDecompressor(method uint32, dcomp Decompressor) {
if _, dup := decompressors.LoadOrStore(method, dcomp); dup {
panic("decompressor already registered")
}
}
func decompressor(method uint32) Decompressor {
di, ok := decompressors.Load(method)
if !ok {
return nil
}
return di.(Decompressor)
}

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 Arran Walker
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -0,0 +1,159 @@
# Solidblock
Solidblock is a Go library providing `io.Reader`s for solid compression and
codec binding/chaining.
## Solid Compression Reader
Wrapped around a compressed solid block of concatenated files, it provides
sequential access to the files:
```
// file contents
files := [][]byte{
[]byte("file 1\n"),
[]byte("file 2\n"),
}
// file metadata
var metadata struct {
sizes []uint64
crcs []uint32
}
metadata.sizes = []uint64{
uint64(len(files[0])),
uint64(len(files[1])),
}
metadata.crcs = []uint32{
crc32.ChecksumIEEE(files[0]),
crc32.ChecksumIEEE(files[1]),
}
// Concatenate files to compressed block
block := new(bytes.Buffer)
w := gzip.NewWriter(block)
w.Write(files[0])
w.Write(files[1])
w.Close()
// Open gzip reader to compressed block
r, err := gzip.NewReader(block)
if err != nil {
panic(err)
}
// Create a new solidblock reader
s := solidblock.New(r, metadata.sizes, metadata.crcs)
for {
err := s.Next()
if err == io.EOF {
break
}
if err != nil {
panic(err)
}
io.Copy(os.Stdout, s)
}
```
## Codec Binding
To improve compression, some codecs (such as BCJ2), split data up into multiple
streams that compress better individually. `solidblock.Binder` provides a simple
way to pair together the inputs and outputs of various codecs/readers.
For example:
```
func BCJ2Decoder(inputs []io.Reader) ([]io.Reader, error) {
// 1. take 4 input readers
// 2. do magic
// 3. return 1 reader
}
func GzipDecoder(inputs []io.Reader) ([]io.Reader, error) {
if len(inputs) != 1 {
panic("unsupported input configuration")
}
r, err := gzip.NewReader(inputs[0])
return []io.Reader{r}, nil
}
file, err := os.Open("file")
if err != nil {
panic(err)
}
// Assume file has 4 concatenated streams. 3 of the streams are from a BCJ2
// encoder, compressed to gzip streams. 1 is the 4th stream of the BCJ2 encoder,
// but left uncompressed.
streams := make([]io.Reader, 4)
streams[0] = io.NewSectionReader(file, 0, 100)
streams[1] = io.NewSectionReader(file, 101, 200)
streams[2] = io.NewSectionReader(file, 201, 300)
streams[3] = io.NewSectionReader(file, 301, 400)
// Create a new binder
binder := solidblock.NewBinder()
// Create gzip decompressors for the 4 initial input streams.
gzip0InputIDs, gzip0OutputIDs := binder.AddCodec(GzipDecoder, 1, 1)
gzip1InputIDs, gzip1OutputIDs := binder.AddCodec(GzipDecoder, 1, 1)
gzip2InputIDs, gzip2OutputIDs := binder.AddCodec(GzipDecoder, 1, 1)
// Create BCJ2 decoder for the 4 gzip decoded streams.
bcj2InputIDs, bcj2outputIDs := binder.AddCodec(BCJ2Decoder, 4, 1)
// Connect initial streams to gzip decoders
binder.Reader(streams[0], gzip0InputIDs[0])
binder.Reader(streams[1], gzip1InputIDs[0])
binder.Reader(streams[2], gzip2InputIDs[0])
// Connect 4th initial stream straight to 4th input of BCJ2 decoder.
binder.Reader(streams[3], bcj2InputIDs[3])
// Pair the 3 gzip output streams to the 1st, 2nd, 3rd input of BCJ2 decoder.
binder.Pair(gzip0OutputIDs[0], bcj2InputIDs[0])
binder.Pair(gzip1OutputIDs[0], bcj2InputIDs[1])
binder.Pair(gzip2OutputIDs[0], bcj2InputIDs[2])
// Create single output to read from
outputs, err := binder.Outputs()
if err != nil {
panic(err)
}
if len(outputs) != 1 {
panic("output should only contain one stream")
}
io.Copy(os.Stdout, outputs[0])
```
A picture says 60 lines of code...
```
+------------+
concatenated file |bcj2 decoder+--->io.Reader
+--------------------+ +-+--+--+--+-+
| | ^ ^ ^ ^
| +--------------+ | +------------+ | | | |
| |gzipped stream+------>gzip decoder+---+ | | |
| +--------------+ | +------------+ | | |
| | | | |
| +--------------+ | +------------+ | | |
| |gzipped stream+------>gzip decoder+------+ | |
| +--------------+ | +------------+ | |
| | | |
| +--------------+ | +------------+ | |
| |gzipped stream+------>gzip decoder+---------+ |
| +--------------+ | +------------+ |
| | |
| +--------------+ | |
| | uncompressed +--------------------------------+
| | stream | |
| +--------------+ |
| |
+--------------------+
```

@ -0,0 +1,123 @@
package solidblock
import (
"errors"
"fmt"
"io"
)
var (
// ErrInputIsUnbound is returned when an input hasn't been binded to either
// a reader/paired without an output.
ErrInputIsUnbound = errors.New("input is unbound")
// ErrUnexpectedOutputCount is returned when the amount of io.Readers
// returned from a codec handler doesn't match the amount specified when
// adding the codec.
ErrUnexpectedOutputCount = errors.New("unexpected output count")
)
type reader struct {
Name string
R io.Reader
}
type codec struct {
fn func([]io.Reader) ([]io.Reader, error)
inIndexes []int
outIndexes []int
}
// Binder holds information regarding codecs, their inputs/outputs and how they
// join together.
type Binder struct {
numInStreams int
numOutStreams int
in []*reader
out []*reader
codecs []*codec
}
// NewBinder returns a new binder.
func NewBinder() *Binder {
return &Binder{}
}
// AddCodec adds a handler function for processing information from input(s) and
// producing output(s).
func (b *Binder) AddCodec(fn func([]io.Reader) ([]io.Reader, error), inputs, outputs int) (in, out []int) {
c := &codec{fn: fn}
b.in = append(b.in, make([]*reader, inputs)...)
b.out = append(b.out, make([]*reader, outputs)...)
for i := 0; i < inputs; i++ {
c.inIndexes = append(c.inIndexes, b.numInStreams+i)
}
for i := 0; i < outputs; i++ {
c.outIndexes = append(c.outIndexes, b.numOutStreams+i)
}
b.numInStreams += inputs
b.numOutStreams += outputs
b.codecs = append(b.codecs, c)
return c.inIndexes, c.outIndexes
}
// Reader binds a reader to an in stream.
func (b *Binder) Reader(r io.Reader, in int) {
if in < 0 || in >= len(b.in) {
return
}
b.in[in] = &reader{fmt.Sprintf("In: %v", in), r}
}
// Pair pairs two streams, binding an in stream to an out stream.
func (b *Binder) Pair(in int, out int) {
if in < 0 || in >= len(b.in) || out < 0 || out >= len(b.out) {
return
}
if b.out[out] == nil {
b.out[out] = &reader{fmt.Sprintf("Bind %v:%v", in, out), nil}
}
b.in[in] = b.out[out]
}
// Outputs returns any unbound output readers to ready from.
func (b *Binder) Outputs() ([]io.Reader, error) {
var unbound []io.Reader
for i := range b.codecs {
var ins []io.Reader
for _, num := range b.codecs[i].inIndexes {
if b.in[num] == nil || b.in[num].R == nil {
return unbound, ErrInputIsUnbound
}
ins = append(ins, b.in[num].R)
}
outs, err := b.codecs[i].fn(ins)
if err != nil {
return nil, err
}
if len(outs) != len(b.codecs[i].outIndexes) {
return unbound, ErrUnexpectedOutputCount
}
for j, num := range b.codecs[i].outIndexes {
if b.out[num] == nil {
b.out[num] = &reader{fmt.Sprintf("Out %v", outs), nil}
unbound = append(unbound, outs[j])
}
b.out[num].R = outs[j]
}
}
return unbound, nil
}

@ -0,0 +1,100 @@
package solidblock
import (
"errors"
"hash"
"hash/crc32"
"io"
"io/ioutil"
)
var (
// ErrChecksumMismatch is returned when a file's crc check fails.
ErrChecksumMismatch = errors.New("checksum mismatch")
)
// Solidblock provides sequential access to files that have been concatenated
// into a single compressed data block.
type Solidblock struct {
sizes []uint64
crcs []uint32
base io.Reader
file io.Reader
crc hash.Hash32
target int
index int
}
// New returns a new solidblock reader.
func New(r io.Reader, sizes []uint64, crcs []uint32) *Solidblock {
if len(sizes) != len(crcs) {
panic("crcs slice needs to be the same length as sizes slice")
}
return &Solidblock{
sizes: sizes,
crcs: crcs,
target: -1,
base: r,
}
}
// Next advances to the next file entry in solid block.
//
// Calling Next without reading the current file is supported. Only when Read
// is called will decompression occur for current file. Any skipped files will
// still need to be decompressed, but their contents is discarded.
//
// io.EOF is returned at the end of the input.
func (fr *Solidblock) Next() error {
if fr.target < len(fr.sizes)-1 {
fr.target++
return nil
}
return io.EOF
}
// Read reads from the current file in solid block.
// It returns (0, io.EOF) when it reaches the end of that file,
// until Next is called to advance to the next file.
func (fr *Solidblock) Read(p []byte) (int, error) {
if fr.file != nil && fr.index != fr.target {
// drain current fileReader
_, err := io.Copy(ioutil.Discard, fr.file)
if err != nil {
return 0, err
}
}
if fr.file == nil || fr.index != fr.target {
// discard until we're at the position we want to be at
for i := fr.index + 1; i < fr.target; i++ {
_, err := io.CopyN(ioutil.Discard, fr.base, int64(fr.sizes[i]))
if err != nil {
return 0, err
}
}
fr.crc = crc32.NewIEEE()
fr.file = io.TeeReader(io.LimitReader(fr.base, int64(fr.sizes[fr.target])), fr.crc)
fr.index = fr.target
}
n, err := fr.file.Read(p)
if err == io.EOF {
if fr.crc.Sum32() != fr.crcs[fr.index] {
return n, ErrChecksumMismatch
}
}
return n, err
}
func (fr *Solidblock) Size() int64 {
if fr.target < 0 {
return 0
}
return int64(fr.sizes[fr.target])
}

@ -0,0 +1,26 @@
Copyright (c) 2014-2021 Ulrich Kunitz
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* My name, Ulrich Kunitz, may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -0,0 +1,181 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package hash
// CyclicPoly provides a cyclic polynomial rolling hash.
type CyclicPoly struct {
h uint64
p []uint64
i int
}
// ror rotates the unsigned 64-bit integer to right. The argument s must be
// less than 64.
func ror(x uint64, s uint) uint64 {
return (x >> s) | (x << (64 - s))
}
// NewCyclicPoly creates a new instance of the CyclicPoly structure. The
// argument n gives the number of bytes for which a hash will be executed.
// This number must be positive; the method panics if this isn't the case.
func NewCyclicPoly(n int) *CyclicPoly {
if n < 1 {
panic("argument n must be positive")
}
return &CyclicPoly{p: make([]uint64, 0, n)}
}
// Len returns the length of the byte sequence for which a hash is generated.
func (r *CyclicPoly) Len() int {
return cap(r.p)
}
// RollByte hashes the next byte and returns a hash value. The complete becomes
// available after at least Len() bytes have been hashed.
func (r *CyclicPoly) RollByte(x byte) uint64 {
y := hash[x]
if len(r.p) < cap(r.p) {
r.h = ror(r.h, 1) ^ y
r.p = append(r.p, y)
} else {
r.h ^= ror(r.p[r.i], uint(cap(r.p)-1))
r.h = ror(r.h, 1) ^ y
r.p[r.i] = y
r.i = (r.i + 1) % cap(r.p)
}
return r.h
}
// Stores the hash for the individual bytes.
var hash = [256]uint64{
0x2e4fc3f904065142, 0xc790984cfbc99527,
0x879f95eb8c62f187, 0x3b61be86b5021ef2,
0x65a896a04196f0a5, 0xc5b307b80470b59e,
0xd3bff376a70df14b, 0xc332f04f0b3f1701,
0x753b5f0e9abf3e0d, 0xb41538fdfe66ef53,
0x1906a10c2c1c0208, 0xfb0c712a03421c0d,
0x38be311a65c9552b, 0xfee7ee4ca6445c7e,
0x71aadeded184f21e, 0xd73426fccda23b2d,
0x29773fb5fb9600b5, 0xce410261cd32981a,
0xfe2848b3c62dbc2d, 0x459eaaff6e43e11c,
0xc13e35fc9c73a887, 0xf30ed5c201e76dbc,
0xa5f10b3910482cea, 0x2945d59be02dfaad,
0x06ee334ff70571b5, 0xbabf9d8070f44380,
0xee3e2e9912ffd27c, 0x2a7118d1ea6b8ea7,
0x26183cb9f7b1664c, 0xea71dac7da068f21,
0xea92eca5bd1d0bb7, 0x415595862defcd75,
0x248a386023c60648, 0x9cf021ab284b3c8a,
0xfc9372df02870f6c, 0x2b92d693eeb3b3fc,
0x73e799d139dc6975, 0x7b15ae312486363c,
0xb70e5454a2239c80, 0x208e3fb31d3b2263,
0x01f563cabb930f44, 0x2ac4533d2a3240d8,
0x84231ed1064f6f7c, 0xa9f020977c2a6d19,
0x213c227271c20122, 0x09fe8a9a0a03d07a,
0x4236dc75bcaf910c, 0x460a8b2bead8f17e,
0xd9b27be1aa07055f, 0xd202d5dc4b11c33e,
0x70adb010543bea12, 0xcdae938f7ea6f579,
0x3f3d870208672f4d, 0x8e6ccbce9d349536,
0xe4c0871a389095ae, 0xf5f2a49152bca080,
0x9a43f9b97269934e, 0xc17b3753cb6f475c,
0xd56d941e8e206bd4, 0xac0a4f3e525eda00,
0xa06d5a011912a550, 0x5537ed19537ad1df,
0xa32fe713d611449d, 0x2a1d05b47c3b579f,
0x991d02dbd30a2a52, 0x39e91e7e28f93eb0,
0x40d06adb3e92c9ac, 0x9b9d3afde1c77c97,
0x9a3f3f41c02c616f, 0x22ecd4ba00f60c44,
0x0b63d5d801708420, 0x8f227ca8f37ffaec,
0x0256278670887c24, 0x107e14877dbf540b,
0x32c19f2786ac1c05, 0x1df5b12bb4bc9c61,
0xc0cac129d0d4c4e2, 0x9fdb52ee9800b001,
0x31f601d5d31c48c4, 0x72ff3c0928bcaec7,
0xd99264421147eb03, 0x535a2d6d38aefcfe,
0x6ba8b4454a916237, 0xfa39366eaae4719c,
0x10f00fd7bbb24b6f, 0x5bd23185c76c84d4,
0xb22c3d7e1b00d33f, 0x3efc20aa6bc830a8,
0xd61c2503fe639144, 0x30ce625441eb92d3,
0xe5d34cf359e93100, 0xa8e5aa13f2b9f7a5,
0x5c2b8d851ca254a6, 0x68fb6c5e8b0d5fdf,
0xc7ea4872c96b83ae, 0x6dd5d376f4392382,
0x1be88681aaa9792f, 0xfef465ee1b6c10d9,
0x1f98b65ed43fcb2e, 0x4d1ca11eb6e9a9c9,
0x7808e902b3857d0b, 0x171c9c4ea4607972,
0x58d66274850146df, 0x42b311c10d3981d1,
0x647fa8c621c41a4c, 0xf472771c66ddfedc,
0x338d27e3f847b46b, 0x6402ce3da97545ce,
0x5162db616fc38638, 0x9c83be97bc22a50e,
0x2d3d7478a78d5e72, 0xe621a9b938fd5397,
0x9454614eb0f81c45, 0x395fb6e742ed39b6,
0x77dd9179d06037bf, 0xc478d0fee4d2656d,
0x35d9d6cb772007af, 0x83a56e92c883f0f6,
0x27937453250c00a1, 0x27bd6ebc3a46a97d,
0x9f543bf784342d51, 0xd158f38c48b0ed52,
0x8dd8537c045f66b4, 0x846a57230226f6d5,
0x6b13939e0c4e7cdf, 0xfca25425d8176758,
0x92e5fc6cd52788e6, 0x9992e13d7a739170,
0x518246f7a199e8ea, 0xf104c2a71b9979c7,
0x86b3ffaabea4768f, 0x6388061cf3e351ad,
0x09d9b5295de5bbb5, 0x38bf1638c2599e92,
0x1d759846499e148d, 0x4c0ff015e5f96ef4,
0xa41a94cfa270f565, 0x42d76f9cb2326c0b,
0x0cf385dd3c9c23ba, 0x0508a6c7508d6e7a,
0x337523aabbe6cf8d, 0x646bb14001d42b12,
0xc178729d138adc74, 0xf900ef4491f24086,
0xee1a90d334bb5ac4, 0x9755c92247301a50,
0xb999bf7c4ff1b610, 0x6aeeb2f3b21e8fc9,
0x0fa8084cf91ac6ff, 0x10d226cf136e6189,
0xd302057a07d4fb21, 0x5f03800e20a0fcc3,
0x80118d4ae46bd210, 0x58ab61a522843733,
0x51edd575c5432a4b, 0x94ee6ff67f9197f7,
0x765669e0e5e8157b, 0xa5347830737132f0,
0x3ba485a69f01510c, 0x0b247d7b957a01c3,
0x1b3d63449fd807dc, 0x0fdc4721c30ad743,
0x8b535ed3829b2b14, 0xee41d0cad65d232c,
0xe6a99ed97a6a982f, 0x65ac6194c202003d,
0x692accf3a70573eb, 0xcc3c02c3e200d5af,
0x0d419e8b325914a3, 0x320f160f42c25e40,
0x00710d647a51fe7a, 0x3c947692330aed60,
0x9288aa280d355a7a, 0xa1806a9b791d1696,
0x5d60e38496763da1, 0x6c69e22e613fd0f4,
0x977fc2a5aadffb17, 0xfb7bd063fc5a94ba,
0x460c17992cbaece1, 0xf7822c5444d3297f,
0x344a9790c69b74aa, 0xb80a42e6cae09dce,
0x1b1361eaf2b1e757, 0xd84c1e758e236f01,
0x88e0b7be347627cc, 0x45246009b7a99490,
0x8011c6dd3fe50472, 0xc341d682bffb99d7,
0x2511be93808e2d15, 0xd5bc13d7fd739840,
0x2a3cd030679ae1ec, 0x8ad9898a4b9ee157,
0x3245fef0a8eaf521, 0x3d6d8dbbb427d2b0,
0x1ed146d8968b3981, 0x0c6a28bf7d45f3fc,
0x4a1fd3dbcee3c561, 0x4210ff6a476bf67e,
0xa559cce0d9199aac, 0xde39d47ef3723380,
0xe5b69d848ce42e35, 0xefa24296f8e79f52,
0x70190b59db9a5afc, 0x26f166cdb211e7bf,
0x4deaf2df3c6b8ef5, 0xf171dbdd670f1017,
0xb9059b05e9420d90, 0x2f0da855c9388754,
0x611d5e9ab77949cc, 0x2912038ac01163f4,
0x0231df50402b2fba, 0x45660fc4f3245f58,
0xb91cc97c7c8dac50, 0xb72d2aafe4953427,
0xfa6463f87e813d6b, 0x4515f7ee95d5c6a2,
0x1310e1c1a48d21c3, 0xad48a7810cdd8544,
0x4d5bdfefd5c9e631, 0xa43ed43f1fdcb7de,
0xe70cfc8fe1ee9626, 0xef4711b0d8dda442,
0xb80dd9bd4dab6c93, 0xa23be08d31ba4d93,
0x9b37db9d0335a39c, 0x494b6f870f5cfebc,
0x6d1b3c1149dda943, 0x372c943a518c1093,
0xad27af45e77c09c4, 0x3b6f92b646044604,
0xac2917909f5fcf4f, 0x2069a60e977e5557,
0x353a469e71014de5, 0x24be356281f55c15,
0x2b6d710ba8e9adea, 0x404ad1751c749c29,
0xed7311bf23d7f185, 0xba4f6976b4acc43e,
0x32d7198d2bc39000, 0xee667019014d6e01,
0x494ef3e128d14c83, 0x1f95a152baecd6be,
0x201648dff1f483a5, 0x68c28550c8384af6,
0x5fc834a6824a7f48, 0x7cd06cb7365eaf28,
0xd82bbd95e9b30909, 0x234f0d1694c53f6d,
0xd2fb7f4a96d83f4a, 0xff0d5da83acac05e,
0xf8f6b97f5585080a, 0x74236084be57b95b,
0xa25e40c03bbc36ad, 0x6b6e5c14ce88465b,
0x4378ffe93e1528c5, 0x94ca92a17118e2d2,
}

@ -0,0 +1,14 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package hash provides rolling hashes.
Rolling hashes have to be used for maintaining the positions of n-byte
sequences in the dictionary buffer.
The package provides currently the Rabin-Karp rolling hash and a Cyclic
Polynomial hash. Both support the Hashes method to be used with an interface.
*/
package hash

@ -0,0 +1,66 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package hash
// A is the default constant for Robin-Karp rolling hash. This is a random
// prime.
const A = 0x97b548add41d5da1
// RabinKarp supports the computation of a rolling hash.
type RabinKarp struct {
A uint64
// a^n
aOldest uint64
h uint64
p []byte
i int
}
// NewRabinKarp creates a new RabinKarp value. The argument n defines the
// length of the byte sequence to be hashed. The default constant will will be
// used.
func NewRabinKarp(n int) *RabinKarp {
return NewRabinKarpConst(n, A)
}
// NewRabinKarpConst creates a new RabinKarp value. The argument n defines the
// length of the byte sequence to be hashed. The argument a provides the
// constant used to compute the hash.
func NewRabinKarpConst(n int, a uint64) *RabinKarp {
if n <= 0 {
panic("number of bytes n must be positive")
}
aOldest := uint64(1)
// There are faster methods. For the small n required by the LZMA
// compressor O(n) is sufficient.
for i := 0; i < n; i++ {
aOldest *= a
}
return &RabinKarp{
A: a, aOldest: aOldest,
p: make([]byte, 0, n),
}
}
// Len returns the length of the byte sequence.
func (r *RabinKarp) Len() int {
return cap(r.p)
}
// RollByte computes the hash after x has been added.
func (r *RabinKarp) RollByte(x byte) uint64 {
if len(r.p) < cap(r.p) {
r.h += uint64(x)
r.h *= r.A
r.p = append(r.p, x)
} else {
r.h -= uint64(r.p[r.i]) * r.aOldest
r.h += uint64(x)
r.h *= r.A
r.p[r.i] = x
r.i = (r.i + 1) % cap(r.p)
}
return r.h
}

@ -0,0 +1,29 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package hash
// Roller provides an interface for rolling hashes. The hash value will become
// valid after hash has been called Len times.
type Roller interface {
Len() int
RollByte(x byte) uint64
}
// Hashes computes all hash values for the array p. Note that the state of the
// roller is changed.
func Hashes(r Roller, p []byte) []uint64 {
n := r.Len()
if len(p) < n {
return nil
}
h := make([]uint64, len(p)-n+1)
for i := 0; i < n-1; i++ {
r.RollByte(p[i])
}
for i := range h {
h[i] = r.RollByte(p[i+n-1])
}
return h
}

@ -0,0 +1,457 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package xlog provides a simple logging package that allows to disable
// certain message categories. It defines a type, Logger, with multiple
// methods for formatting output. The package has also a predefined
// 'standard' Logger accessible through helper function Print[f|ln],
// Fatal[f|ln], Panic[f|ln], Warn[f|ln], Print[f|ln] and Debug[f|ln]
// that are easier to use then creating a Logger manually. That logger
// writes to standard error and prints the date and time of each logged
// message, which can be configured using the function SetFlags.
//
// The Fatal functions call os.Exit(1) after the message is output
// unless not suppressed by the flags. The Panic functions call panic
// after the writing the log message unless suppressed.
package xlog
import (
"fmt"
"io"
"os"
"runtime"
"sync"
"time"
)
// The flags define what information is prefixed to each log entry
// generated by the Logger. The Lno* versions allow the suppression of
// specific output. The bits are or'ed together to control what will be
// printed. There is no control over the order of the items printed and
// the format. The full format is:
//
// 2009-01-23 01:23:23.123123 /a/b/c/d.go:23: message
//
const (
Ldate = 1 << iota // the date: 2009-01-23
Ltime // the time: 01:23:23
Lmicroseconds // microsecond resolution: 01:23:23.123123
Llongfile // full file name and line number: /a/b/c/d.go:23
Lshortfile // final file name element and line number: d.go:23
Lnopanic // suppresses output from Panic[f|ln] but not the panic call
Lnofatal // suppresses output from Fatal[f|ln] but not the exit
Lnowarn // suppresses output from Warn[f|ln]
Lnoprint // suppresses output from Print[f|ln]
Lnodebug // suppresses output from Debug[f|ln]
// initial values for the standard logger
Lstdflags = Ldate | Ltime | Lnodebug
)
// A Logger represents an active logging object that generates lines of
// output to an io.Writer. Each logging operation if not suppressed
// makes a single call to the Writer's Write method. A Logger can be
// used simultaneously from multiple goroutines; it guarantees to
// serialize access to the Writer.
type Logger struct {
mu sync.Mutex // ensures atomic writes; and protects the following
// fields
prefix string // prefix to write at beginning of each line
flag int // properties
out io.Writer // destination for output
buf []byte // for accumulating text to write
}
// New creates a new Logger. The out argument sets the destination to
// which the log output will be written. The prefix appears at the
// beginning of each log line. The flag argument defines the logging
// properties.
func New(out io.Writer, prefix string, flag int) *Logger {
return &Logger{out: out, prefix: prefix, flag: flag}
}
// std is the standard logger used by the package scope functions.
var std = New(os.Stderr, "", Lstdflags)
// itoa converts the integer to ASCII. A negative widths will avoid
// zero-padding. The function supports only non-negative integers.
func itoa(buf *[]byte, i int, wid int) {
var u = uint(i)
if u == 0 && wid <= 1 {
*buf = append(*buf, '0')
return
}
var b [32]byte
bp := len(b)
for ; u > 0 || wid > 0; u /= 10 {
bp--
wid--
b[bp] = byte(u%10) + '0'
}
*buf = append(*buf, b[bp:]...)
}
// formatHeader puts the header into the buf field of the buffer.
func (l *Logger) formatHeader(t time.Time, file string, line int) {
l.buf = append(l.buf, l.prefix...)
if l.flag&(Ldate|Ltime|Lmicroseconds) != 0 {
if l.flag&Ldate != 0 {
year, month, day := t.Date()
itoa(&l.buf, year, 4)
l.buf = append(l.buf, '-')
itoa(&l.buf, int(month), 2)
l.buf = append(l.buf, '-')
itoa(&l.buf, day, 2)
l.buf = append(l.buf, ' ')
}
if l.flag&(Ltime|Lmicroseconds) != 0 {
hour, min, sec := t.Clock()
itoa(&l.buf, hour, 2)
l.buf = append(l.buf, ':')
itoa(&l.buf, min, 2)
l.buf = append(l.buf, ':')
itoa(&l.buf, sec, 2)
if l.flag&Lmicroseconds != 0 {
l.buf = append(l.buf, '.')
itoa(&l.buf, t.Nanosecond()/1e3, 6)
}
l.buf = append(l.buf, ' ')
}
}
if l.flag&(Lshortfile|Llongfile) != 0 {
if l.flag&Lshortfile != 0 {
short := file
for i := len(file) - 1; i > 0; i-- {
if file[i] == '/' {
short = file[i+1:]
break
}
}
file = short
}
l.buf = append(l.buf, file...)
l.buf = append(l.buf, ':')
itoa(&l.buf, line, -1)
l.buf = append(l.buf, ": "...)
}
}
func (l *Logger) output(calldepth int, now time.Time, s string) error {
var file string
var line int
if l.flag&(Lshortfile|Llongfile) != 0 {
l.mu.Unlock()
var ok bool
_, file, line, ok = runtime.Caller(calldepth)
if !ok {
file = "???"
line = 0
}
l.mu.Lock()
}
l.buf = l.buf[:0]
l.formatHeader(now, file, line)
l.buf = append(l.buf, s...)
if len(s) == 0 || s[len(s)-1] != '\n' {
l.buf = append(l.buf, '\n')
}
_, err := l.out.Write(l.buf)
return err
}
// Output writes the string s with the header controlled by the flags to
// the l.out writer. A newline will be appended if s doesn't end in a
// newline. Calldepth is used to recover the PC, although all current
// calls of Output use the call depth 2. Access to the function is serialized.
func (l *Logger) Output(calldepth, noflag int, v ...interface{}) error {
now := time.Now()
l.mu.Lock()
defer l.mu.Unlock()
if l.flag&noflag != 0 {
return nil
}
s := fmt.Sprint(v...)
return l.output(calldepth+1, now, s)
}
// Outputf works like output but formats the output like Printf.
func (l *Logger) Outputf(calldepth int, noflag int, format string, v ...interface{}) error {
now := time.Now()
l.mu.Lock()
defer l.mu.Unlock()
if l.flag&noflag != 0 {
return nil
}
s := fmt.Sprintf(format, v...)
return l.output(calldepth+1, now, s)
}
// Outputln works like output but formats the output like Println.
func (l *Logger) Outputln(calldepth int, noflag int, v ...interface{}) error {
now := time.Now()
l.mu.Lock()
defer l.mu.Unlock()
if l.flag&noflag != 0 {
return nil
}
s := fmt.Sprintln(v...)
return l.output(calldepth+1, now, s)
}
// Panic prints the message like Print and calls panic. The printing
// might be suppressed by the flag Lnopanic.
func (l *Logger) Panic(v ...interface{}) {
l.Output(2, Lnopanic, v...)
s := fmt.Sprint(v...)
panic(s)
}
// Panic prints the message like Print and calls panic. The printing
// might be suppressed by the flag Lnopanic.
func Panic(v ...interface{}) {
std.Output(2, Lnopanic, v...)
s := fmt.Sprint(v...)
panic(s)
}
// Panicf prints the message like Printf and calls panic. The printing
// might be suppressed by the flag Lnopanic.
func (l *Logger) Panicf(format string, v ...interface{}) {
l.Outputf(2, Lnopanic, format, v...)
s := fmt.Sprintf(format, v...)
panic(s)
}
// Panicf prints the message like Printf and calls panic. The printing
// might be suppressed by the flag Lnopanic.
func Panicf(format string, v ...interface{}) {
std.Outputf(2, Lnopanic, format, v...)
s := fmt.Sprintf(format, v...)
panic(s)
}
// Panicln prints the message like Println and calls panic. The printing
// might be suppressed by the flag Lnopanic.
func (l *Logger) Panicln(v ...interface{}) {
l.Outputln(2, Lnopanic, v...)
s := fmt.Sprintln(v...)
panic(s)
}
// Panicln prints the message like Println and calls panic. The printing
// might be suppressed by the flag Lnopanic.
func Panicln(v ...interface{}) {
std.Outputln(2, Lnopanic, v...)
s := fmt.Sprintln(v...)
panic(s)
}
// Fatal prints the message like Print and calls os.Exit(1). The
// printing might be suppressed by the flag Lnofatal.
func (l *Logger) Fatal(v ...interface{}) {
l.Output(2, Lnofatal, v...)
os.Exit(1)
}
// Fatal prints the message like Print and calls os.Exit(1). The
// printing might be suppressed by the flag Lnofatal.
func Fatal(v ...interface{}) {
std.Output(2, Lnofatal, v...)
os.Exit(1)
}
// Fatalf prints the message like Printf and calls os.Exit(1). The
// printing might be suppressed by the flag Lnofatal.
func (l *Logger) Fatalf(format string, v ...interface{}) {
l.Outputf(2, Lnofatal, format, v...)
os.Exit(1)
}
// Fatalf prints the message like Printf and calls os.Exit(1). The
// printing might be suppressed by the flag Lnofatal.
func Fatalf(format string, v ...interface{}) {
std.Outputf(2, Lnofatal, format, v...)
os.Exit(1)
}
// Fatalln prints the message like Println and calls os.Exit(1). The
// printing might be suppressed by the flag Lnofatal.
func (l *Logger) Fatalln(format string, v ...interface{}) {
l.Outputln(2, Lnofatal, v...)
os.Exit(1)
}
// Fatalln prints the message like Println and calls os.Exit(1). The
// printing might be suppressed by the flag Lnofatal.
func Fatalln(format string, v ...interface{}) {
std.Outputln(2, Lnofatal, v...)
os.Exit(1)
}
// Warn prints the message like Print. The printing might be suppressed
// by the flag Lnowarn.
func (l *Logger) Warn(v ...interface{}) {
l.Output(2, Lnowarn, v...)
}
// Warn prints the message like Print. The printing might be suppressed
// by the flag Lnowarn.
func Warn(v ...interface{}) {
std.Output(2, Lnowarn, v...)
}
// Warnf prints the message like Printf. The printing might be suppressed
// by the flag Lnowarn.
func (l *Logger) Warnf(format string, v ...interface{}) {
l.Outputf(2, Lnowarn, format, v...)
}
// Warnf prints the message like Printf. The printing might be suppressed
// by the flag Lnowarn.
func Warnf(format string, v ...interface{}) {
std.Outputf(2, Lnowarn, format, v...)
}
// Warnln prints the message like Println. The printing might be suppressed
// by the flag Lnowarn.
func (l *Logger) Warnln(v ...interface{}) {
l.Outputln(2, Lnowarn, v...)
}
// Warnln prints the message like Println. The printing might be suppressed
// by the flag Lnowarn.
func Warnln(v ...interface{}) {
std.Outputln(2, Lnowarn, v...)
}
// Print prints the message like fmt.Print. The printing might be suppressed
// by the flag Lnoprint.
func (l *Logger) Print(v ...interface{}) {
l.Output(2, Lnoprint, v...)
}
// Print prints the message like fmt.Print. The printing might be suppressed
// by the flag Lnoprint.
func Print(v ...interface{}) {
std.Output(2, Lnoprint, v...)
}
// Printf prints the message like fmt.Printf. The printing might be suppressed
// by the flag Lnoprint.
func (l *Logger) Printf(format string, v ...interface{}) {
l.Outputf(2, Lnoprint, format, v...)
}
// Printf prints the message like fmt.Printf. The printing might be suppressed
// by the flag Lnoprint.
func Printf(format string, v ...interface{}) {
std.Outputf(2, Lnoprint, format, v...)
}
// Println prints the message like fmt.Println. The printing might be
// suppressed by the flag Lnoprint.
func (l *Logger) Println(v ...interface{}) {
l.Outputln(2, Lnoprint, v...)
}
// Println prints the message like fmt.Println. The printing might be
// suppressed by the flag Lnoprint.
func Println(v ...interface{}) {
std.Outputln(2, Lnoprint, v...)
}
// Debug prints the message like Print. The printing might be suppressed
// by the flag Lnodebug.
func (l *Logger) Debug(v ...interface{}) {
l.Output(2, Lnodebug, v...)
}
// Debug prints the message like Print. The printing might be suppressed
// by the flag Lnodebug.
func Debug(v ...interface{}) {
std.Output(2, Lnodebug, v...)
}
// Debugf prints the message like Printf. The printing might be suppressed
// by the flag Lnodebug.
func (l *Logger) Debugf(format string, v ...interface{}) {
l.Outputf(2, Lnodebug, format, v...)
}
// Debugf prints the message like Printf. The printing might be suppressed
// by the flag Lnodebug.
func Debugf(format string, v ...interface{}) {
std.Outputf(2, Lnodebug, format, v...)
}
// Debugln prints the message like Println. The printing might be suppressed
// by the flag Lnodebug.
func (l *Logger) Debugln(v ...interface{}) {
l.Outputln(2, Lnodebug, v...)
}
// Debugln prints the message like Println. The printing might be suppressed
// by the flag Lnodebug.
func Debugln(v ...interface{}) {
std.Outputln(2, Lnodebug, v...)
}
// Flags returns the current flags used by the logger.
func (l *Logger) Flags() int {
l.mu.Lock()
defer l.mu.Unlock()
return l.flag
}
// Flags returns the current flags used by the standard logger.
func Flags() int {
return std.Flags()
}
// SetFlags sets the flags of the logger.
func (l *Logger) SetFlags(flag int) {
l.mu.Lock()
defer l.mu.Unlock()
l.flag = flag
}
// SetFlags sets the flags for the standard logger.
func SetFlags(flag int) {
std.SetFlags(flag)
}
// Prefix returns the prefix used by the logger.
func (l *Logger) Prefix() string {
l.mu.Lock()
defer l.mu.Unlock()
return l.prefix
}
// Prefix returns the prefix used by the standard logger of the package.
func Prefix() string {
return std.Prefix()
}
// SetPrefix sets the prefix for the logger.
func (l *Logger) SetPrefix(prefix string) {
l.mu.Lock()
defer l.mu.Unlock()
l.prefix = prefix
}
// SetPrefix sets the prefix of the standard logger of the package.
func SetPrefix(prefix string) {
std.SetPrefix(prefix)
}
// SetOutput sets the output of the logger.
func (l *Logger) SetOutput(w io.Writer) {
l.mu.Lock()
defer l.mu.Unlock()
l.out = w
}
// SetOutput sets the output for the standard logger of the package.
func SetOutput(w io.Writer) {
std.SetOutput(w)
}

@ -0,0 +1,522 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"unicode"
)
// node represents a node in the binary tree.
type node struct {
// x is the search value
x uint32
// p parent node
p uint32
// l left child
l uint32
// r right child
r uint32
}
// wordLen is the number of bytes represented by the v field of a node.
const wordLen = 4
// binTree supports the identification of the next operation based on a
// binary tree.
//
// Nodes will be identified by their index into the ring buffer.
type binTree struct {
dict *encoderDict
// ring buffer of nodes
node []node
// absolute offset of the entry for the next node. Position 4
// byte larger.
hoff int64
// front position in the node ring buffer
front uint32
// index of the root node
root uint32
// current x value
x uint32
// preallocated array
data []byte
}
// null represents the nonexistent index. We can't use zero because it
// would always exist or we would need to decrease the index for each
// reference.
const null uint32 = 1<<32 - 1
// newBinTree initializes the binTree structure. The capacity defines
// the size of the buffer and defines the maximum distance for which
// matches will be found.
func newBinTree(capacity int) (t *binTree, err error) {
if capacity < 1 {
return nil, errors.New(
"newBinTree: capacity must be larger than zero")
}
if int64(capacity) >= int64(null) {
return nil, errors.New(
"newBinTree: capacity must less 2^{32}-1")
}
t = &binTree{
node: make([]node, capacity),
hoff: -int64(wordLen),
root: null,
data: make([]byte, maxMatchLen),
}
return t, nil
}
func (t *binTree) SetDict(d *encoderDict) { t.dict = d }
// WriteByte writes a single byte into the binary tree.
func (t *binTree) WriteByte(c byte) error {
t.x = (t.x << 8) | uint32(c)
t.hoff++
if t.hoff < 0 {
return nil
}
v := t.front
if int64(v) < t.hoff {
// We are overwriting old nodes stored in the tree.
t.remove(v)
}
t.node[v].x = t.x
t.add(v)
t.front++
if int64(t.front) >= int64(len(t.node)) {
t.front = 0
}
return nil
}
// Writes writes a sequence of bytes into the binTree structure.
func (t *binTree) Write(p []byte) (n int, err error) {
for _, c := range p {
t.WriteByte(c)
}
return len(p), nil
}
// add puts the node v into the tree. The node must not be part of the
// tree before.
func (t *binTree) add(v uint32) {
vn := &t.node[v]
// Set left and right to null indices.
vn.l, vn.r = null, null
// If the binary tree is empty make v the root.
if t.root == null {
t.root = v
vn.p = null
return
}
x := vn.x
p := t.root
// Search for the right leave link and add the new node.
for {
pn := &t.node[p]
if x <= pn.x {
if pn.l == null {
pn.l = v
vn.p = p
return
}
p = pn.l
} else {
if pn.r == null {
pn.r = v
vn.p = p
return
}
p = pn.r
}
}
}
// parent returns the parent node index of v and the pointer to v value
// in the parent.
func (t *binTree) parent(v uint32) (p uint32, ptr *uint32) {
if t.root == v {
return null, &t.root
}
p = t.node[v].p
if t.node[p].l == v {
ptr = &t.node[p].l
} else {
ptr = &t.node[p].r
}
return
}
// Remove node v.
func (t *binTree) remove(v uint32) {
vn := &t.node[v]
p, ptr := t.parent(v)
l, r := vn.l, vn.r
if l == null {
// Move the right child up.
*ptr = r
if r != null {
t.node[r].p = p
}
return
}
if r == null {
// Move the left child up.
*ptr = l
t.node[l].p = p
return
}
// Search the in-order predecessor u.
un := &t.node[l]
ur := un.r
if ur == null {
// In order predecessor is l. Move it up.
un.r = r
t.node[r].p = l
un.p = p
*ptr = l
return
}
var u uint32
for {
// Look for the max value in the tree where l is root.
u = ur
ur = t.node[u].r
if ur == null {
break
}
}
// replace u with ul
un = &t.node[u]
ul := un.l
up := un.p
t.node[up].r = ul
if ul != null {
t.node[ul].p = up
}
// replace v by u
un.l, un.r = l, r
t.node[l].p = u
t.node[r].p = u
*ptr = u
un.p = p
}
// search looks for the node that have the value x or for the nodes that
// brace it. The node highest in the tree with the value x will be
// returned. All other nodes with the same value live in left subtree of
// the returned node.
func (t *binTree) search(v uint32, x uint32) (a, b uint32) {
a, b = null, null
if v == null {
return
}
for {
vn := &t.node[v]
if x <= vn.x {
if x == vn.x {
return v, v
}
b = v
if vn.l == null {
return
}
v = vn.l
} else {
a = v
if vn.r == null {
return
}
v = vn.r
}
}
}
// max returns the node with maximum value in the subtree with v as
// root.
func (t *binTree) max(v uint32) uint32 {
if v == null {
return null
}
for {
r := t.node[v].r
if r == null {
return v
}
v = r
}
}
// min returns the node with the minimum value in the subtree with v as
// root.
func (t *binTree) min(v uint32) uint32 {
if v == null {
return null
}
for {
l := t.node[v].l
if l == null {
return v
}
v = l
}
}
// pred returns the in-order predecessor of node v.
func (t *binTree) pred(v uint32) uint32 {
if v == null {
return null
}
u := t.max(t.node[v].l)
if u != null {
return u
}
for {
p := t.node[v].p
if p == null {
return null
}
if t.node[p].r == v {
return p
}
v = p
}
}
// succ returns the in-order successor of node v.
func (t *binTree) succ(v uint32) uint32 {
if v == null {
return null
}
u := t.min(t.node[v].r)
if u != null {
return u
}
for {
p := t.node[v].p
if p == null {
return null
}
if t.node[p].l == v {
return p
}
v = p
}
}
// xval converts the first four bytes of a into an 32-bit unsigned
// integer in big-endian order.
func xval(a []byte) uint32 {
var x uint32
switch len(a) {
default:
x |= uint32(a[3])
fallthrough
case 3:
x |= uint32(a[2]) << 8
fallthrough
case 2:
x |= uint32(a[1]) << 16
fallthrough
case 1:
x |= uint32(a[0]) << 24
case 0:
}
return x
}
// dumpX converts value x into a four-letter string.
func dumpX(x uint32) string {
a := make([]byte, 4)
for i := 0; i < 4; i++ {
c := byte(x >> uint((3-i)*8))
if unicode.IsGraphic(rune(c)) {
a[i] = c
} else {
a[i] = '.'
}
}
return string(a)
}
/*
// dumpNode writes a representation of the node v into the io.Writer.
func (t *binTree) dumpNode(w io.Writer, v uint32, indent int) {
if v == null {
return
}
vn := &t.node[v]
t.dumpNode(w, vn.r, indent+2)
for i := 0; i < indent; i++ {
fmt.Fprint(w, " ")
}
if vn.p == null {
fmt.Fprintf(w, "node %d %q parent null\n", v, dumpX(vn.x))
} else {
fmt.Fprintf(w, "node %d %q parent %d\n", v, dumpX(vn.x), vn.p)
}
t.dumpNode(w, vn.l, indent+2)
}
// dump prints a representation of the binary tree into the writer.
func (t *binTree) dump(w io.Writer) error {
bw := bufio.NewWriter(w)
t.dumpNode(bw, t.root, 0)
return bw.Flush()
}
*/
func (t *binTree) distance(v uint32) int {
dist := int(t.front) - int(v)
if dist <= 0 {
dist += len(t.node)
}
return dist
}
type matchParams struct {
rep [4]uint32
// length when match will be accepted
nAccept int
// nodes to check
check int
// finish if length get shorter
stopShorter bool
}
func (t *binTree) match(m match, distIter func() (int, bool), p matchParams,
) (r match, checked int, accepted bool) {
buf := &t.dict.buf
for {
if checked >= p.check {
return m, checked, true
}
dist, ok := distIter()
if !ok {
return m, checked, false
}
checked++
if m.n > 0 {
i := buf.rear - dist + m.n - 1
if i < 0 {
i += len(buf.data)
} else if i >= len(buf.data) {
i -= len(buf.data)
}
if buf.data[i] != t.data[m.n-1] {
if p.stopShorter {
return m, checked, false
}
continue
}
}
n := buf.matchLen(dist, t.data)
switch n {
case 0:
if p.stopShorter {
return m, checked, false
}
continue
case 1:
if uint32(dist-minDistance) != p.rep[0] {
continue
}
}
if n < m.n || (n == m.n && int64(dist) >= m.distance) {
continue
}
m = match{int64(dist), n}
if n >= p.nAccept {
return m, checked, true
}
}
}
func (t *binTree) NextOp(rep [4]uint32) operation {
// retrieve maxMatchLen data
n, _ := t.dict.buf.Peek(t.data[:maxMatchLen])
if n == 0 {
panic("no data in buffer")
}
t.data = t.data[:n]
var (
m match
x, u, v uint32
iterPred, iterSucc func() (int, bool)
)
p := matchParams{
rep: rep,
nAccept: maxMatchLen,
check: 32,
}
i := 4
iterSmall := func() (dist int, ok bool) {
i--
if i <= 0 {
return 0, false
}
return i, true
}
m, checked, accepted := t.match(m, iterSmall, p)
if accepted {
goto end
}
p.check -= checked
x = xval(t.data)
u, v = t.search(t.root, x)
if u == v && len(t.data) == 4 {
iter := func() (dist int, ok bool) {
if u == null {
return 0, false
}
dist = t.distance(u)
u, v = t.search(t.node[u].l, x)
if u != v {
u = null
}
return dist, true
}
m, _, _ = t.match(m, iter, p)
goto end
}
p.stopShorter = true
iterSucc = func() (dist int, ok bool) {
if v == null {
return 0, false
}
dist = t.distance(v)
v = t.succ(v)
return dist, true
}
m, checked, accepted = t.match(m, iterSucc, p)
if accepted {
goto end
}
p.check -= checked
iterPred = func() (dist int, ok bool) {
if u == null {
return 0, false
}
dist = t.distance(u)
u = t.pred(u)
return dist, true
}
m, _, _ = t.match(m, iterPred, p)
end:
if m.n == 0 {
return lit{t.data[0]}
}
return m
}

@ -0,0 +1,47 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
/* Naming conventions follows the CodeReviewComments in the Go Wiki. */
// ntz32Const is used by the functions NTZ and NLZ.
const ntz32Const = 0x04d7651f
// ntz32Table is a helper table for de Bruijn algorithm by Danny Dubé.
// See Henry S. Warren, Jr. "Hacker's Delight" section 5-1 figure 5-26.
var ntz32Table = [32]int8{
0, 1, 2, 24, 3, 19, 6, 25,
22, 4, 20, 10, 16, 7, 12, 26,
31, 23, 18, 5, 21, 9, 15, 11,
30, 17, 8, 14, 29, 13, 28, 27,
}
/*
// ntz32 computes the number of trailing zeros for an unsigned 32-bit integer.
func ntz32(x uint32) int {
if x == 0 {
return 32
}
x = (x & -x) * ntz32Const
return int(ntz32Table[x>>27])
}
*/
// nlz32 computes the number of leading zeros for an unsigned 32-bit integer.
func nlz32(x uint32) int {
// Smear left most bit to the right
x |= x >> 1
x |= x >> 2
x |= x >> 4
x |= x >> 8
x |= x >> 16
// Use ntz mechanism to calculate nlz.
x++
if x == 0 {
return 0
}
x *= ntz32Const
return 32 - int(ntz32Table[x>>27])
}

@ -0,0 +1,39 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"io"
)
// breader provides the ReadByte function for a Reader. It doesn't read
// more data from the reader than absolutely necessary.
type breader struct {
io.Reader
// helper slice to save allocations
p []byte
}
// ByteReader converts an io.Reader into an io.ByteReader.
func ByteReader(r io.Reader) io.ByteReader {
br, ok := r.(io.ByteReader)
if !ok {
return &breader{r, make([]byte, 1)}
}
return br
}
// ReadByte read byte function.
func (r *breader) ReadByte() (c byte, err error) {
n, err := r.Reader.Read(r.p)
if n < 1 {
if err == nil {
err = errors.New("breader.ReadByte: no data")
}
return 0, err
}
return r.p[0], nil
}

@ -0,0 +1,171 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
)
// buffer provides a circular buffer of bytes. If the front index equals
// the rear index the buffer is empty. As a consequence front cannot be
// equal rear for a full buffer. So a full buffer has a length that is
// one byte less the the length of the data slice.
type buffer struct {
data []byte
front int
rear int
}
// newBuffer creates a buffer with the given size.
func newBuffer(size int) *buffer {
return &buffer{data: make([]byte, size+1)}
}
// Cap returns the capacity of the buffer.
func (b *buffer) Cap() int {
return len(b.data) - 1
}
// Resets the buffer. The front and rear index are set to zero.
func (b *buffer) Reset() {
b.front = 0
b.rear = 0
}
// Buffered returns the number of bytes buffered.
func (b *buffer) Buffered() int {
delta := b.front - b.rear
if delta < 0 {
delta += len(b.data)
}
return delta
}
// Available returns the number of bytes available for writing.
func (b *buffer) Available() int {
delta := b.rear - 1 - b.front
if delta < 0 {
delta += len(b.data)
}
return delta
}
// addIndex adds a non-negative integer to the index i and returns the
// resulting index. The function takes care of wrapping the index as
// well as potential overflow situations.
func (b *buffer) addIndex(i int, n int) int {
// subtraction of len(b.data) prevents overflow
i += n - len(b.data)
if i < 0 {
i += len(b.data)
}
return i
}
// Read reads bytes from the buffer into p and returns the number of
// bytes read. The function never returns an error but might return less
// data than requested.
func (b *buffer) Read(p []byte) (n int, err error) {
n, err = b.Peek(p)
b.rear = b.addIndex(b.rear, n)
return n, err
}
// Peek reads bytes from the buffer into p without changing the buffer.
// Peek will never return an error but might return less data than
// requested.
func (b *buffer) Peek(p []byte) (n int, err error) {
m := b.Buffered()
n = len(p)
if m < n {
n = m
p = p[:n]
}
k := copy(p, b.data[b.rear:])
if k < n {
copy(p[k:], b.data)
}
return n, nil
}
// Discard skips the n next bytes to read from the buffer, returning the
// bytes discarded.
//
// If Discards skips fewer than n bytes, it returns an error.
func (b *buffer) Discard(n int) (discarded int, err error) {
if n < 0 {
return 0, errors.New("buffer.Discard: negative argument")
}
m := b.Buffered()
if m < n {
n = m
err = errors.New(
"buffer.Discard: discarded less bytes then requested")
}
b.rear = b.addIndex(b.rear, n)
return n, err
}
// ErrNoSpace indicates that there is insufficient space for the Write
// operation.
var ErrNoSpace = errors.New("insufficient space")
// Write puts data into the buffer. If less bytes are written than
// requested ErrNoSpace is returned.
func (b *buffer) Write(p []byte) (n int, err error) {
m := b.Available()
n = len(p)
if m < n {
n = m
p = p[:m]
err = ErrNoSpace
}
k := copy(b.data[b.front:], p)
if k < n {
copy(b.data, p[k:])
}
b.front = b.addIndex(b.front, n)
return n, err
}
// WriteByte writes a single byte into the buffer. The error ErrNoSpace
// is returned if no single byte is available in the buffer for writing.
func (b *buffer) WriteByte(c byte) error {
if b.Available() < 1 {
return ErrNoSpace
}
b.data[b.front] = c
b.front = b.addIndex(b.front, 1)
return nil
}
// prefixLen returns the length of the common prefix of a and b.
func prefixLen(a, b []byte) int {
if len(a) > len(b) {
a, b = b, a
}
for i, c := range a {
if b[i] != c {
return i
}
}
return len(a)
}
// matchLen returns the length of the common prefix for the given
// distance from the rear and the byte slice p.
func (b *buffer) matchLen(distance int, p []byte) int {
var n int
i := b.rear - distance
if i < 0 {
if n = prefixLen(p, b.data[len(b.data)+i:]); n < -i {
return n
}
p = p[n:]
i = 0
}
n += prefixLen(p, b.data[i:])
return n
}

@ -0,0 +1,37 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"io"
)
// ErrLimit indicates that the limit of the LimitedByteWriter has been
// reached.
var ErrLimit = errors.New("limit reached")
// LimitedByteWriter provides a byte writer that can be written until a
// limit is reached. The field N provides the number of remaining
// bytes.
type LimitedByteWriter struct {
BW io.ByteWriter
N int64
}
// WriteByte writes a single byte to the limited byte writer. It returns
// ErrLimit if the limit has been reached. If the byte is successfully
// written the field N of the LimitedByteWriter will be decremented by
// one.
func (l *LimitedByteWriter) WriteByte(c byte) error {
if l.N <= 0 {
return ErrLimit
}
if err := l.BW.WriteByte(c); err != nil {
return err
}
l.N--
return nil
}

@ -0,0 +1,277 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"fmt"
"io"
)
// decoder decodes a raw LZMA stream without any header.
type decoder struct {
// dictionary; the rear pointer of the buffer will be used for
// reading the data.
Dict *decoderDict
// decoder state
State *state
// range decoder
rd *rangeDecoder
// start stores the head value of the dictionary for the LZMA
// stream
start int64
// size of uncompressed data
size int64
// end-of-stream encountered
eos bool
// EOS marker found
eosMarker bool
}
// newDecoder creates a new decoder instance. The parameter size provides
// the expected byte size of the decompressed data. If the size is
// unknown use a negative value. In that case the decoder will look for
// a terminating end-of-stream marker.
func newDecoder(br io.ByteReader, state *state, dict *decoderDict, size int64) (d *decoder, err error) {
rd, err := newRangeDecoder(br)
if err != nil {
return nil, err
}
d = &decoder{
State: state,
Dict: dict,
rd: rd,
size: size,
start: dict.pos(),
}
return d, nil
}
// Reopen restarts the decoder with a new byte reader and a new size. Reopen
// resets the Decompressed counter to zero.
func (d *decoder) Reopen(br io.ByteReader, size int64) error {
var err error
if d.rd, err = newRangeDecoder(br); err != nil {
return err
}
d.start = d.Dict.pos()
d.size = size
d.eos = false
return nil
}
// decodeLiteral decodes a single literal from the LZMA stream.
func (d *decoder) decodeLiteral() (op operation, err error) {
litState := d.State.litState(d.Dict.byteAt(1), d.Dict.head)
match := d.Dict.byteAt(int(d.State.rep[0]) + 1)
s, err := d.State.litCodec.Decode(d.rd, d.State.state, match, litState)
if err != nil {
return nil, err
}
return lit{s}, nil
}
// errEOS indicates that an EOS marker has been found.
var errEOS = errors.New("EOS marker found")
// readOp decodes the next operation from the compressed stream. It
// returns the operation. If an explicit end of stream marker is
// identified the eos error is returned.
func (d *decoder) readOp() (op operation, err error) {
// Value of the end of stream (EOS) marker
const eosDist = 1<<32 - 1
state, state2, posState := d.State.states(d.Dict.head)
b, err := d.State.isMatch[state2].Decode(d.rd)
if err != nil {
return nil, err
}
if b == 0 {
// literal
op, err := d.decodeLiteral()
if err != nil {
return nil, err
}
d.State.updateStateLiteral()
return op, nil
}
b, err = d.State.isRep[state].Decode(d.rd)
if err != nil {
return nil, err
}
if b == 0 {
// simple match
d.State.rep[3], d.State.rep[2], d.State.rep[1] =
d.State.rep[2], d.State.rep[1], d.State.rep[0]
d.State.updateStateMatch()
// The length decoder returns the length offset.
n, err := d.State.lenCodec.Decode(d.rd, posState)
if err != nil {
return nil, err
}
// The dist decoder returns the distance offset. The actual
// distance is 1 higher.
d.State.rep[0], err = d.State.distCodec.Decode(d.rd, n)
if err != nil {
return nil, err
}
if d.State.rep[0] == eosDist {
d.eosMarker = true
return nil, errEOS
}
op = match{n: int(n) + minMatchLen,
distance: int64(d.State.rep[0]) + minDistance}
return op, nil
}
b, err = d.State.isRepG0[state].Decode(d.rd)
if err != nil {
return nil, err
}
dist := d.State.rep[0]
if b == 0 {
// rep match 0
b, err = d.State.isRepG0Long[state2].Decode(d.rd)
if err != nil {
return nil, err
}
if b == 0 {
d.State.updateStateShortRep()
op = match{n: 1, distance: int64(dist) + minDistance}
return op, nil
}
} else {
b, err = d.State.isRepG1[state].Decode(d.rd)
if err != nil {
return nil, err
}
if b == 0 {
dist = d.State.rep[1]
} else {
b, err = d.State.isRepG2[state].Decode(d.rd)
if err != nil {
return nil, err
}
if b == 0 {
dist = d.State.rep[2]
} else {
dist = d.State.rep[3]
d.State.rep[3] = d.State.rep[2]
}
d.State.rep[2] = d.State.rep[1]
}
d.State.rep[1] = d.State.rep[0]
d.State.rep[0] = dist
}
n, err := d.State.repLenCodec.Decode(d.rd, posState)
if err != nil {
return nil, err
}
d.State.updateStateRep()
op = match{n: int(n) + minMatchLen, distance: int64(dist) + minDistance}
return op, nil
}
// apply takes the operation and transforms the decoder dictionary accordingly.
func (d *decoder) apply(op operation) error {
var err error
switch x := op.(type) {
case match:
err = d.Dict.writeMatch(x.distance, x.n)
case lit:
err = d.Dict.WriteByte(x.b)
default:
panic("op is neither a match nor a literal")
}
return err
}
// decompress fills the dictionary unless no space for new data is
// available. If the end of the LZMA stream has been reached io.EOF will
// be returned.
func (d *decoder) decompress() error {
if d.eos {
return io.EOF
}
for d.Dict.Available() >= maxMatchLen {
op, err := d.readOp()
switch err {
case nil:
// break
case errEOS:
d.eos = true
if !d.rd.possiblyAtEnd() {
return errDataAfterEOS
}
if d.size >= 0 && d.size != d.Decompressed() {
return errSize
}
return io.EOF
case io.EOF:
d.eos = true
return io.ErrUnexpectedEOF
default:
return err
}
if err = d.apply(op); err != nil {
return err
}
if d.size >= 0 && d.Decompressed() >= d.size {
d.eos = true
if d.Decompressed() > d.size {
return errSize
}
if !d.rd.possiblyAtEnd() {
switch _, err = d.readOp(); err {
case nil:
return errSize
case io.EOF:
return io.ErrUnexpectedEOF
case errEOS:
break
default:
return err
}
}
return io.EOF
}
}
return nil
}
// Errors that may be returned while decoding data.
var (
errDataAfterEOS = errors.New("lzma: data after end of stream marker")
errSize = errors.New("lzma: wrong uncompressed data size")
)
// Read reads data from the buffer. If no more data is available io.EOF is
// returned.
func (d *decoder) Read(p []byte) (n int, err error) {
var k int
for {
// Read of decoder dict never returns an error.
k, err = d.Dict.Read(p[n:])
if err != nil {
panic(fmt.Errorf("dictionary read error %s", err))
}
if k == 0 && d.eos {
return n, io.EOF
}
n += k
if n >= len(p) {
return n, nil
}
if err = d.decompress(); err != nil && err != io.EOF {
return n, err
}
}
}
// Decompressed returns the number of bytes decompressed by the decoder.
func (d *decoder) Decompressed() int64 {
return d.Dict.pos() - d.start
}

@ -0,0 +1,128 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"fmt"
)
// decoderDict provides the dictionary for the decoder. The whole
// dictionary is used as reader buffer.
type decoderDict struct {
buf buffer
head int64
}
// newDecoderDict creates a new decoder dictionary. The whole dictionary
// will be used as reader buffer.
func newDecoderDict(dictCap int) (d *decoderDict, err error) {
// lower limit supports easy test cases
if !(1 <= dictCap && int64(dictCap) <= MaxDictCap) {
return nil, errors.New("lzma: dictCap out of range")
}
d = &decoderDict{buf: *newBuffer(dictCap)}
return d, nil
}
// Reset clears the dictionary. The read buffer is not changed, so the
// buffered data can still be read.
func (d *decoderDict) Reset() {
d.head = 0
}
// WriteByte writes a single byte into the dictionary. It is used to
// write literals into the dictionary.
func (d *decoderDict) WriteByte(c byte) error {
if err := d.buf.WriteByte(c); err != nil {
return err
}
d.head++
return nil
}
// pos returns the position of the dictionary head.
func (d *decoderDict) pos() int64 { return d.head }
// dictLen returns the actual length of the dictionary.
func (d *decoderDict) dictLen() int {
capacity := d.buf.Cap()
if d.head >= int64(capacity) {
return capacity
}
return int(d.head)
}
// byteAt returns a byte stored in the dictionary. If the distance is
// non-positive or exceeds the current length of the dictionary the zero
// byte is returned.
func (d *decoderDict) byteAt(dist int) byte {
if !(0 < dist && dist <= d.dictLen()) {
return 0
}
i := d.buf.front - dist
if i < 0 {
i += len(d.buf.data)
}
return d.buf.data[i]
}
// writeMatch writes the match at the top of the dictionary. The given
// distance must point in the current dictionary and the length must not
// exceed the maximum length 273 supported in LZMA.
//
// The error value ErrNoSpace indicates that no space is available in
// the dictionary for writing. You need to read from the dictionary
// first.
func (d *decoderDict) writeMatch(dist int64, length int) error {
if !(0 < dist && dist <= int64(d.dictLen())) {
return errors.New("writeMatch: distance out of range")
}
if !(0 < length && length <= maxMatchLen) {
return errors.New("writeMatch: length out of range")
}
if length > d.buf.Available() {
return ErrNoSpace
}
d.head += int64(length)
i := d.buf.front - int(dist)
if i < 0 {
i += len(d.buf.data)
}
for length > 0 {
var p []byte
if i >= d.buf.front {
p = d.buf.data[i:]
i = 0
} else {
p = d.buf.data[i:d.buf.front]
i = d.buf.front
}
if len(p) > length {
p = p[:length]
}
if _, err := d.buf.Write(p); err != nil {
panic(fmt.Errorf("d.buf.Write returned error %s", err))
}
length -= len(p)
}
return nil
}
// Write writes the given bytes into the dictionary and advances the
// head.
func (d *decoderDict) Write(p []byte) (n int, err error) {
n, err = d.buf.Write(p)
d.head += int64(n)
return n, err
}
// Available returns the number of available bytes for writing into the
// decoder dictionary.
func (d *decoderDict) Available() int { return d.buf.Available() }
// Read reads data from the buffer contained in the decoder dictionary.
func (d *decoderDict) Read(p []byte) (n int, err error) { return d.buf.Read(p) }

@ -0,0 +1,38 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
// directCodec allows the encoding and decoding of values with a fixed number
// of bits. The number of bits must be in the range [1,32].
type directCodec byte
// Bits returns the number of bits supported by this codec.
func (dc directCodec) Bits() int {
return int(dc)
}
// Encode uses the range encoder to encode a value with the fixed number of
// bits. The most-significant bit is encoded first.
func (dc directCodec) Encode(e *rangeEncoder, v uint32) error {
for i := int(dc) - 1; i >= 0; i-- {
if err := e.DirectEncodeBit(v >> uint(i)); err != nil {
return err
}
}
return nil
}
// Decode uses the range decoder to decode a value with the given number of
// given bits. The most-significant bit is decoded first.
func (dc directCodec) Decode(d *rangeDecoder) (v uint32, err error) {
for i := int(dc) - 1; i >= 0; i-- {
x, err := d.DirectDecodeBit()
if err != nil {
return 0, err
}
v = (v << 1) | x
}
return v, nil
}

@ -0,0 +1,140 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
// Constants used by the distance codec.
const (
// minimum supported distance
minDistance = 1
// maximum supported distance, value is used for the eos marker.
maxDistance = 1 << 32
// number of the supported len states
lenStates = 4
// start for the position models
startPosModel = 4
// first index with align bits support
endPosModel = 14
// bits for the position slots
posSlotBits = 6
// number of align bits
alignBits = 4
)
// distCodec provides encoding and decoding of distance values.
type distCodec struct {
posSlotCodecs [lenStates]treeCodec
posModel [endPosModel - startPosModel]treeReverseCodec
alignCodec treeReverseCodec
}
// deepcopy initializes dc as deep copy of the source.
func (dc *distCodec) deepcopy(src *distCodec) {
if dc == src {
return
}
for i := range dc.posSlotCodecs {
dc.posSlotCodecs[i].deepcopy(&src.posSlotCodecs[i])
}
for i := range dc.posModel {
dc.posModel[i].deepcopy(&src.posModel[i])
}
dc.alignCodec.deepcopy(&src.alignCodec)
}
// newDistCodec creates a new distance codec.
func (dc *distCodec) init() {
for i := range dc.posSlotCodecs {
dc.posSlotCodecs[i] = makeTreeCodec(posSlotBits)
}
for i := range dc.posModel {
posSlot := startPosModel + i
bits := (posSlot >> 1) - 1
dc.posModel[i] = makeTreeReverseCodec(bits)
}
dc.alignCodec = makeTreeReverseCodec(alignBits)
}
// lenState converts the value l to a supported lenState value.
func lenState(l uint32) uint32 {
if l >= lenStates {
l = lenStates - 1
}
return l
}
// Encode encodes the distance using the parameter l. Dist can have values from
// the full range of uint32 values. To get the distance offset the actual match
// distance has to be decreased by 1. A distance offset of 0xffffffff (eos)
// indicates the end of the stream.
func (dc *distCodec) Encode(e *rangeEncoder, dist uint32, l uint32) (err error) {
// Compute the posSlot using nlz32
var posSlot uint32
var bits uint32
if dist < startPosModel {
posSlot = dist
} else {
bits = uint32(30 - nlz32(dist))
posSlot = startPosModel - 2 + (bits << 1)
posSlot += (dist >> uint(bits)) & 1
}
if err = dc.posSlotCodecs[lenState(l)].Encode(e, posSlot); err != nil {
return
}
switch {
case posSlot < startPosModel:
return nil
case posSlot < endPosModel:
tc := &dc.posModel[posSlot-startPosModel]
return tc.Encode(dist, e)
}
dic := directCodec(bits - alignBits)
if err = dic.Encode(e, dist>>alignBits); err != nil {
return
}
return dc.alignCodec.Encode(dist, e)
}
// Decode decodes the distance offset using the parameter l. The dist value
// 0xffffffff (eos) indicates the end of the stream. Add one to the distance
// offset to get the actual match distance.
func (dc *distCodec) Decode(d *rangeDecoder, l uint32) (dist uint32, err error) {
posSlot, err := dc.posSlotCodecs[lenState(l)].Decode(d)
if err != nil {
return
}
// posSlot equals distance
if posSlot < startPosModel {
return posSlot, nil
}
// posSlot uses the individual models
bits := (posSlot >> 1) - 1
dist = (2 | (posSlot & 1)) << bits
var u uint32
if posSlot < endPosModel {
tc := &dc.posModel[posSlot-startPosModel]
if u, err = tc.Decode(d); err != nil {
return 0, err
}
dist += u
return dist, nil
}
// posSlots use direct encoding and a single model for the four align
// bits.
dic := directCodec(bits - alignBits)
if u, err = dic.Decode(d); err != nil {
return 0, err
}
dist += u << alignBits
if u, err = dc.alignCodec.Decode(d); err != nil {
return 0, err
}
dist += u
return dist, nil
}

@ -0,0 +1,268 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"fmt"
"io"
)
// opLenMargin provides the upper limit of the number of bytes required
// to encode a single operation.
const opLenMargin = 16
// compressFlags control the compression process.
type compressFlags uint32
// Values for compressFlags.
const (
// all data should be compressed, even if compression is not
// optimal.
all compressFlags = 1 << iota
)
// encoderFlags provide the flags for an encoder.
type encoderFlags uint32
// Flags for the encoder.
const (
// eosMarker requests an EOS marker to be written.
eosMarker encoderFlags = 1 << iota
)
// Encoder compresses data buffered in the encoder dictionary and writes
// it into a byte writer.
type encoder struct {
dict *encoderDict
state *state
re *rangeEncoder
start int64
// generate eos marker
marker bool
limit bool
margin int
}
// newEncoder creates a new encoder. If the byte writer must be
// limited use LimitedByteWriter provided by this package. The flags
// argument supports the eosMarker flag, controlling whether a
// terminating end-of-stream marker must be written.
func newEncoder(bw io.ByteWriter, state *state, dict *encoderDict,
flags encoderFlags) (e *encoder, err error) {
re, err := newRangeEncoder(bw)
if err != nil {
return nil, err
}
e = &encoder{
dict: dict,
state: state,
re: re,
marker: flags&eosMarker != 0,
start: dict.Pos(),
margin: opLenMargin,
}
if e.marker {
e.margin += 5
}
return e, nil
}
// Write writes the bytes from p into the dictionary. If not enough
// space is available the data in the dictionary buffer will be
// compressed to make additional space available. If the limit of the
// underlying writer has been reached ErrLimit will be returned.
func (e *encoder) Write(p []byte) (n int, err error) {
for {
k, err := e.dict.Write(p[n:])
n += k
if err == ErrNoSpace {
if err = e.compress(0); err != nil {
return n, err
}
continue
}
return n, err
}
}
// Reopen reopens the encoder with a new byte writer.
func (e *encoder) Reopen(bw io.ByteWriter) error {
var err error
if e.re, err = newRangeEncoder(bw); err != nil {
return err
}
e.start = e.dict.Pos()
e.limit = false
return nil
}
// writeLiteral writes a literal into the LZMA stream
func (e *encoder) writeLiteral(l lit) error {
var err error
state, state2, _ := e.state.states(e.dict.Pos())
if err = e.state.isMatch[state2].Encode(e.re, 0); err != nil {
return err
}
litState := e.state.litState(e.dict.ByteAt(1), e.dict.Pos())
match := e.dict.ByteAt(int(e.state.rep[0]) + 1)
err = e.state.litCodec.Encode(e.re, l.b, state, match, litState)
if err != nil {
return err
}
e.state.updateStateLiteral()
return nil
}
// iverson implements the Iverson operator as proposed by Donald Knuth in his
// book Concrete Mathematics.
func iverson(ok bool) uint32 {
if ok {
return 1
}
return 0
}
// writeMatch writes a repetition operation into the operation stream
func (e *encoder) writeMatch(m match) error {
var err error
if !(minDistance <= m.distance && m.distance <= maxDistance) {
panic(fmt.Errorf("match distance %d out of range", m.distance))
}
dist := uint32(m.distance - minDistance)
if !(minMatchLen <= m.n && m.n <= maxMatchLen) &&
!(dist == e.state.rep[0] && m.n == 1) {
panic(fmt.Errorf(
"match length %d out of range; dist %d rep[0] %d",
m.n, dist, e.state.rep[0]))
}
state, state2, posState := e.state.states(e.dict.Pos())
if err = e.state.isMatch[state2].Encode(e.re, 1); err != nil {
return err
}
g := 0
for ; g < 4; g++ {
if e.state.rep[g] == dist {
break
}
}
b := iverson(g < 4)
if err = e.state.isRep[state].Encode(e.re, b); err != nil {
return err
}
n := uint32(m.n - minMatchLen)
if b == 0 {
// simple match
e.state.rep[3], e.state.rep[2], e.state.rep[1], e.state.rep[0] =
e.state.rep[2], e.state.rep[1], e.state.rep[0], dist
e.state.updateStateMatch()
if err = e.state.lenCodec.Encode(e.re, n, posState); err != nil {
return err
}
return e.state.distCodec.Encode(e.re, dist, n)
}
b = iverson(g != 0)
if err = e.state.isRepG0[state].Encode(e.re, b); err != nil {
return err
}
if b == 0 {
// g == 0
b = iverson(m.n != 1)
if err = e.state.isRepG0Long[state2].Encode(e.re, b); err != nil {
return err
}
if b == 0 {
e.state.updateStateShortRep()
return nil
}
} else {
// g in {1,2,3}
b = iverson(g != 1)
if err = e.state.isRepG1[state].Encode(e.re, b); err != nil {
return err
}
if b == 1 {
// g in {2,3}
b = iverson(g != 2)
err = e.state.isRepG2[state].Encode(e.re, b)
if err != nil {
return err
}
if b == 1 {
e.state.rep[3] = e.state.rep[2]
}
e.state.rep[2] = e.state.rep[1]
}
e.state.rep[1] = e.state.rep[0]
e.state.rep[0] = dist
}
e.state.updateStateRep()
return e.state.repLenCodec.Encode(e.re, n, posState)
}
// writeOp writes a single operation to the range encoder. The function
// checks whether there is enough space available to close the LZMA
// stream.
func (e *encoder) writeOp(op operation) error {
if e.re.Available() < int64(e.margin) {
return ErrLimit
}
switch x := op.(type) {
case lit:
return e.writeLiteral(x)
case match:
return e.writeMatch(x)
default:
panic("unexpected operation")
}
}
// compress compressed data from the dictionary buffer. If the flag all
// is set, all data in the dictionary buffer will be compressed. The
// function returns ErrLimit if the underlying writer has reached its
// limit.
func (e *encoder) compress(flags compressFlags) error {
n := 0
if flags&all == 0 {
n = maxMatchLen - 1
}
d := e.dict
m := d.m
for d.Buffered() > n {
op := m.NextOp(e.state.rep)
if err := e.writeOp(op); err != nil {
return err
}
d.Discard(op.Len())
}
return nil
}
// eosMatch is a pseudo operation that indicates the end of the stream.
var eosMatch = match{distance: maxDistance, n: minMatchLen}
// Close terminates the LZMA stream. If requested the end-of-stream
// marker will be written. If the byte writer limit has been or will be
// reached during compression of the remaining data in the buffer the
// LZMA stream will be closed and data will remain in the buffer.
func (e *encoder) Close() error {
err := e.compress(all)
if err != nil && err != ErrLimit {
return err
}
if e.marker {
if err := e.writeMatch(eosMatch); err != nil {
return err
}
}
err = e.re.Close()
return err
}
// Compressed returns the number bytes of the input data that been
// compressed.
func (e *encoder) Compressed() int64 {
return e.dict.Pos() - e.start
}

@ -0,0 +1,149 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"fmt"
"io"
)
// matcher is an interface that supports the identification of the next
// operation.
type matcher interface {
io.Writer
SetDict(d *encoderDict)
NextOp(rep [4]uint32) operation
}
// encoderDict provides the dictionary of the encoder. It includes an
// additional buffer atop of the actual dictionary.
type encoderDict struct {
buf buffer
m matcher
head int64
capacity int
// preallocated array
data [maxMatchLen]byte
}
// newEncoderDict creates the encoder dictionary. The argument bufSize
// defines the size of the additional buffer.
func newEncoderDict(dictCap, bufSize int, m matcher) (d *encoderDict, err error) {
if !(1 <= dictCap && int64(dictCap) <= MaxDictCap) {
return nil, errors.New(
"lzma: dictionary capacity out of range")
}
if bufSize < 1 {
return nil, errors.New(
"lzma: buffer size must be larger than zero")
}
d = &encoderDict{
buf: *newBuffer(dictCap + bufSize),
capacity: dictCap,
m: m,
}
m.SetDict(d)
return d, nil
}
// Discard discards n bytes. Note that n must not be larger than
// MaxMatchLen.
func (d *encoderDict) Discard(n int) {
p := d.data[:n]
k, _ := d.buf.Read(p)
if k < n {
panic(fmt.Errorf("lzma: can't discard %d bytes", n))
}
d.head += int64(n)
d.m.Write(p)
}
// Len returns the data available in the encoder dictionary.
func (d *encoderDict) Len() int {
n := d.buf.Available()
if int64(n) > d.head {
return int(d.head)
}
return n
}
// DictLen returns the actual length of data in the dictionary.
func (d *encoderDict) DictLen() int {
if d.head < int64(d.capacity) {
return int(d.head)
}
return d.capacity
}
// Available returns the number of bytes that can be written by a
// following Write call.
func (d *encoderDict) Available() int {
return d.buf.Available() - d.DictLen()
}
// Write writes data into the dictionary buffer. Note that the position
// of the dictionary head will not be moved. If there is not enough
// space in the buffer ErrNoSpace will be returned.
func (d *encoderDict) Write(p []byte) (n int, err error) {
m := d.Available()
if len(p) > m {
p = p[:m]
err = ErrNoSpace
}
var e error
if n, e = d.buf.Write(p); e != nil {
err = e
}
return n, err
}
// Pos returns the position of the head.
func (d *encoderDict) Pos() int64 { return d.head }
// ByteAt returns the byte at the given distance.
func (d *encoderDict) ByteAt(distance int) byte {
if !(0 < distance && distance <= d.Len()) {
return 0
}
i := d.buf.rear - distance
if i < 0 {
i += len(d.buf.data)
}
return d.buf.data[i]
}
// CopyN copies the last n bytes from the dictionary into the provided
// writer. This is used for copying uncompressed data into an
// uncompressed segment.
func (d *encoderDict) CopyN(w io.Writer, n int) (written int, err error) {
if n <= 0 {
return 0, nil
}
m := d.Len()
if n > m {
n = m
err = ErrNoSpace
}
i := d.buf.rear - n
var e error
if i < 0 {
i += len(d.buf.data)
if written, e = w.Write(d.buf.data[i:]); e != nil {
return written, e
}
i = 0
}
var k int
k, e = w.Write(d.buf.data[i:d.buf.rear])
written += k
if e != nil {
err = e
}
return written, err
}
// Buffered returns the number of bytes in the buffer.
func (d *encoderDict) Buffered() int { return d.buf.Buffered() }

Binary file not shown.

@ -0,0 +1,309 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"fmt"
"github.com/ulikunitz/xz/internal/hash"
)
/* For compression we need to find byte sequences that match the byte
* sequence at the dictionary head. A hash table is a simple method to
* provide this capability.
*/
// maxMatches limits the number of matches requested from the Matches
// function. This controls the speed of the overall encoding.
const maxMatches = 16
// shortDists defines the number of short distances supported by the
// implementation.
const shortDists = 8
// The minimum is somehow arbitrary but the maximum is limited by the
// memory requirements of the hash table.
const (
minTableExponent = 9
maxTableExponent = 20
)
// newRoller contains the function used to create an instance of the
// hash.Roller.
var newRoller = func(n int) hash.Roller { return hash.NewCyclicPoly(n) }
// hashTable stores the hash table including the rolling hash method.
//
// We implement chained hashing into a circular buffer. Each entry in
// the circular buffer stores the delta distance to the next position with a
// word that has the same hash value.
type hashTable struct {
dict *encoderDict
// actual hash table
t []int64
// circular list data with the offset to the next word
data []uint32
front int
// mask for computing the index for the hash table
mask uint64
// hash offset; initial value is -int64(wordLen)
hoff int64
// length of the hashed word
wordLen int
// hash roller for computing the hash values for the Write
// method
wr hash.Roller
// hash roller for computing arbitrary hashes
hr hash.Roller
// preallocated slices
p [maxMatches]int64
distances [maxMatches + shortDists]int
}
// hashTableExponent derives the hash table exponent from the dictionary
// capacity.
func hashTableExponent(n uint32) int {
e := 30 - nlz32(n)
switch {
case e < minTableExponent:
e = minTableExponent
case e > maxTableExponent:
e = maxTableExponent
}
return e
}
// newHashTable creates a new hash table for words of length wordLen
func newHashTable(capacity int, wordLen int) (t *hashTable, err error) {
if !(0 < capacity) {
return nil, errors.New(
"newHashTable: capacity must not be negative")
}
exp := hashTableExponent(uint32(capacity))
if !(1 <= wordLen && wordLen <= 4) {
return nil, errors.New("newHashTable: " +
"argument wordLen out of range")
}
n := 1 << uint(exp)
if n <= 0 {
panic("newHashTable: exponent is too large")
}
t = &hashTable{
t: make([]int64, n),
data: make([]uint32, capacity),
mask: (uint64(1) << uint(exp)) - 1,
hoff: -int64(wordLen),
wordLen: wordLen,
wr: newRoller(wordLen),
hr: newRoller(wordLen),
}
return t, nil
}
func (t *hashTable) SetDict(d *encoderDict) { t.dict = d }
// buffered returns the number of bytes that are currently hashed.
func (t *hashTable) buffered() int {
n := t.hoff + 1
switch {
case n <= 0:
return 0
case n >= int64(len(t.data)):
return len(t.data)
}
return int(n)
}
// addIndex adds n to an index ensuring that is stays inside the
// circular buffer for the hash chain.
func (t *hashTable) addIndex(i, n int) int {
i += n - len(t.data)
if i < 0 {
i += len(t.data)
}
return i
}
// putDelta puts the delta instance at the current front of the circular
// chain buffer.
func (t *hashTable) putDelta(delta uint32) {
t.data[t.front] = delta
t.front = t.addIndex(t.front, 1)
}
// putEntry puts a new entry into the hash table. If there is already a
// value stored it is moved into the circular chain buffer.
func (t *hashTable) putEntry(h uint64, pos int64) {
if pos < 0 {
return
}
i := h & t.mask
old := t.t[i] - 1
t.t[i] = pos + 1
var delta int64
if old >= 0 {
delta = pos - old
if delta > 1<<32-1 || delta > int64(t.buffered()) {
delta = 0
}
}
t.putDelta(uint32(delta))
}
// WriteByte converts a single byte into a hash and puts them into the hash
// table.
func (t *hashTable) WriteByte(b byte) error {
h := t.wr.RollByte(b)
t.hoff++
t.putEntry(h, t.hoff)
return nil
}
// Write converts the bytes provided into hash tables and stores the
// abbreviated offsets into the hash table. The method will never return an
// error.
func (t *hashTable) Write(p []byte) (n int, err error) {
for _, b := range p {
// WriteByte doesn't generate an error.
t.WriteByte(b)
}
return len(p), nil
}
// getMatches the matches for a specific hash. The functions returns the
// number of positions found.
//
// TODO: Make a getDistances because that we are actually interested in.
func (t *hashTable) getMatches(h uint64, positions []int64) (n int) {
if t.hoff < 0 || len(positions) == 0 {
return 0
}
buffered := t.buffered()
tailPos := t.hoff + 1 - int64(buffered)
rear := t.front - buffered
if rear >= 0 {
rear -= len(t.data)
}
// get the slot for the hash
pos := t.t[h&t.mask] - 1
delta := pos - tailPos
for {
if delta < 0 {
return n
}
positions[n] = tailPos + delta
n++
if n >= len(positions) {
return n
}
i := rear + int(delta)
if i < 0 {
i += len(t.data)
}
u := t.data[i]
if u == 0 {
return n
}
delta -= int64(u)
}
}
// hash computes the rolling hash for the word stored in p. For correct
// results its length must be equal to t.wordLen.
func (t *hashTable) hash(p []byte) uint64 {
var h uint64
for _, b := range p {
h = t.hr.RollByte(b)
}
return h
}
// Matches fills the positions slice with potential matches. The
// functions returns the number of positions filled into positions. The
// byte slice p must have word length of the hash table.
func (t *hashTable) Matches(p []byte, positions []int64) int {
if len(p) != t.wordLen {
panic(fmt.Errorf(
"byte slice must have length %d", t.wordLen))
}
h := t.hash(p)
return t.getMatches(h, positions)
}
// NextOp identifies the next operation using the hash table.
//
// TODO: Use all repetitions to find matches.
func (t *hashTable) NextOp(rep [4]uint32) operation {
// get positions
data := t.dict.data[:maxMatchLen]
n, _ := t.dict.buf.Peek(data)
data = data[:n]
var p []int64
if n < t.wordLen {
p = t.p[:0]
} else {
p = t.p[:maxMatches]
n = t.Matches(data[:t.wordLen], p)
p = p[:n]
}
// convert positions in potential distances
head := t.dict.head
dists := append(t.distances[:0], 1, 2, 3, 4, 5, 6, 7, 8)
for _, pos := range p {
dis := int(head - pos)
if dis > shortDists {
dists = append(dists, dis)
}
}
// check distances
var m match
dictLen := t.dict.DictLen()
for _, dist := range dists {
if dist > dictLen {
continue
}
// Here comes a trick. We are only interested in matches
// that are longer than the matches we have been found
// before. So before we test the whole byte sequence at
// the given distance, we test the first byte that would
// make the match longer. If it doesn't match the byte
// to match, we don't to care any longer.
i := t.dict.buf.rear - dist + m.n
if i < 0 {
i += len(t.dict.buf.data)
}
if t.dict.buf.data[i] != data[m.n] {
// We can't get a longer match. Jump to the next
// distance.
continue
}
n := t.dict.buf.matchLen(dist, data)
switch n {
case 0:
continue
case 1:
if uint32(dist-minDistance) != rep[0] {
continue
}
}
if n > m.n {
m = match{int64(dist), n}
if n == len(data) {
// No better match will be found.
break
}
}
}
if m.n == 0 {
return lit{data[0]}
}
return m
}

@ -0,0 +1,167 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"fmt"
)
// uint32LE reads an uint32 integer from a byte slice
func uint32LE(b []byte) uint32 {
x := uint32(b[3]) << 24
x |= uint32(b[2]) << 16
x |= uint32(b[1]) << 8
x |= uint32(b[0])
return x
}
// uint64LE converts the uint64 value stored as little endian to an uint64
// value.
func uint64LE(b []byte) uint64 {
x := uint64(b[7]) << 56
x |= uint64(b[6]) << 48
x |= uint64(b[5]) << 40
x |= uint64(b[4]) << 32
x |= uint64(b[3]) << 24
x |= uint64(b[2]) << 16
x |= uint64(b[1]) << 8
x |= uint64(b[0])
return x
}
// putUint32LE puts an uint32 integer into a byte slice that must have at least
// a length of 4 bytes.
func putUint32LE(b []byte, x uint32) {
b[0] = byte(x)
b[1] = byte(x >> 8)
b[2] = byte(x >> 16)
b[3] = byte(x >> 24)
}
// putUint64LE puts the uint64 value into the byte slice as little endian
// value. The byte slice b must have at least place for 8 bytes.
func putUint64LE(b []byte, x uint64) {
b[0] = byte(x)
b[1] = byte(x >> 8)
b[2] = byte(x >> 16)
b[3] = byte(x >> 24)
b[4] = byte(x >> 32)
b[5] = byte(x >> 40)
b[6] = byte(x >> 48)
b[7] = byte(x >> 56)
}
// noHeaderSize defines the value of the length field in the LZMA header.
const noHeaderSize uint64 = 1<<64 - 1
// HeaderLen provides the length of the LZMA file header.
const HeaderLen = 13
// header represents the header of an LZMA file.
type header struct {
properties Properties
dictCap int
// uncompressed size; negative value if no size is given
size int64
}
// marshalBinary marshals the header.
func (h *header) marshalBinary() (data []byte, err error) {
if err = h.properties.verify(); err != nil {
return nil, err
}
if !(0 <= h.dictCap && int64(h.dictCap) <= MaxDictCap) {
return nil, fmt.Errorf("lzma: DictCap %d out of range",
h.dictCap)
}
data = make([]byte, 13)
// property byte
data[0] = h.properties.Code()
// dictionary capacity
putUint32LE(data[1:5], uint32(h.dictCap))
// uncompressed size
var s uint64
if h.size > 0 {
s = uint64(h.size)
} else {
s = noHeaderSize
}
putUint64LE(data[5:], s)
return data, nil
}
// unmarshalBinary unmarshals the header.
func (h *header) unmarshalBinary(data []byte) error {
if len(data) != HeaderLen {
return errors.New("lzma.unmarshalBinary: data has wrong length")
}
// properties
var err error
if h.properties, err = PropertiesForCode(data[0]); err != nil {
return err
}
// dictionary capacity
h.dictCap = int(uint32LE(data[1:]))
if h.dictCap < 0 {
return errors.New(
"LZMA header: dictionary capacity exceeds maximum " +
"integer")
}
// uncompressed size
s := uint64LE(data[5:])
if s == noHeaderSize {
h.size = -1
} else {
h.size = int64(s)
if h.size < 0 {
return errors.New(
"LZMA header: uncompressed size " +
"out of int64 range")
}
}
return nil
}
// validDictCap checks whether the dictionary capacity is correct. This
// is used to weed out wrong file headers.
func validDictCap(dictcap int) bool {
if int64(dictcap) == MaxDictCap {
return true
}
for n := uint(10); n < 32; n++ {
if dictcap == 1<<n {
return true
}
if dictcap == 1<<n+1<<(n-1) {
return true
}
}
return false
}
// ValidHeader checks for a valid LZMA file header. It allows only
// dictionary sizes of 2^n or 2^n+2^(n-1) with n >= 10 or 2^32-1. If
// there is an explicit size it must not exceed 256 GiB. The length of
// the data argument must be HeaderLen.
func ValidHeader(data []byte) bool {
var h header
if err := h.unmarshalBinary(data); err != nil {
return false
}
if !validDictCap(h.dictCap) {
return false
}
return h.size < 0 || h.size <= 1<<38
}

@ -0,0 +1,398 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"fmt"
"io"
)
const (
// maximum size of compressed data in a chunk
maxCompressed = 1 << 16
// maximum size of uncompressed data in a chunk
maxUncompressed = 1 << 21
)
// chunkType represents the type of an LZMA2 chunk. Note that this
// value is an internal representation and no actual encoding of a LZMA2
// chunk header.
type chunkType byte
// Possible values for the chunk type.
const (
// end of stream
cEOS chunkType = iota
// uncompressed; reset dictionary
cUD
// uncompressed; no reset of dictionary
cU
// LZMA compressed; no reset
cL
// LZMA compressed; reset state
cLR
// LZMA compressed; reset state; new property value
cLRN
// LZMA compressed; reset state; new property value; reset dictionary
cLRND
)
// chunkTypeStrings provide a string representation for the chunk types.
var chunkTypeStrings = [...]string{
cEOS: "EOS",
cU: "U",
cUD: "UD",
cL: "L",
cLR: "LR",
cLRN: "LRN",
cLRND: "LRND",
}
// String returns a string representation of the chunk type.
func (c chunkType) String() string {
if !(cEOS <= c && c <= cLRND) {
return "unknown"
}
return chunkTypeStrings[c]
}
// Actual encodings for the chunk types in the value. Note that the high
// uncompressed size bits are stored in the header byte additionally.
const (
hEOS = 0
hUD = 1
hU = 2
hL = 1 << 7
hLR = 1<<7 | 1<<5
hLRN = 1<<7 | 1<<6
hLRND = 1<<7 | 1<<6 | 1<<5
)
// errHeaderByte indicates an unsupported value for the chunk header
// byte. These bytes starts the variable-length chunk header.
var errHeaderByte = errors.New("lzma: unsupported chunk header byte")
// headerChunkType converts the header byte into a chunk type. It
// ignores the uncompressed size bits in the chunk header byte.
func headerChunkType(h byte) (c chunkType, err error) {
if h&hL == 0 {
// no compression
switch h {
case hEOS:
c = cEOS
case hUD:
c = cUD
case hU:
c = cU
default:
return 0, errHeaderByte
}
return
}
switch h & hLRND {
case hL:
c = cL
case hLR:
c = cLR
case hLRN:
c = cLRN
case hLRND:
c = cLRND
default:
return 0, errHeaderByte
}
return
}
// uncompressedHeaderLen provides the length of an uncompressed header
const uncompressedHeaderLen = 3
// headerLen returns the length of the LZMA2 header for a given chunk
// type.
func headerLen(c chunkType) int {
switch c {
case cEOS:
return 1
case cU, cUD:
return uncompressedHeaderLen
case cL, cLR:
return 5
case cLRN, cLRND:
return 6
}
panic(fmt.Errorf("unsupported chunk type %d", c))
}
// chunkHeader represents the contents of a chunk header.
type chunkHeader struct {
ctype chunkType
uncompressed uint32
compressed uint16
props Properties
}
// String returns a string representation of the chunk header.
func (h *chunkHeader) String() string {
return fmt.Sprintf("%s %d %d %s", h.ctype, h.uncompressed,
h.compressed, &h.props)
}
// UnmarshalBinary reads the content of the chunk header from the data
// slice. The slice must have the correct length.
func (h *chunkHeader) UnmarshalBinary(data []byte) error {
if len(data) == 0 {
return errors.New("no data")
}
c, err := headerChunkType(data[0])
if err != nil {
return err
}
n := headerLen(c)
if len(data) < n {
return errors.New("incomplete data")
}
if len(data) > n {
return errors.New("invalid data length")
}
*h = chunkHeader{ctype: c}
if c == cEOS {
return nil
}
h.uncompressed = uint32(uint16BE(data[1:3]))
if c <= cU {
return nil
}
h.uncompressed |= uint32(data[0]&^hLRND) << 16
h.compressed = uint16BE(data[3:5])
if c <= cLR {
return nil
}
h.props, err = PropertiesForCode(data[5])
return err
}
// MarshalBinary encodes the chunk header value. The function checks
// whether the content of the chunk header is correct.
func (h *chunkHeader) MarshalBinary() (data []byte, err error) {
if h.ctype > cLRND {
return nil, errors.New("invalid chunk type")
}
if err = h.props.verify(); err != nil {
return nil, err
}
data = make([]byte, headerLen(h.ctype))
switch h.ctype {
case cEOS:
return data, nil
case cUD:
data[0] = hUD
case cU:
data[0] = hU
case cL:
data[0] = hL
case cLR:
data[0] = hLR
case cLRN:
data[0] = hLRN
case cLRND:
data[0] = hLRND
}
putUint16BE(data[1:3], uint16(h.uncompressed))
if h.ctype <= cU {
return data, nil
}
data[0] |= byte(h.uncompressed>>16) &^ hLRND
putUint16BE(data[3:5], h.compressed)
if h.ctype <= cLR {
return data, nil
}
data[5] = h.props.Code()
return data, nil
}
// readChunkHeader reads the chunk header from the IO reader.
func readChunkHeader(r io.Reader) (h *chunkHeader, err error) {
p := make([]byte, 1, 6)
if _, err = io.ReadFull(r, p); err != nil {
return
}
c, err := headerChunkType(p[0])
if err != nil {
return
}
p = p[:headerLen(c)]
if _, err = io.ReadFull(r, p[1:]); err != nil {
return
}
h = new(chunkHeader)
if err = h.UnmarshalBinary(p); err != nil {
return nil, err
}
return h, nil
}
// uint16BE converts a big-endian uint16 representation to an uint16
// value.
func uint16BE(p []byte) uint16 {
return uint16(p[0])<<8 | uint16(p[1])
}
// putUint16BE puts the big-endian uint16 presentation into the given
// slice.
func putUint16BE(p []byte, x uint16) {
p[0] = byte(x >> 8)
p[1] = byte(x)
}
// chunkState is used to manage the state of the chunks
type chunkState byte
// start and stop define the initial and terminating state of the chunk
// state
const (
start chunkState = 'S'
stop chunkState = 'T'
)
// errors for the chunk state handling
var (
errChunkType = errors.New("lzma: unexpected chunk type")
errState = errors.New("lzma: wrong chunk state")
)
// next transitions state based on chunk type input
func (c *chunkState) next(ctype chunkType) error {
switch *c {
// start state
case 'S':
switch ctype {
case cEOS:
*c = 'T'
case cUD:
*c = 'R'
case cLRND:
*c = 'L'
default:
return errChunkType
}
// normal LZMA mode
case 'L':
switch ctype {
case cEOS:
*c = 'T'
case cUD:
*c = 'R'
case cU:
*c = 'U'
case cL, cLR, cLRN, cLRND:
break
default:
return errChunkType
}
// reset required
case 'R':
switch ctype {
case cEOS:
*c = 'T'
case cUD, cU:
break
case cLRN, cLRND:
*c = 'L'
default:
return errChunkType
}
// uncompressed
case 'U':
switch ctype {
case cEOS:
*c = 'T'
case cUD:
*c = 'R'
case cU:
break
case cL, cLR, cLRN, cLRND:
*c = 'L'
default:
return errChunkType
}
// terminal state
case 'T':
return errChunkType
default:
return errState
}
return nil
}
// defaultChunkType returns the default chunk type for each chunk state.
func (c chunkState) defaultChunkType() chunkType {
switch c {
case 'S':
return cLRND
case 'L', 'U':
return cL
case 'R':
return cLRN
default:
// no error
return cEOS
}
}
// maxDictCap defines the maximum dictionary capacity supported by the
// LZMA2 dictionary capacity encoding.
const maxDictCap = 1<<32 - 1
// maxDictCapCode defines the maximum dictionary capacity code.
const maxDictCapCode = 40
// The function decodes the dictionary capacity byte, but doesn't change
// for the correct range of the given byte.
func decodeDictCap(c byte) int64 {
return (2 | int64(c)&1) << (11 + (c>>1)&0x1f)
}
// DecodeDictCap decodes the encoded dictionary capacity. The function
// returns an error if the code is out of range.
func DecodeDictCap(c byte) (n int64, err error) {
if c >= maxDictCapCode {
if c == maxDictCapCode {
return maxDictCap, nil
}
return 0, errors.New("lzma: invalid dictionary size code")
}
return decodeDictCap(c), nil
}
// EncodeDictCap encodes a dictionary capacity. The function returns the
// code for the capacity that is greater or equal n. If n exceeds the
// maximum support dictionary capacity, the maximum value is returned.
func EncodeDictCap(n int64) byte {
a, b := byte(0), byte(40)
for a < b {
c := a + (b-a)>>1
m := decodeDictCap(c)
if n <= m {
if n == m {
return c
}
b = c
} else {
a = c + 1
}
}
return a
}

@ -0,0 +1,116 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import "errors"
// maxPosBits defines the number of bits of the position value that are used to
// to compute the posState value. The value is used to select the tree codec
// for length encoding and decoding.
const maxPosBits = 4
// minMatchLen and maxMatchLen give the minimum and maximum values for
// encoding and decoding length values. minMatchLen is also used as base
// for the encoded length values.
const (
minMatchLen = 2
maxMatchLen = minMatchLen + 16 + 256 - 1
)
// lengthCodec support the encoding of the length value.
type lengthCodec struct {
choice [2]prob
low [1 << maxPosBits]treeCodec
mid [1 << maxPosBits]treeCodec
high treeCodec
}
// deepcopy initializes the lc value as deep copy of the source value.
func (lc *lengthCodec) deepcopy(src *lengthCodec) {
if lc == src {
return
}
lc.choice = src.choice
for i := range lc.low {
lc.low[i].deepcopy(&src.low[i])
}
for i := range lc.mid {
lc.mid[i].deepcopy(&src.mid[i])
}
lc.high.deepcopy(&src.high)
}
// init initializes a new length codec.
func (lc *lengthCodec) init() {
for i := range lc.choice {
lc.choice[i] = probInit
}
for i := range lc.low {
lc.low[i] = makeTreeCodec(3)
}
for i := range lc.mid {
lc.mid[i] = makeTreeCodec(3)
}
lc.high = makeTreeCodec(8)
}
// Encode encodes the length offset. The length offset l can be compute by
// subtracting minMatchLen (2) from the actual length.
//
// l = length - minMatchLen
//
func (lc *lengthCodec) Encode(e *rangeEncoder, l uint32, posState uint32,
) (err error) {
if l > maxMatchLen-minMatchLen {
return errors.New("lengthCodec.Encode: l out of range")
}
if l < 8 {
if err = lc.choice[0].Encode(e, 0); err != nil {
return
}
return lc.low[posState].Encode(e, l)
}
if err = lc.choice[0].Encode(e, 1); err != nil {
return
}
if l < 16 {
if err = lc.choice[1].Encode(e, 0); err != nil {
return
}
return lc.mid[posState].Encode(e, l-8)
}
if err = lc.choice[1].Encode(e, 1); err != nil {
return
}
if err = lc.high.Encode(e, l-16); err != nil {
return
}
return nil
}
// Decode reads the length offset. Add minMatchLen to compute the actual length
// to the length offset l.
func (lc *lengthCodec) Decode(d *rangeDecoder, posState uint32,
) (l uint32, err error) {
var b uint32
if b, err = lc.choice[0].Decode(d); err != nil {
return
}
if b == 0 {
l, err = lc.low[posState].Decode(d)
return
}
if b, err = lc.choice[1].Decode(d); err != nil {
return
}
if b == 0 {
l, err = lc.mid[posState].Decode(d)
l += 8
return
}
l, err = lc.high.Decode(d)
l += 16
return
}

@ -0,0 +1,125 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
// literalCodec supports the encoding of literal. It provides 768 probability
// values per literal state. The upper 512 probabilities are used with the
// context of a match bit.
type literalCodec struct {
probs []prob
}
// deepcopy initializes literal codec c as a deep copy of the source.
func (c *literalCodec) deepcopy(src *literalCodec) {
if c == src {
return
}
c.probs = make([]prob, len(src.probs))
copy(c.probs, src.probs)
}
// init initializes the literal codec.
func (c *literalCodec) init(lc, lp int) {
switch {
case !(minLC <= lc && lc <= maxLC):
panic("lc out of range")
case !(minLP <= lp && lp <= maxLP):
panic("lp out of range")
}
c.probs = make([]prob, 0x300<<uint(lc+lp))
for i := range c.probs {
c.probs[i] = probInit
}
}
// Encode encodes the byte s using a range encoder as well as the current LZMA
// encoder state, a match byte and the literal state.
func (c *literalCodec) Encode(e *rangeEncoder, s byte,
state uint32, match byte, litState uint32,
) (err error) {
k := litState * 0x300
probs := c.probs[k : k+0x300]
symbol := uint32(1)
r := uint32(s)
if state >= 7 {
m := uint32(match)
for {
matchBit := (m >> 7) & 1
m <<= 1
bit := (r >> 7) & 1
r <<= 1
i := ((1 + matchBit) << 8) | symbol
if err = probs[i].Encode(e, bit); err != nil {
return
}
symbol = (symbol << 1) | bit
if matchBit != bit {
break
}
if symbol >= 0x100 {
break
}
}
}
for symbol < 0x100 {
bit := (r >> 7) & 1
r <<= 1
if err = probs[symbol].Encode(e, bit); err != nil {
return
}
symbol = (symbol << 1) | bit
}
return nil
}
// Decode decodes a literal byte using the range decoder as well as the LZMA
// state, a match byte, and the literal state.
func (c *literalCodec) Decode(d *rangeDecoder,
state uint32, match byte, litState uint32,
) (s byte, err error) {
k := litState * 0x300
probs := c.probs[k : k+0x300]
symbol := uint32(1)
if state >= 7 {
m := uint32(match)
for {
matchBit := (m >> 7) & 1
m <<= 1
i := ((1 + matchBit) << 8) | symbol
bit, err := d.DecodeBit(&probs[i])
if err != nil {
return 0, err
}
symbol = (symbol << 1) | bit
if matchBit != bit {
break
}
if symbol >= 0x100 {
break
}
}
}
for symbol < 0x100 {
bit, err := d.DecodeBit(&probs[symbol])
if err != nil {
return 0, err
}
symbol = (symbol << 1) | bit
}
s = byte(symbol - 0x100)
return s, nil
}
// minLC and maxLC define the range for LC values.
const (
minLC = 0
maxLC = 8
)
// minLC and maxLC define the range for LP values.
const (
minLP = 0
maxLP = 4
)

@ -0,0 +1,52 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import "errors"
// MatchAlgorithm identifies an algorithm to find matches in the
// dictionary.
type MatchAlgorithm byte
// Supported matcher algorithms.
const (
HashTable4 MatchAlgorithm = iota
BinaryTree
)
// maStrings are used by the String method.
var maStrings = map[MatchAlgorithm]string{
HashTable4: "HashTable4",
BinaryTree: "BinaryTree",
}
// String returns a string representation of the Matcher.
func (a MatchAlgorithm) String() string {
if s, ok := maStrings[a]; ok {
return s
}
return "unknown"
}
var errUnsupportedMatchAlgorithm = errors.New(
"lzma: unsupported match algorithm value")
// verify checks whether the matcher value is supported.
func (a MatchAlgorithm) verify() error {
if _, ok := maStrings[a]; !ok {
return errUnsupportedMatchAlgorithm
}
return nil
}
func (a MatchAlgorithm) new(dictCap int) (m matcher, err error) {
switch a {
case HashTable4:
return newHashTable(dictCap, 4)
case BinaryTree:
return newBinTree(dictCap)
}
return nil, errUnsupportedMatchAlgorithm
}

@ -0,0 +1,55 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"fmt"
"unicode"
)
// operation represents an operation on the dictionary during encoding or
// decoding.
type operation interface {
Len() int
}
// rep represents a repetition at the given distance and the given length
type match struct {
// supports all possible distance values, including the eos marker
distance int64
// length
n int
}
// Len returns the number of bytes matched.
func (m match) Len() int {
return m.n
}
// String returns a string representation for the repetition.
func (m match) String() string {
return fmt.Sprintf("M{%d,%d}", m.distance, m.n)
}
// lit represents a single byte literal.
type lit struct {
b byte
}
// Len returns 1 for the single byte literal.
func (l lit) Len() int {
return 1
}
// String returns a string representation for the literal.
func (l lit) String() string {
var c byte
if unicode.IsPrint(rune(l.b)) {
c = l.b
} else {
c = '.'
}
return fmt.Sprintf("L{%c/%02x}", c, l.b)
}

@ -0,0 +1,53 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
// movebits defines the number of bits used for the updates of probability
// values.
const movebits = 5
// probbits defines the number of bits of a probability value.
const probbits = 11
// probInit defines 0.5 as initial value for prob values.
const probInit prob = 1 << (probbits - 1)
// Type prob represents probabilities. The type can also be used to encode and
// decode single bits.
type prob uint16
// Dec decreases the probability. The decrease is proportional to the
// probability value.
func (p *prob) dec() {
*p -= *p >> movebits
}
// Inc increases the probability. The Increase is proportional to the
// difference of 1 and the probability value.
func (p *prob) inc() {
*p += ((1 << probbits) - *p) >> movebits
}
// Computes the new bound for a given range using the probability value.
func (p prob) bound(r uint32) uint32 {
return (r >> probbits) * uint32(p)
}
// Bits returns 1. One is the number of bits that can be encoded or decoded
// with a single prob value.
func (p prob) Bits() int {
return 1
}
// Encode encodes the least-significant bit of v. Note that the p value will be
// changed.
func (p *prob) Encode(e *rangeEncoder, v uint32) error {
return e.EncodeBit(v, p)
}
// Decode decodes a single bit. Note that the p value will change.
func (p *prob) Decode(d *rangeDecoder) (v uint32, err error) {
return d.DecodeBit(p)
}

@ -0,0 +1,69 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"fmt"
)
// maximum and minimum values for the LZMA properties.
const (
minPB = 0
maxPB = 4
)
// maxPropertyCode is the possible maximum of a properties code byte.
const maxPropertyCode = (maxPB+1)*(maxLP+1)*(maxLC+1) - 1
// Properties contains the parameters LC, LP and PB. The parameter LC
// defines the number of literal context bits; parameter LP the number
// of literal position bits and PB the number of position bits.
type Properties struct {
LC int
LP int
PB int
}
// String returns the properties in a string representation.
func (p *Properties) String() string {
return fmt.Sprintf("LC %d LP %d PB %d", p.LC, p.LP, p.PB)
}
// PropertiesForCode converts a properties code byte into a Properties value.
func PropertiesForCode(code byte) (p Properties, err error) {
if code > maxPropertyCode {
return p, errors.New("lzma: invalid properties code")
}
p.LC = int(code % 9)
code /= 9
p.LP = int(code % 5)
code /= 5
p.PB = int(code % 5)
return p, err
}
// verify checks the properties for correctness.
func (p *Properties) verify() error {
if p == nil {
return errors.New("lzma: properties are nil")
}
if !(minLC <= p.LC && p.LC <= maxLC) {
return errors.New("lzma: lc out of range")
}
if !(minLP <= p.LP && p.LP <= maxLP) {
return errors.New("lzma: lp out of range")
}
if !(minPB <= p.PB && p.PB <= maxPB) {
return errors.New("lzma: pb out of range")
}
return nil
}
// Code converts the properties to a byte. The function assumes that
// the properties components are all in range.
func (p Properties) Code() byte {
return byte((p.PB*5+p.LP)*9 + p.LC)
}

@ -0,0 +1,222 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"io"
)
// rangeEncoder implements range encoding of single bits. The low value can
// overflow therefore we need uint64. The cache value is used to handle
// overflows.
type rangeEncoder struct {
lbw *LimitedByteWriter
nrange uint32
low uint64
cacheLen int64
cache byte
}
// maxInt64 provides the maximal value of the int64 type
const maxInt64 = 1<<63 - 1
// newRangeEncoder creates a new range encoder.
func newRangeEncoder(bw io.ByteWriter) (re *rangeEncoder, err error) {
lbw, ok := bw.(*LimitedByteWriter)
if !ok {
lbw = &LimitedByteWriter{BW: bw, N: maxInt64}
}
return &rangeEncoder{
lbw: lbw,
nrange: 0xffffffff,
cacheLen: 1}, nil
}
// Available returns the number of bytes that still can be written. The
// method takes the bytes that will be currently written by Close into
// account.
func (e *rangeEncoder) Available() int64 {
return e.lbw.N - (e.cacheLen + 4)
}
// writeByte writes a single byte to the underlying writer. An error is
// returned if the limit is reached. The written byte will be counted if
// the underlying writer doesn't return an error.
func (e *rangeEncoder) writeByte(c byte) error {
if e.Available() < 1 {
return ErrLimit
}
return e.lbw.WriteByte(c)
}
// DirectEncodeBit encodes the least-significant bit of b with probability 1/2.
func (e *rangeEncoder) DirectEncodeBit(b uint32) error {
e.nrange >>= 1
e.low += uint64(e.nrange) & (0 - (uint64(b) & 1))
// normalize
const top = 1 << 24
if e.nrange >= top {
return nil
}
e.nrange <<= 8
return e.shiftLow()
}
// EncodeBit encodes the least significant bit of b. The p value will be
// updated by the function depending on the bit encoded.
func (e *rangeEncoder) EncodeBit(b uint32, p *prob) error {
bound := p.bound(e.nrange)
if b&1 == 0 {
e.nrange = bound
p.inc()
} else {
e.low += uint64(bound)
e.nrange -= bound
p.dec()
}
// normalize
const top = 1 << 24
if e.nrange >= top {
return nil
}
e.nrange <<= 8
return e.shiftLow()
}
// Close writes a complete copy of the low value.
func (e *rangeEncoder) Close() error {
for i := 0; i < 5; i++ {
if err := e.shiftLow(); err != nil {
return err
}
}
return nil
}
// shiftLow shifts the low value for 8 bit. The shifted byte is written into
// the byte writer. The cache value is used to handle overflows.
func (e *rangeEncoder) shiftLow() error {
if uint32(e.low) < 0xff000000 || (e.low>>32) != 0 {
tmp := e.cache
for {
err := e.writeByte(tmp + byte(e.low>>32))
if err != nil {
return err
}
tmp = 0xff
e.cacheLen--
if e.cacheLen <= 0 {
if e.cacheLen < 0 {
panic("negative cacheLen")
}
break
}
}
e.cache = byte(uint32(e.low) >> 24)
}
e.cacheLen++
e.low = uint64(uint32(e.low) << 8)
return nil
}
// rangeDecoder decodes single bits of the range encoding stream.
type rangeDecoder struct {
br io.ByteReader
nrange uint32
code uint32
}
// newRangeDecoder initializes a range decoder. It reads five bytes from the
// reader and therefore may return an error.
func newRangeDecoder(br io.ByteReader) (d *rangeDecoder, err error) {
d = &rangeDecoder{br: br, nrange: 0xffffffff}
b, err := d.br.ReadByte()
if err != nil {
return nil, err
}
if b != 0 {
return nil, errors.New("newRangeDecoder: first byte not zero")
}
for i := 0; i < 4; i++ {
if err = d.updateCode(); err != nil {
return nil, err
}
}
if d.code >= d.nrange {
return nil, errors.New("newRangeDecoder: d.code >= d.nrange")
}
return d, nil
}
// possiblyAtEnd checks whether the decoder may be at the end of the stream.
func (d *rangeDecoder) possiblyAtEnd() bool {
return d.code == 0
}
// DirectDecodeBit decodes a bit with probability 1/2. The return value b will
// contain the bit at the least-significant position. All other bits will be
// zero.
func (d *rangeDecoder) DirectDecodeBit() (b uint32, err error) {
d.nrange >>= 1
d.code -= d.nrange
t := 0 - (d.code >> 31)
d.code += d.nrange & t
b = (t + 1) & 1
// d.code will stay less then d.nrange
// normalize
// assume d.code < d.nrange
const top = 1 << 24
if d.nrange >= top {
return b, nil
}
d.nrange <<= 8
// d.code < d.nrange will be maintained
return b, d.updateCode()
}
// decodeBit decodes a single bit. The bit will be returned at the
// least-significant position. All other bits will be zero. The probability
// value will be updated.
func (d *rangeDecoder) DecodeBit(p *prob) (b uint32, err error) {
bound := p.bound(d.nrange)
if d.code < bound {
d.nrange = bound
p.inc()
b = 0
} else {
d.code -= bound
d.nrange -= bound
p.dec()
b = 1
}
// normalize
// assume d.code < d.nrange
const top = 1 << 24
if d.nrange >= top {
return b, nil
}
d.nrange <<= 8
// d.code < d.nrange will be maintained
return b, d.updateCode()
}
// updateCode reads a new byte into the code.
func (d *rangeDecoder) updateCode() error {
b, err := d.br.ReadByte()
if err != nil {
return err
}
d.code = (d.code << 8) | uint32(b)
return nil
}

@ -0,0 +1,100 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package lzma supports the decoding and encoding of LZMA streams.
// Reader and Writer support the classic LZMA format. Reader2 and
// Writer2 support the decoding and encoding of LZMA2 streams.
//
// The package is written completely in Go and doesn't rely on any external
// library.
package lzma
import (
"errors"
"io"
)
// ReaderConfig stores the parameters for the reader of the classic LZMA
// format.
type ReaderConfig struct {
DictCap int
}
// fill converts the zero values of the configuration to the default values.
func (c *ReaderConfig) fill() {
if c.DictCap == 0 {
c.DictCap = 8 * 1024 * 1024
}
}
// Verify checks the reader configuration for errors. Zero values will
// be replaced by default values.
func (c *ReaderConfig) Verify() error {
c.fill()
if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) {
return errors.New("lzma: dictionary capacity is out of range")
}
return nil
}
// Reader provides a reader for LZMA files or streams.
type Reader struct {
lzma io.Reader
h header
d *decoder
}
// NewReader creates a new reader for an LZMA stream using the classic
// format. NewReader reads and checks the header of the LZMA stream.
func NewReader(lzma io.Reader) (r *Reader, err error) {
return ReaderConfig{}.NewReader(lzma)
}
// NewReader creates a new reader for an LZMA stream in the classic
// format. The function reads and verifies the the header of the LZMA
// stream.
func (c ReaderConfig) NewReader(lzma io.Reader) (r *Reader, err error) {
if err = c.Verify(); err != nil {
return nil, err
}
data := make([]byte, HeaderLen)
if _, err := io.ReadFull(lzma, data); err != nil {
if err == io.EOF {
return nil, errors.New("lzma: unexpected EOF")
}
return nil, err
}
r = &Reader{lzma: lzma}
if err = r.h.unmarshalBinary(data); err != nil {
return nil, err
}
if r.h.dictCap < MinDictCap {
return nil, errors.New("lzma: dictionary capacity too small")
}
dictCap := r.h.dictCap
if c.DictCap > dictCap {
dictCap = c.DictCap
}
state := newState(r.h.properties)
dict, err := newDecoderDict(dictCap)
if err != nil {
return nil, err
}
r.d, err = newDecoder(ByteReader(lzma), state, dict, r.h.size)
if err != nil {
return nil, err
}
return r, nil
}
// EOSMarker indicates that an EOS marker has been encountered.
func (r *Reader) EOSMarker() bool {
return r.d.eosMarker
}
// Read returns uncompressed data.
func (r *Reader) Read(p []byte) (n int, err error) {
return r.d.Read(p)
}

@ -0,0 +1,231 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"io"
"github.com/ulikunitz/xz/internal/xlog"
)
// Reader2Config stores the parameters for the LZMA2 reader.
// format.
type Reader2Config struct {
DictCap int
}
// fill converts the zero values of the configuration to the default values.
func (c *Reader2Config) fill() {
if c.DictCap == 0 {
c.DictCap = 8 * 1024 * 1024
}
}
// Verify checks the reader configuration for errors. Zero configuration values
// will be replaced by default values.
func (c *Reader2Config) Verify() error {
c.fill()
if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) {
return errors.New("lzma: dictionary capacity is out of range")
}
return nil
}
// Reader2 supports the reading of LZMA2 chunk sequences. Note that the
// first chunk should have a dictionary reset and the first compressed
// chunk a properties reset. The chunk sequence may not be terminated by
// an end-of-stream chunk.
type Reader2 struct {
r io.Reader
err error
dict *decoderDict
ur *uncompressedReader
decoder *decoder
chunkReader io.Reader
cstate chunkState
}
// NewReader2 creates a reader for an LZMA2 chunk sequence.
func NewReader2(lzma2 io.Reader) (r *Reader2, err error) {
return Reader2Config{}.NewReader2(lzma2)
}
// NewReader2 creates an LZMA2 reader using the given configuration.
func (c Reader2Config) NewReader2(lzma2 io.Reader) (r *Reader2, err error) {
if err = c.Verify(); err != nil {
return nil, err
}
r = &Reader2{r: lzma2, cstate: start}
r.dict, err = newDecoderDict(c.DictCap)
if err != nil {
return nil, err
}
if err = r.startChunk(); err != nil {
r.err = err
}
return r, nil
}
// uncompressed tests whether the chunk type specifies an uncompressed
// chunk.
func uncompressed(ctype chunkType) bool {
return ctype == cU || ctype == cUD
}
// startChunk parses a new chunk.
func (r *Reader2) startChunk() error {
r.chunkReader = nil
header, err := readChunkHeader(r.r)
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return err
}
xlog.Debugf("chunk header %v", header)
if err = r.cstate.next(header.ctype); err != nil {
return err
}
if r.cstate == stop {
return io.EOF
}
if header.ctype == cUD || header.ctype == cLRND {
r.dict.Reset()
}
size := int64(header.uncompressed) + 1
if uncompressed(header.ctype) {
if r.ur != nil {
r.ur.Reopen(r.r, size)
} else {
r.ur = newUncompressedReader(r.r, r.dict, size)
}
r.chunkReader = r.ur
return nil
}
br := ByteReader(io.LimitReader(r.r, int64(header.compressed)+1))
if r.decoder == nil {
state := newState(header.props)
r.decoder, err = newDecoder(br, state, r.dict, size)
if err != nil {
return err
}
r.chunkReader = r.decoder
return nil
}
switch header.ctype {
case cLR:
r.decoder.State.Reset()
case cLRN, cLRND:
r.decoder.State = newState(header.props)
}
err = r.decoder.Reopen(br, size)
if err != nil {
return err
}
r.chunkReader = r.decoder
return nil
}
// Read reads data from the LZMA2 chunk sequence.
func (r *Reader2) Read(p []byte) (n int, err error) {
if r.err != nil {
return 0, r.err
}
for n < len(p) {
var k int
k, err = r.chunkReader.Read(p[n:])
n += k
if err != nil {
if err == io.EOF {
err = r.startChunk()
if err == nil {
continue
}
}
r.err = err
return n, err
}
if k == 0 {
r.err = errors.New("lzma: Reader2 doesn't get data")
return n, r.err
}
}
return n, nil
}
// EOS returns whether the LZMA2 stream has been terminated by an
// end-of-stream chunk.
func (r *Reader2) EOS() bool {
return r.cstate == stop
}
// uncompressedReader is used to read uncompressed chunks.
type uncompressedReader struct {
lr io.LimitedReader
Dict *decoderDict
eof bool
err error
}
// newUncompressedReader initializes a new uncompressedReader.
func newUncompressedReader(r io.Reader, dict *decoderDict, size int64) *uncompressedReader {
ur := &uncompressedReader{
lr: io.LimitedReader{R: r, N: size},
Dict: dict,
}
return ur
}
// Reopen reinitializes an uncompressed reader.
func (ur *uncompressedReader) Reopen(r io.Reader, size int64) {
ur.err = nil
ur.eof = false
ur.lr = io.LimitedReader{R: r, N: size}
}
// fill reads uncompressed data into the dictionary.
func (ur *uncompressedReader) fill() error {
if !ur.eof {
n, err := io.CopyN(ur.Dict, &ur.lr, int64(ur.Dict.Available()))
if err != io.EOF {
return err
}
ur.eof = true
if n > 0 {
return nil
}
}
if ur.lr.N != 0 {
return io.ErrUnexpectedEOF
}
return io.EOF
}
// Read reads uncompressed data from the limited reader.
func (ur *uncompressedReader) Read(p []byte) (n int, err error) {
if ur.err != nil {
return 0, ur.err
}
for {
var k int
k, err = ur.Dict.Read(p[n:])
n += k
if n >= len(p) {
return n, nil
}
if err != nil {
break
}
err = ur.fill()
if err != nil {
break
}
}
ur.err = err
return n, err
}

@ -0,0 +1,145 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
// states defines the overall state count
const states = 12
// State maintains the full state of the operation encoding or decoding
// process.
type state struct {
rep [4]uint32
isMatch [states << maxPosBits]prob
isRepG0Long [states << maxPosBits]prob
isRep [states]prob
isRepG0 [states]prob
isRepG1 [states]prob
isRepG2 [states]prob
litCodec literalCodec
lenCodec lengthCodec
repLenCodec lengthCodec
distCodec distCodec
state uint32
posBitMask uint32
Properties Properties
}
// initProbSlice initializes a slice of probabilities.
func initProbSlice(p []prob) {
for i := range p {
p[i] = probInit
}
}
// Reset sets all state information to the original values.
func (s *state) Reset() {
p := s.Properties
*s = state{
Properties: p,
// dict: s.dict,
posBitMask: (uint32(1) << uint(p.PB)) - 1,
}
initProbSlice(s.isMatch[:])
initProbSlice(s.isRep[:])
initProbSlice(s.isRepG0[:])
initProbSlice(s.isRepG1[:])
initProbSlice(s.isRepG2[:])
initProbSlice(s.isRepG0Long[:])
s.litCodec.init(p.LC, p.LP)
s.lenCodec.init()
s.repLenCodec.init()
s.distCodec.init()
}
// newState creates a new state from the give Properties.
func newState(p Properties) *state {
s := &state{Properties: p}
s.Reset()
return s
}
// deepcopy initializes s as a deep copy of the source.
func (s *state) deepcopy(src *state) {
if s == src {
return
}
s.rep = src.rep
s.isMatch = src.isMatch
s.isRepG0Long = src.isRepG0Long
s.isRep = src.isRep
s.isRepG0 = src.isRepG0
s.isRepG1 = src.isRepG1
s.isRepG2 = src.isRepG2
s.litCodec.deepcopy(&src.litCodec)
s.lenCodec.deepcopy(&src.lenCodec)
s.repLenCodec.deepcopy(&src.repLenCodec)
s.distCodec.deepcopy(&src.distCodec)
s.state = src.state
s.posBitMask = src.posBitMask
s.Properties = src.Properties
}
// cloneState creates a new clone of the give state.
func cloneState(src *state) *state {
s := new(state)
s.deepcopy(src)
return s
}
// updateStateLiteral updates the state for a literal.
func (s *state) updateStateLiteral() {
switch {
case s.state < 4:
s.state = 0
return
case s.state < 10:
s.state -= 3
return
}
s.state -= 6
}
// updateStateMatch updates the state for a match.
func (s *state) updateStateMatch() {
if s.state < 7 {
s.state = 7
} else {
s.state = 10
}
}
// updateStateRep updates the state for a repetition.
func (s *state) updateStateRep() {
if s.state < 7 {
s.state = 8
} else {
s.state = 11
}
}
// updateStateShortRep updates the state for a short repetition.
func (s *state) updateStateShortRep() {
if s.state < 7 {
s.state = 9
} else {
s.state = 11
}
}
// states computes the states of the operation codec.
func (s *state) states(dictHead int64) (state1, state2, posState uint32) {
state1 = s.state
posState = uint32(dictHead) & s.posBitMask
state2 = (s.state << maxPosBits) | posState
return
}
// litState computes the literal state.
func (s *state) litState(prev byte, dictHead int64) uint32 {
lp, lc := uint(s.Properties.LP), uint(s.Properties.LC)
litState := ((uint32(dictHead) & ((1 << lp) - 1)) << lc) |
(uint32(prev) >> (8 - lc))
return litState
}

@ -0,0 +1,133 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
// treeCodec encodes or decodes values with a fixed bit size. It is using a
// tree of probability value. The root of the tree is the most-significant bit.
type treeCodec struct {
probTree
}
// makeTreeCodec makes a tree codec. The bits value must be inside the range
// [1,32].
func makeTreeCodec(bits int) treeCodec {
return treeCodec{makeProbTree(bits)}
}
// deepcopy initializes tc as a deep copy of the source.
func (tc *treeCodec) deepcopy(src *treeCodec) {
tc.probTree.deepcopy(&src.probTree)
}
// Encode uses the range encoder to encode a fixed-bit-size value.
func (tc *treeCodec) Encode(e *rangeEncoder, v uint32) (err error) {
m := uint32(1)
for i := int(tc.bits) - 1; i >= 0; i-- {
b := (v >> uint(i)) & 1
if err := e.EncodeBit(b, &tc.probs[m]); err != nil {
return err
}
m = (m << 1) | b
}
return nil
}
// Decodes uses the range decoder to decode a fixed-bit-size value. Errors may
// be caused by the range decoder.
func (tc *treeCodec) Decode(d *rangeDecoder) (v uint32, err error) {
m := uint32(1)
for j := 0; j < int(tc.bits); j++ {
b, err := d.DecodeBit(&tc.probs[m])
if err != nil {
return 0, err
}
m = (m << 1) | b
}
return m - (1 << uint(tc.bits)), nil
}
// treeReverseCodec is another tree codec, where the least-significant bit is
// the start of the probability tree.
type treeReverseCodec struct {
probTree
}
// deepcopy initializes the treeReverseCodec as a deep copy of the
// source.
func (tc *treeReverseCodec) deepcopy(src *treeReverseCodec) {
tc.probTree.deepcopy(&src.probTree)
}
// makeTreeReverseCodec creates treeReverseCodec value. The bits argument must
// be in the range [1,32].
func makeTreeReverseCodec(bits int) treeReverseCodec {
return treeReverseCodec{makeProbTree(bits)}
}
// Encode uses range encoder to encode a fixed-bit-size value. The range
// encoder may cause errors.
func (tc *treeReverseCodec) Encode(v uint32, e *rangeEncoder) (err error) {
m := uint32(1)
for i := uint(0); i < uint(tc.bits); i++ {
b := (v >> i) & 1
if err := e.EncodeBit(b, &tc.probs[m]); err != nil {
return err
}
m = (m << 1) | b
}
return nil
}
// Decodes uses the range decoder to decode a fixed-bit-size value. Errors
// returned by the range decoder will be returned.
func (tc *treeReverseCodec) Decode(d *rangeDecoder) (v uint32, err error) {
m := uint32(1)
for j := uint(0); j < uint(tc.bits); j++ {
b, err := d.DecodeBit(&tc.probs[m])
if err != nil {
return 0, err
}
m = (m << 1) | b
v |= b << j
}
return v, nil
}
// probTree stores enough probability values to be used by the treeEncode and
// treeDecode methods of the range coder types.
type probTree struct {
probs []prob
bits byte
}
// deepcopy initializes the probTree value as a deep copy of the source.
func (t *probTree) deepcopy(src *probTree) {
if t == src {
return
}
t.probs = make([]prob, len(src.probs))
copy(t.probs, src.probs)
t.bits = src.bits
}
// makeProbTree initializes a probTree structure.
func makeProbTree(bits int) probTree {
if !(1 <= bits && bits <= 32) {
panic("bits outside of range [1,32]")
}
t := probTree{
bits: byte(bits),
probs: make([]prob, 1<<uint(bits)),
}
for i := range t.probs {
t.probs[i] = probInit
}
return t
}
// Bits provides the number of bits for the values to de- or encode.
func (t *probTree) Bits() int {
return int(t.bits)
}

@ -0,0 +1,209 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"bufio"
"errors"
"io"
)
// MinDictCap and MaxDictCap provide the range of supported dictionary
// capacities.
const (
MinDictCap = 1 << 12
MaxDictCap = 1<<32 - 1
)
// WriterConfig defines the configuration parameter for a writer.
type WriterConfig struct {
// Properties for the encoding. If the it is nil the value
// {LC: 3, LP: 0, PB: 2} will be chosen.
Properties *Properties
// The capacity of the dictionary. If DictCap is zero, the value
// 8 MiB will be chosen.
DictCap int
// Size of the lookahead buffer; value 0 indicates default size
// 4096
BufSize int
// Match algorithm
Matcher MatchAlgorithm
// SizeInHeader indicates that the header will contain an
// explicit size.
SizeInHeader bool
// Size of the data to be encoded. A positive value will imply
// than an explicit size will be set in the header.
Size int64
// EOSMarker requests whether the EOSMarker needs to be written.
// If no explicit size is been given the EOSMarker will be
// set automatically.
EOSMarker bool
}
// fill converts zero-value fields to their explicit default values.
func (c *WriterConfig) fill() {
if c.Properties == nil {
c.Properties = &Properties{LC: 3, LP: 0, PB: 2}
}
if c.DictCap == 0 {
c.DictCap = 8 * 1024 * 1024
}
if c.BufSize == 0 {
c.BufSize = 4096
}
if c.Size > 0 {
c.SizeInHeader = true
}
if !c.SizeInHeader {
c.EOSMarker = true
}
}
// Verify checks WriterConfig for errors. Verify will replace zero
// values with default values.
func (c *WriterConfig) Verify() error {
c.fill()
var err error
if c == nil {
return errors.New("lzma: WriterConfig is nil")
}
if c.Properties == nil {
return errors.New("lzma: WriterConfig has no Properties set")
}
if err = c.Properties.verify(); err != nil {
return err
}
if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) {
return errors.New("lzma: dictionary capacity is out of range")
}
if !(maxMatchLen <= c.BufSize) {
return errors.New("lzma: lookahead buffer size too small")
}
if c.SizeInHeader {
if c.Size < 0 {
return errors.New("lzma: negative size not supported")
}
} else if !c.EOSMarker {
return errors.New("lzma: EOS marker is required")
}
if err = c.Matcher.verify(); err != nil {
return err
}
return nil
}
// header returns the header structure for this configuration.
func (c *WriterConfig) header() header {
h := header{
properties: *c.Properties,
dictCap: c.DictCap,
size: -1,
}
if c.SizeInHeader {
h.size = c.Size
}
return h
}
// Writer writes an LZMA stream in the classic format.
type Writer struct {
h header
bw io.ByteWriter
buf *bufio.Writer
e *encoder
}
// NewWriter creates a new LZMA writer for the classic format. The
// method will write the header to the underlying stream.
func (c WriterConfig) NewWriter(lzma io.Writer) (w *Writer, err error) {
if err = c.Verify(); err != nil {
return nil, err
}
w = &Writer{h: c.header()}
var ok bool
w.bw, ok = lzma.(io.ByteWriter)
if !ok {
w.buf = bufio.NewWriter(lzma)
w.bw = w.buf
}
state := newState(w.h.properties)
m, err := c.Matcher.new(w.h.dictCap)
if err != nil {
return nil, err
}
dict, err := newEncoderDict(w.h.dictCap, c.BufSize, m)
if err != nil {
return nil, err
}
var flags encoderFlags
if c.EOSMarker {
flags = eosMarker
}
if w.e, err = newEncoder(w.bw, state, dict, flags); err != nil {
return nil, err
}
if err = w.writeHeader(); err != nil {
return nil, err
}
return w, nil
}
// NewWriter creates a new LZMA writer using the classic format. The
// function writes the header to the underlying stream.
func NewWriter(lzma io.Writer) (w *Writer, err error) {
return WriterConfig{}.NewWriter(lzma)
}
// writeHeader writes the LZMA header into the stream.
func (w *Writer) writeHeader() error {
data, err := w.h.marshalBinary()
if err != nil {
return err
}
_, err = w.bw.(io.Writer).Write(data)
return err
}
// Write puts data into the Writer.
func (w *Writer) Write(p []byte) (n int, err error) {
if w.h.size >= 0 {
m := w.h.size
m -= w.e.Compressed() + int64(w.e.dict.Buffered())
if m < 0 {
m = 0
}
if m < int64(len(p)) {
p = p[:m]
err = ErrNoSpace
}
}
var werr error
if n, werr = w.e.Write(p); werr != nil {
err = werr
}
return n, err
}
// Close closes the writer stream. It ensures that all data from the
// buffer will be compressed and the LZMA stream will be finished.
func (w *Writer) Close() error {
if w.h.size >= 0 {
n := w.e.Compressed() + int64(w.e.dict.Buffered())
if n != w.h.size {
return errSize
}
}
err := w.e.Close()
if w.buf != nil {
ferr := w.buf.Flush()
if err == nil {
err = ferr
}
}
return err
}

@ -0,0 +1,305 @@
// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"bytes"
"errors"
"io"
)
// Writer2Config is used to create a Writer2 using parameters.
type Writer2Config struct {
// The properties for the encoding. If the it is nil the value
// {LC: 3, LP: 0, PB: 2} will be chosen.
Properties *Properties
// The capacity of the dictionary. If DictCap is zero, the value
// 8 MiB will be chosen.
DictCap int
// Size of the lookahead buffer; value 0 indicates default size
// 4096
BufSize int
// Match algorithm
Matcher MatchAlgorithm
}
// fill replaces zero values with default values.
func (c *Writer2Config) fill() {
if c.Properties == nil {
c.Properties = &Properties{LC: 3, LP: 0, PB: 2}
}
if c.DictCap == 0 {
c.DictCap = 8 * 1024 * 1024
}
if c.BufSize == 0 {
c.BufSize = 4096
}
}
// Verify checks the Writer2Config for correctness. Zero values will be
// replaced by default values.
func (c *Writer2Config) Verify() error {
c.fill()
var err error
if c == nil {
return errors.New("lzma: WriterConfig is nil")
}
if c.Properties == nil {
return errors.New("lzma: WriterConfig has no Properties set")
}
if err = c.Properties.verify(); err != nil {
return err
}
if !(MinDictCap <= c.DictCap && int64(c.DictCap) <= MaxDictCap) {
return errors.New("lzma: dictionary capacity is out of range")
}
if !(maxMatchLen <= c.BufSize) {
return errors.New("lzma: lookahead buffer size too small")
}
if c.Properties.LC+c.Properties.LP > 4 {
return errors.New("lzma: sum of lc and lp exceeds 4")
}
if err = c.Matcher.verify(); err != nil {
return err
}
return nil
}
// Writer2 supports the creation of an LZMA2 stream. But note that
// written data is buffered, so call Flush or Close to write data to the
// underlying writer. The Close method writes the end-of-stream marker
// to the stream. So you may be able to concatenate the output of two
// writers as long the output of the first writer has only been flushed
// but not closed.
//
// Any change to the fields Properties, DictCap must be done before the
// first call to Write, Flush or Close.
type Writer2 struct {
w io.Writer
start *state
encoder *encoder
cstate chunkState
ctype chunkType
buf bytes.Buffer
lbw LimitedByteWriter
}
// NewWriter2 creates an LZMA2 chunk sequence writer with the default
// parameters and options.
func NewWriter2(lzma2 io.Writer) (w *Writer2, err error) {
return Writer2Config{}.NewWriter2(lzma2)
}
// NewWriter2 creates a new LZMA2 writer using the given configuration.
func (c Writer2Config) NewWriter2(lzma2 io.Writer) (w *Writer2, err error) {
if err = c.Verify(); err != nil {
return nil, err
}
w = &Writer2{
w: lzma2,
start: newState(*c.Properties),
cstate: start,
ctype: start.defaultChunkType(),
}
w.buf.Grow(maxCompressed)
w.lbw = LimitedByteWriter{BW: &w.buf, N: maxCompressed}
m, err := c.Matcher.new(c.DictCap)
if err != nil {
return nil, err
}
d, err := newEncoderDict(c.DictCap, c.BufSize, m)
if err != nil {
return nil, err
}
w.encoder, err = newEncoder(&w.lbw, cloneState(w.start), d, 0)
if err != nil {
return nil, err
}
return w, nil
}
// written returns the number of bytes written to the current chunk
func (w *Writer2) written() int {
if w.encoder == nil {
return 0
}
return int(w.encoder.Compressed()) + w.encoder.dict.Buffered()
}
// errClosed indicates that the writer is closed.
var errClosed = errors.New("lzma: writer closed")
// Writes data to LZMA2 stream. Note that written data will be buffered.
// Use Flush or Close to ensure that data is written to the underlying
// writer.
func (w *Writer2) Write(p []byte) (n int, err error) {
if w.cstate == stop {
return 0, errClosed
}
for n < len(p) {
m := maxUncompressed - w.written()
if m <= 0 {
panic("lzma: maxUncompressed reached")
}
var q []byte
if n+m < len(p) {
q = p[n : n+m]
} else {
q = p[n:]
}
k, err := w.encoder.Write(q)
n += k
if err != nil && err != ErrLimit {
return n, err
}
if err == ErrLimit || k == m {
if err = w.flushChunk(); err != nil {
return n, err
}
}
}
return n, nil
}
// writeUncompressedChunk writes an uncompressed chunk to the LZMA2
// stream.
func (w *Writer2) writeUncompressedChunk() error {
u := w.encoder.Compressed()
if u <= 0 {
return errors.New("lzma: can't write empty uncompressed chunk")
}
if u > maxUncompressed {
panic("overrun of uncompressed data limit")
}
switch w.ctype {
case cLRND:
w.ctype = cUD
default:
w.ctype = cU
}
w.encoder.state = w.start
header := chunkHeader{
ctype: w.ctype,
uncompressed: uint32(u - 1),
}
hdata, err := header.MarshalBinary()
if err != nil {
return err
}
if _, err = w.w.Write(hdata); err != nil {
return err
}
_, err = w.encoder.dict.CopyN(w.w, int(u))
return err
}
// writeCompressedChunk writes a compressed chunk to the underlying
// writer.
func (w *Writer2) writeCompressedChunk() error {
if w.ctype == cU || w.ctype == cUD {
panic("chunk type uncompressed")
}
u := w.encoder.Compressed()
if u <= 0 {
return errors.New("writeCompressedChunk: empty chunk")
}
if u > maxUncompressed {
panic("overrun of uncompressed data limit")
}
c := w.buf.Len()
if c <= 0 {
panic("no compressed data")
}
if c > maxCompressed {
panic("overrun of compressed data limit")
}
header := chunkHeader{
ctype: w.ctype,
uncompressed: uint32(u - 1),
compressed: uint16(c - 1),
props: w.encoder.state.Properties,
}
hdata, err := header.MarshalBinary()
if err != nil {
return err
}
if _, err = w.w.Write(hdata); err != nil {
return err
}
_, err = io.Copy(w.w, &w.buf)
return err
}
// writes a single chunk to the underlying writer.
func (w *Writer2) writeChunk() error {
u := int(uncompressedHeaderLen + w.encoder.Compressed())
c := headerLen(w.ctype) + w.buf.Len()
if u < c {
return w.writeUncompressedChunk()
}
return w.writeCompressedChunk()
}
// flushChunk terminates the current chunk. The encoder will be reset
// to support the next chunk.
func (w *Writer2) flushChunk() error {
if w.written() == 0 {
return nil
}
var err error
if err = w.encoder.Close(); err != nil {
return err
}
if err = w.writeChunk(); err != nil {
return err
}
w.buf.Reset()
w.lbw.N = maxCompressed
if err = w.encoder.Reopen(&w.lbw); err != nil {
return err
}
if err = w.cstate.next(w.ctype); err != nil {
return err
}
w.ctype = w.cstate.defaultChunkType()
w.start = cloneState(w.encoder.state)
return nil
}
// Flush writes all buffered data out to the underlying stream. This
// could result in multiple chunks to be created.
func (w *Writer2) Flush() error {
if w.cstate == stop {
return errClosed
}
for w.written() > 0 {
if err := w.flushChunk(); err != nil {
return err
}
}
return nil
}
// Close terminates the LZMA2 stream with an EOS chunk.
func (w *Writer2) Close() error {
if w.cstate == stop {
return errClosed
}
if err := w.Flush(); err != nil {
return nil
}
// write zero byte EOS chunk
_, err := w.w.Write([]byte{0})
if err != nil {
return err
}
w.cstate = stop
return nil
}

3
vendor/golang.org/x/text/AUTHORS generated vendored

@ -0,0 +1,3 @@
# This source code refers to The Go Authors for copyright purposes.
# The master list of authors is in the main Go distribution,
# visible at http://tip.golang.org/AUTHORS.

@ -0,0 +1,3 @@
# This source code was written by the Go contributors.
# The master list of contributors is in the main Go distribution,
# visible at http://tip.golang.org/CONTRIBUTORS.

27
vendor/golang.org/x/text/LICENSE generated vendored

@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

22
vendor/golang.org/x/text/PATENTS generated vendored

@ -0,0 +1,22 @@
Additional IP Rights Grant (Patents)
"This implementation" means the copyrightable works distributed by
Google as part of the Go project.
Google hereby grants to You a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable (except as stated in this section)
patent license to make, have made, use, offer to sell, sell, import,
transfer and otherwise run, modify and propagate the contents of this
implementation of Go, where such license applies only to those patent
claims, both currently owned or controlled by Google and acquired in
the future, licensable by Google that are necessarily infringed by this
implementation of Go. This grant does not include claims that would be
infringed only as a consequence of further modification of this
implementation. If you or your agent or exclusive licensee institute or
order or agree to the institution of patent litigation against any
entity (including a cross-claim or counterclaim in a lawsuit) alleging
that this implementation of Go or any code incorporated within this
implementation of Go constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any patent
rights granted to you under this License for this implementation of Go
shall terminate as of the date such litigation is filed.

@ -0,0 +1,335 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package encoding defines an interface for character encodings, such as Shift
// JIS and Windows 1252, that can convert to and from UTF-8.
//
// Encoding implementations are provided in other packages, such as
// golang.org/x/text/encoding/charmap and
// golang.org/x/text/encoding/japanese.
package encoding // import "golang.org/x/text/encoding"
import (
"errors"
"io"
"strconv"
"unicode/utf8"
"golang.org/x/text/encoding/internal/identifier"
"golang.org/x/text/transform"
)
// TODO:
// - There seems to be some inconsistency in when decoders return errors
// and when not. Also documentation seems to suggest they shouldn't return
// errors at all (except for UTF-16).
// - Encoders seem to rely on or at least benefit from the input being in NFC
// normal form. Perhaps add an example how users could prepare their output.
// Encoding is a character set encoding that can be transformed to and from
// UTF-8.
type Encoding interface {
// NewDecoder returns a Decoder.
NewDecoder() *Decoder
// NewEncoder returns an Encoder.
NewEncoder() *Encoder
}
// A Decoder converts bytes to UTF-8. It implements transform.Transformer.
//
// Transforming source bytes that are not of that encoding will not result in an
// error per se. Each byte that cannot be transcoded will be represented in the
// output by the UTF-8 encoding of '\uFFFD', the replacement rune.
type Decoder struct {
transform.Transformer
// This forces external creators of Decoders to use names in struct
// initializers, allowing for future extendibility without having to break
// code.
_ struct{}
}
// Bytes converts the given encoded bytes to UTF-8. It returns the converted
// bytes or nil, err if any error occurred.
func (d *Decoder) Bytes(b []byte) ([]byte, error) {
b, _, err := transform.Bytes(d, b)
if err != nil {
return nil, err
}
return b, nil
}
// String converts the given encoded string to UTF-8. It returns the converted
// string or "", err if any error occurred.
func (d *Decoder) String(s string) (string, error) {
s, _, err := transform.String(d, s)
if err != nil {
return "", err
}
return s, nil
}
// Reader wraps another Reader to decode its bytes.
//
// The Decoder may not be used for any other operation as long as the returned
// Reader is in use.
func (d *Decoder) Reader(r io.Reader) io.Reader {
return transform.NewReader(r, d)
}
// An Encoder converts bytes from UTF-8. It implements transform.Transformer.
//
// Each rune that cannot be transcoded will result in an error. In this case,
// the transform will consume all source byte up to, not including the offending
// rune. Transforming source bytes that are not valid UTF-8 will be replaced by
// `\uFFFD`. To return early with an error instead, use transform.Chain to
// preprocess the data with a UTF8Validator.
type Encoder struct {
transform.Transformer
// This forces external creators of Encoders to use names in struct
// initializers, allowing for future extendibility without having to break
// code.
_ struct{}
}
// Bytes converts bytes from UTF-8. It returns the converted bytes or nil, err if
// any error occurred.
func (e *Encoder) Bytes(b []byte) ([]byte, error) {
b, _, err := transform.Bytes(e, b)
if err != nil {
return nil, err
}
return b, nil
}
// String converts a string from UTF-8. It returns the converted string or
// "", err if any error occurred.
func (e *Encoder) String(s string) (string, error) {
s, _, err := transform.String(e, s)
if err != nil {
return "", err
}
return s, nil
}
// Writer wraps another Writer to encode its UTF-8 output.
//
// The Encoder may not be used for any other operation as long as the returned
// Writer is in use.
func (e *Encoder) Writer(w io.Writer) io.Writer {
return transform.NewWriter(w, e)
}
// ASCIISub is the ASCII substitute character, as recommended by
// https://unicode.org/reports/tr36/#Text_Comparison
const ASCIISub = '\x1a'
// Nop is the nop encoding. Its transformed bytes are the same as the source
// bytes; it does not replace invalid UTF-8 sequences.
var Nop Encoding = nop{}
type nop struct{}
func (nop) NewDecoder() *Decoder {
return &Decoder{Transformer: transform.Nop}
}
func (nop) NewEncoder() *Encoder {
return &Encoder{Transformer: transform.Nop}
}
// Replacement is the replacement encoding. Decoding from the replacement
// encoding yields a single '\uFFFD' replacement rune. Encoding from UTF-8 to
// the replacement encoding yields the same as the source bytes except that
// invalid UTF-8 is converted to '\uFFFD'.
//
// It is defined at http://encoding.spec.whatwg.org/#replacement
var Replacement Encoding = replacement{}
type replacement struct{}
func (replacement) NewDecoder() *Decoder {
return &Decoder{Transformer: replacementDecoder{}}
}
func (replacement) NewEncoder() *Encoder {
return &Encoder{Transformer: replacementEncoder{}}
}
func (replacement) ID() (mib identifier.MIB, other string) {
return identifier.Replacement, ""
}
type replacementDecoder struct{ transform.NopResetter }
func (replacementDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if len(dst) < 3 {
return 0, 0, transform.ErrShortDst
}
if atEOF {
const fffd = "\ufffd"
dst[0] = fffd[0]
dst[1] = fffd[1]
dst[2] = fffd[2]
nDst = 3
}
return nDst, len(src), nil
}
type replacementEncoder struct{ transform.NopResetter }
func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
r, size := rune(0), 0
for ; nSrc < len(src); nSrc += size {
r = rune(src[nSrc])
// Decode a 1-byte rune.
if r < utf8.RuneSelf {
size = 1
} else {
// Decode a multi-byte rune.
r, size = utf8.DecodeRune(src[nSrc:])
if size == 1 {
// All valid runes of size 1 (those below utf8.RuneSelf) were
// handled above. We have invalid UTF-8 or we haven't seen the
// full character yet.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = transform.ErrShortSrc
break
}
r = '\ufffd'
}
}
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
return nDst, nSrc, err
}
// HTMLEscapeUnsupported wraps encoders to replace source runes outside the
// repertoire of the destination encoding with HTML escape sequences.
//
// This wrapper exists to comply to URL and HTML forms requiring a
// non-terminating legacy encoder. The produced sequences may lead to data
// loss as they are indistinguishable from legitimate input. To avoid this
// issue, use UTF-8 encodings whenever possible.
func HTMLEscapeUnsupported(e *Encoder) *Encoder {
return &Encoder{Transformer: &errorHandler{e, errorToHTML}}
}
// ReplaceUnsupported wraps encoders to replace source runes outside the
// repertoire of the destination encoding with an encoding-specific
// replacement.
//
// This wrapper is only provided for backwards compatibility and legacy
// handling. Its use is strongly discouraged. Use UTF-8 whenever possible.
func ReplaceUnsupported(e *Encoder) *Encoder {
return &Encoder{Transformer: &errorHandler{e, errorToReplacement}}
}
type errorHandler struct {
*Encoder
handler func(dst []byte, r rune, err repertoireError) (n int, ok bool)
}
// TODO: consider making this error public in some form.
type repertoireError interface {
Replacement() byte
}
func (h errorHandler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
nDst, nSrc, err = h.Transformer.Transform(dst, src, atEOF)
for err != nil {
rerr, ok := err.(repertoireError)
if !ok {
return nDst, nSrc, err
}
r, sz := utf8.DecodeRune(src[nSrc:])
n, ok := h.handler(dst[nDst:], r, rerr)
if !ok {
return nDst, nSrc, transform.ErrShortDst
}
err = nil
nDst += n
if nSrc += sz; nSrc < len(src) {
var dn, sn int
dn, sn, err = h.Transformer.Transform(dst[nDst:], src[nSrc:], atEOF)
nDst += dn
nSrc += sn
}
}
return nDst, nSrc, err
}
func errorToHTML(dst []byte, r rune, err repertoireError) (n int, ok bool) {
buf := [8]byte{}
b := strconv.AppendUint(buf[:0], uint64(r), 10)
if n = len(b) + len("&#;"); n >= len(dst) {
return 0, false
}
dst[0] = '&'
dst[1] = '#'
dst[copy(dst[2:], b)+2] = ';'
return n, true
}
func errorToReplacement(dst []byte, r rune, err repertoireError) (n int, ok bool) {
if len(dst) == 0 {
return 0, false
}
dst[0] = err.Replacement()
return 1, true
}
// ErrInvalidUTF8 means that a transformer encountered invalid UTF-8.
var ErrInvalidUTF8 = errors.New("encoding: invalid UTF-8")
// UTF8Validator is a transformer that returns ErrInvalidUTF8 on the first
// input byte that is not valid UTF-8.
var UTF8Validator transform.Transformer = utf8Validator{}
type utf8Validator struct{ transform.NopResetter }
func (utf8Validator) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
n := len(src)
if n > len(dst) {
n = len(dst)
}
for i := 0; i < n; {
if c := src[i]; c < utf8.RuneSelf {
dst[i] = c
i++
continue
}
_, size := utf8.DecodeRune(src[i:])
if size == 1 {
// All valid runes of size 1 (those below utf8.RuneSelf) were
// handled above. We have invalid UTF-8 or we haven't seen the
// full character yet.
err = ErrInvalidUTF8
if !atEOF && !utf8.FullRune(src[i:]) {
err = transform.ErrShortSrc
}
return i, i, err
}
if i+size > len(dst) {
return i, i, transform.ErrShortDst
}
for ; size > 0; size-- {
dst[i] = src[i]
i++
}
}
if len(src) > len(dst) {
err = transform.ErrShortDst
}
return n, n, err
}

@ -0,0 +1,81 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go
// Package identifier defines the contract between implementations of Encoding
// and Index by defining identifiers that uniquely identify standardized coded
// character sets (CCS) and character encoding schemes (CES), which we will
// together refer to as encodings, for which Encoding implementations provide
// converters to and from UTF-8. This package is typically only of concern to
// implementers of Indexes and Encodings.
//
// One part of the identifier is the MIB code, which is defined by IANA and
// uniquely identifies a CCS or CES. Each code is associated with data that
// references authorities, official documentation as well as aliases and MIME
// names.
//
// Not all CESs are covered by the IANA registry. The "other" string that is
// returned by ID can be used to identify other character sets or versions of
// existing ones.
//
// It is recommended that each package that provides a set of Encodings provide
// the All and Common variables to reference all supported encodings and
// commonly used subset. This allows Index implementations to include all
// available encodings without explicitly referencing or knowing about them.
package identifier
// Note: this package is internal, but could be made public if there is a need
// for writing third-party Indexes and Encodings.
// References:
// - http://source.icu-project.org/repos/icu/icu/trunk/source/data/mappings/convrtrs.txt
// - http://www.iana.org/assignments/character-sets/character-sets.xhtml
// - http://www.iana.org/assignments/ianacharset-mib/ianacharset-mib
// - http://www.ietf.org/rfc/rfc2978.txt
// - https://www.unicode.org/reports/tr22/
// - http://www.w3.org/TR/encoding/
// - https://encoding.spec.whatwg.org/
// - https://encoding.spec.whatwg.org/encodings.json
// - https://tools.ietf.org/html/rfc6657#section-5
// Interface can be implemented by Encodings to define the CCS or CES for which
// it implements conversions.
type Interface interface {
// ID returns an encoding identifier. Exactly one of the mib and other
// values should be non-zero.
//
// In the usual case it is only necessary to indicate the MIB code. The
// other string can be used to specify encodings for which there is no MIB,
// such as "x-mac-dingbat".
//
// The other string may only contain the characters a-z, A-Z, 0-9, - and _.
ID() (mib MIB, other string)
// NOTE: the restrictions on the encoding are to allow extending the syntax
// with additional information such as versions, vendors and other variants.
}
// A MIB identifies an encoding. It is derived from the IANA MIB codes and adds
// some identifiers for some encodings that are not covered by the IANA
// standard.
//
// See http://www.iana.org/assignments/ianacharset-mib.
type MIB uint16
// These additional MIB types are not defined in IANA. They are added because
// they are common and defined within the text repo.
const (
// Unofficial marks the start of encodings not registered by IANA.
Unofficial MIB = 10000 + iota
// Replacement is the WhatWG replacement encoding.
Replacement
// XUserDefined is the code for x-user-defined.
XUserDefined
// MacintoshCyrillic is the code for x-mac-cyrillic.
MacintoshCyrillic
)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,75 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package internal contains code that is shared among encoding implementations.
package internal
import (
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal/identifier"
"golang.org/x/text/transform"
)
// Encoding is an implementation of the Encoding interface that adds the String
// and ID methods to an existing encoding.
type Encoding struct {
encoding.Encoding
Name string
MIB identifier.MIB
}
// _ verifies that Encoding implements identifier.Interface.
var _ identifier.Interface = (*Encoding)(nil)
func (e *Encoding) String() string {
return e.Name
}
func (e *Encoding) ID() (mib identifier.MIB, other string) {
return e.MIB, ""
}
// SimpleEncoding is an Encoding that combines two Transformers.
type SimpleEncoding struct {
Decoder transform.Transformer
Encoder transform.Transformer
}
func (e *SimpleEncoding) NewDecoder() *encoding.Decoder {
return &encoding.Decoder{Transformer: e.Decoder}
}
func (e *SimpleEncoding) NewEncoder() *encoding.Encoder {
return &encoding.Encoder{Transformer: e.Encoder}
}
// FuncEncoding is an Encoding that combines two functions returning a new
// Transformer.
type FuncEncoding struct {
Decoder func() transform.Transformer
Encoder func() transform.Transformer
}
func (e FuncEncoding) NewDecoder() *encoding.Decoder {
return &encoding.Decoder{Transformer: e.Decoder()}
}
func (e FuncEncoding) NewEncoder() *encoding.Encoder {
return &encoding.Encoder{Transformer: e.Encoder()}
}
// A RepertoireError indicates a rune is not in the repertoire of a destination
// encoding. It is associated with an encoding-specific suggested replacement
// byte.
type RepertoireError byte
// Error implements the error interrface.
func (r RepertoireError) Error() string {
return "encoding: rune not supported by encoding."
}
// Replacement returns the replacement string associated with this error.
func (r RepertoireError) Replacement() byte { return byte(r) }
var ErrASCIIReplacement = RepertoireError(encoding.ASCIISub)

@ -0,0 +1,12 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package simplifiedchinese
import (
"golang.org/x/text/encoding"
)
// All is a list of all defined encodings in this package.
var All = []encoding.Encoding{GB18030, GBK, HZGB2312}

@ -0,0 +1,269 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package simplifiedchinese
import (
"unicode/utf8"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal"
"golang.org/x/text/encoding/internal/identifier"
"golang.org/x/text/transform"
)
var (
// GB18030 is the GB18030 encoding.
GB18030 encoding.Encoding = &gbk18030
// GBK is the GBK encoding. It encodes an extension of the GB2312 character set
// and is also known as Code Page 936.
GBK encoding.Encoding = &gbk
)
var gbk = internal.Encoding{
&internal.SimpleEncoding{
gbkDecoder{gb18030: false},
gbkEncoder{gb18030: false},
},
"GBK",
identifier.GBK,
}
var gbk18030 = internal.Encoding{
&internal.SimpleEncoding{
gbkDecoder{gb18030: true},
gbkEncoder{gb18030: true},
},
"GB18030",
identifier.GB18030,
}
type gbkDecoder struct {
transform.NopResetter
gb18030 bool
}
func (d gbkDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
r, size := rune(0), 0
loop:
for ; nSrc < len(src); nSrc += size {
switch c0 := src[nSrc]; {
case c0 < utf8.RuneSelf:
r, size = rune(c0), 1
// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC
// as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk
// says to treat "gbk" as Code Page 936.
case c0 == 0x80:
r, size = '€', 1
case c0 < 0xff:
if nSrc+1 >= len(src) {
if !atEOF {
err = transform.ErrShortSrc
break loop
}
r, size = utf8.RuneError, 1
goto write
}
c1 := src[nSrc+1]
switch {
case 0x40 <= c1 && c1 < 0x7f:
c1 -= 0x40
case 0x80 <= c1 && c1 < 0xff:
c1 -= 0x41
case d.gb18030 && 0x30 <= c1 && c1 < 0x40:
if nSrc+3 >= len(src) {
if !atEOF {
err = transform.ErrShortSrc
break loop
}
// The second byte here is always ASCII, so we can set size
// to 1 in all cases.
r, size = utf8.RuneError, 1
goto write
}
c2 := src[nSrc+2]
if c2 < 0x81 || 0xff <= c2 {
r, size = utf8.RuneError, 1
goto write
}
c3 := src[nSrc+3]
if c3 < 0x30 || 0x3a <= c3 {
r, size = utf8.RuneError, 1
goto write
}
size = 4
r = ((rune(c0-0x81)*10+rune(c1-0x30))*126+rune(c2-0x81))*10 + rune(c3-0x30)
if r < 39420 {
i, j := 0, len(gb18030)
for i < j {
h := i + (j-i)/2
if r >= rune(gb18030[h][0]) {
i = h + 1
} else {
j = h
}
}
dec := &gb18030[i-1]
r += rune(dec[1]) - rune(dec[0])
goto write
}
r -= 189000
if 0 <= r && r < 0x100000 {
r += 0x10000
} else {
r, size = utf8.RuneError, 1
}
goto write
default:
r, size = utf8.RuneError, 1
goto write
}
r, size = '\ufffd', 2
if i := int(c0-0x81)*190 + int(c1); i < len(decode) {
r = rune(decode[i])
if r == 0 {
r = '\ufffd'
}
}
default:
r, size = utf8.RuneError, 1
}
write:
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
return nDst, nSrc, err
}
type gbkEncoder struct {
transform.NopResetter
gb18030 bool
}
func (e gbkEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
r, r2, size := rune(0), rune(0), 0
for ; nSrc < len(src); nSrc += size {
r = rune(src[nSrc])
// Decode a 1-byte rune.
if r < utf8.RuneSelf {
size = 1
} else {
// Decode a multi-byte rune.
r, size = utf8.DecodeRune(src[nSrc:])
if size == 1 {
// All valid runes of size 1 (those below utf8.RuneSelf) were
// handled above. We have invalid UTF-8 or we haven't seen the
// full character yet.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = transform.ErrShortSrc
break
}
}
// func init checks that the switch covers all tables.
switch {
case encode0Low <= r && r < encode0High:
if r2 = rune(encode0[r-encode0Low]); r2 != 0 {
goto write2
}
case encode1Low <= r && r < encode1High:
// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC
// as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk
// says to treat "gbk" as Code Page 936.
if r == '€' {
r = 0x80
goto write1
}
if r2 = rune(encode1[r-encode1Low]); r2 != 0 {
goto write2
}
case encode2Low <= r && r < encode2High:
if r2 = rune(encode2[r-encode2Low]); r2 != 0 {
goto write2
}
case encode3Low <= r && r < encode3High:
if r2 = rune(encode3[r-encode3Low]); r2 != 0 {
goto write2
}
case encode4Low <= r && r < encode4High:
if r2 = rune(encode4[r-encode4Low]); r2 != 0 {
goto write2
}
}
if e.gb18030 {
if r < 0x10000 {
i, j := 0, len(gb18030)
for i < j {
h := i + (j-i)/2
if r >= rune(gb18030[h][1]) {
i = h + 1
} else {
j = h
}
}
dec := &gb18030[i-1]
r += rune(dec[0]) - rune(dec[1])
goto write4
} else if r < 0x110000 {
r += 189000 - 0x10000
goto write4
}
}
err = internal.ErrASCIIReplacement
break
}
write1:
if nDst >= len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst] = uint8(r)
nDst++
continue
write2:
if nDst+2 > len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst+0] = uint8(r2 >> 8)
dst[nDst+1] = uint8(r2)
nDst += 2
continue
write4:
if nDst+4 > len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst+3] = uint8(r%10 + 0x30)
r /= 10
dst[nDst+2] = uint8(r%126 + 0x81)
r /= 126
dst[nDst+1] = uint8(r%10 + 0x30)
r /= 10
dst[nDst+0] = uint8(r + 0x81)
nDst += 4
continue
}
return nDst, nSrc, err
}
func init() {
// Check that the hard-coded encode switch covers all tables.
if numEncodeTables != 5 {
panic("bad numEncodeTables")
}
}

@ -0,0 +1,245 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package simplifiedchinese
import (
"unicode/utf8"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal"
"golang.org/x/text/encoding/internal/identifier"
"golang.org/x/text/transform"
)
// HZGB2312 is the HZ-GB2312 encoding.
var HZGB2312 encoding.Encoding = &hzGB2312
var hzGB2312 = internal.Encoding{
internal.FuncEncoding{hzGB2312NewDecoder, hzGB2312NewEncoder},
"HZ-GB2312",
identifier.HZGB2312,
}
func hzGB2312NewDecoder() transform.Transformer {
return new(hzGB2312Decoder)
}
func hzGB2312NewEncoder() transform.Transformer {
return new(hzGB2312Encoder)
}
const (
asciiState = iota
gbState
)
type hzGB2312Decoder int
func (d *hzGB2312Decoder) Reset() {
*d = asciiState
}
func (d *hzGB2312Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
r, size := rune(0), 0
loop:
for ; nSrc < len(src); nSrc += size {
c0 := src[nSrc]
if c0 >= utf8.RuneSelf {
r, size = utf8.RuneError, 1
goto write
}
if c0 == '~' {
if nSrc+1 >= len(src) {
if !atEOF {
err = transform.ErrShortSrc
break loop
}
r, size = utf8.RuneError, 1
goto write
}
size = 2
switch src[nSrc+1] {
case '{':
*d = gbState
continue
case '}':
*d = asciiState
continue
case '~':
if nDst >= len(dst) {
err = transform.ErrShortDst
break loop
}
dst[nDst] = '~'
nDst++
continue
case '\n':
continue
default:
r = utf8.RuneError
goto write
}
}
if *d == asciiState {
r, size = rune(c0), 1
} else {
if nSrc+1 >= len(src) {
if !atEOF {
err = transform.ErrShortSrc
break loop
}
r, size = utf8.RuneError, 1
goto write
}
size = 2
c1 := src[nSrc+1]
if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 {
// error
} else if i := int(c0-0x01)*190 + int(c1+0x3f); i < len(decode) {
r = rune(decode[i])
if r != 0 {
goto write
}
}
if c1 > utf8.RuneSelf {
// Be consistent and always treat non-ASCII as a single error.
size = 1
}
r = utf8.RuneError
}
write:
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
return nDst, nSrc, err
}
type hzGB2312Encoder int
func (d *hzGB2312Encoder) Reset() {
*d = asciiState
}
func (e *hzGB2312Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
r, size := rune(0), 0
for ; nSrc < len(src); nSrc += size {
r = rune(src[nSrc])
// Decode a 1-byte rune.
if r < utf8.RuneSelf {
size = 1
if r == '~' {
if nDst+2 > len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst+0] = '~'
dst[nDst+1] = '~'
nDst += 2
continue
} else if *e != asciiState {
if nDst+3 > len(dst) {
err = transform.ErrShortDst
break
}
*e = asciiState
dst[nDst+0] = '~'
dst[nDst+1] = '}'
nDst += 2
} else if nDst >= len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst] = uint8(r)
nDst += 1
continue
}
// Decode a multi-byte rune.
r, size = utf8.DecodeRune(src[nSrc:])
if size == 1 {
// All valid runes of size 1 (those below utf8.RuneSelf) were
// handled above. We have invalid UTF-8 or we haven't seen the
// full character yet.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = transform.ErrShortSrc
break
}
}
// func init checks that the switch covers all tables.
switch {
case encode0Low <= r && r < encode0High:
if r = rune(encode0[r-encode0Low]); r != 0 {
goto writeGB
}
case encode1Low <= r && r < encode1High:
if r = rune(encode1[r-encode1Low]); r != 0 {
goto writeGB
}
case encode2Low <= r && r < encode2High:
if r = rune(encode2[r-encode2Low]); r != 0 {
goto writeGB
}
case encode3Low <= r && r < encode3High:
if r = rune(encode3[r-encode3Low]); r != 0 {
goto writeGB
}
case encode4Low <= r && r < encode4High:
if r = rune(encode4[r-encode4Low]); r != 0 {
goto writeGB
}
}
terminateInASCIIState:
// Switch back to ASCII state in case of error so that an ASCII
// replacement character can be written in the correct state.
if *e != asciiState {
if nDst+2 > len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst+0] = '~'
dst[nDst+1] = '}'
nDst += 2
}
err = internal.ErrASCIIReplacement
break
writeGB:
c0 := uint8(r>>8) - 0x80
c1 := uint8(r) - 0x80
if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 {
goto terminateInASCIIState
}
if *e == asciiState {
if nDst+4 > len(dst) {
err = transform.ErrShortDst
break
}
*e = gbState
dst[nDst+0] = '~'
dst[nDst+1] = '{'
nDst += 2
} else if nDst+2 > len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst+0] = c0
dst[nDst+1] = c1
nDst += 2
continue
}
// TODO: should one always terminate in ASCII state to make it safe to
// concatenate two HZ-GB2312-encoded strings?
return nDst, nSrc, err
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,709 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package transform provides reader and writer wrappers that transform the
// bytes passing through as well as various transformations. Example
// transformations provided by other packages include normalization and
// conversion between character sets.
package transform // import "golang.org/x/text/transform"
import (
"bytes"
"errors"
"io"
"unicode/utf8"
)
var (
// ErrShortDst means that the destination buffer was too short to
// receive all of the transformed bytes.
ErrShortDst = errors.New("transform: short destination buffer")
// ErrShortSrc means that the source buffer has insufficient data to
// complete the transformation.
ErrShortSrc = errors.New("transform: short source buffer")
// ErrEndOfSpan means that the input and output (the transformed input)
// are not identical.
ErrEndOfSpan = errors.New("transform: input and output are not identical")
// errInconsistentByteCount means that Transform returned success (nil
// error) but also returned nSrc inconsistent with the src argument.
errInconsistentByteCount = errors.New("transform: inconsistent byte count returned")
// errShortInternal means that an internal buffer is not large enough
// to make progress and the Transform operation must be aborted.
errShortInternal = errors.New("transform: short internal buffer")
)
// Transformer transforms bytes.
type Transformer interface {
// Transform writes to dst the transformed bytes read from src, and
// returns the number of dst bytes written and src bytes read. The
// atEOF argument tells whether src represents the last bytes of the
// input.
//
// Callers should always process the nDst bytes produced and account
// for the nSrc bytes consumed before considering the error err.
//
// A nil error means that all of the transformed bytes (whether freshly
// transformed from src or left over from previous Transform calls)
// were written to dst. A nil error can be returned regardless of
// whether atEOF is true. If err is nil then nSrc must equal len(src);
// the converse is not necessarily true.
//
// ErrShortDst means that dst was too short to receive all of the
// transformed bytes. ErrShortSrc means that src had insufficient data
// to complete the transformation. If both conditions apply, then
// either error may be returned. Other than the error conditions listed
// here, implementations are free to report other errors that arise.
Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
// Reset resets the state and allows a Transformer to be reused.
Reset()
}
// SpanningTransformer extends the Transformer interface with a Span method
// that determines how much of the input already conforms to the Transformer.
type SpanningTransformer interface {
Transformer
// Span returns a position in src such that transforming src[:n] results in
// identical output src[:n] for these bytes. It does not necessarily return
// the largest such n. The atEOF argument tells whether src represents the
// last bytes of the input.
//
// Callers should always account for the n bytes consumed before
// considering the error err.
//
// A nil error means that all input bytes are known to be identical to the
// output produced by the Transformer. A nil error can be returned
// regardless of whether atEOF is true. If err is nil, then n must
// equal len(src); the converse is not necessarily true.
//
// ErrEndOfSpan means that the Transformer output may differ from the
// input after n bytes. Note that n may be len(src), meaning that the output
// would contain additional bytes after otherwise identical output.
// ErrShortSrc means that src had insufficient data to determine whether the
// remaining bytes would change. Other than the error conditions listed
// here, implementations are free to report other errors that arise.
//
// Calling Span can modify the Transformer state as a side effect. In
// effect, it does the transformation just as calling Transform would, only
// without copying to a destination buffer and only up to a point it can
// determine the input and output bytes are the same. This is obviously more
// limited than calling Transform, but can be more efficient in terms of
// copying and allocating buffers. Calls to Span and Transform may be
// interleaved.
Span(src []byte, atEOF bool) (n int, err error)
}
// NopResetter can be embedded by implementations of Transformer to add a nop
// Reset method.
type NopResetter struct{}
// Reset implements the Reset method of the Transformer interface.
func (NopResetter) Reset() {}
// Reader wraps another io.Reader by transforming the bytes read.
type Reader struct {
r io.Reader
t Transformer
err error
// dst[dst0:dst1] contains bytes that have been transformed by t but
// not yet copied out via Read.
dst []byte
dst0, dst1 int
// src[src0:src1] contains bytes that have been read from r but not
// yet transformed through t.
src []byte
src0, src1 int
// transformComplete is whether the transformation is complete,
// regardless of whether or not it was successful.
transformComplete bool
}
const defaultBufSize = 4096
// NewReader returns a new Reader that wraps r by transforming the bytes read
// via t. It calls Reset on t.
func NewReader(r io.Reader, t Transformer) *Reader {
t.Reset()
return &Reader{
r: r,
t: t,
dst: make([]byte, defaultBufSize),
src: make([]byte, defaultBufSize),
}
}
// Read implements the io.Reader interface.
func (r *Reader) Read(p []byte) (int, error) {
n, err := 0, error(nil)
for {
// Copy out any transformed bytes and return the final error if we are done.
if r.dst0 != r.dst1 {
n = copy(p, r.dst[r.dst0:r.dst1])
r.dst0 += n
if r.dst0 == r.dst1 && r.transformComplete {
return n, r.err
}
return n, nil
} else if r.transformComplete {
return 0, r.err
}
// Try to transform some source bytes, or to flush the transformer if we
// are out of source bytes. We do this even if r.r.Read returned an error.
// As the io.Reader documentation says, "process the n > 0 bytes returned
// before considering the error".
if r.src0 != r.src1 || r.err != nil {
r.dst0 = 0
r.dst1, n, err = r.t.Transform(r.dst, r.src[r.src0:r.src1], r.err == io.EOF)
r.src0 += n
switch {
case err == nil:
if r.src0 != r.src1 {
r.err = errInconsistentByteCount
}
// The Transform call was successful; we are complete if we
// cannot read more bytes into src.
r.transformComplete = r.err != nil
continue
case err == ErrShortDst && (r.dst1 != 0 || n != 0):
// Make room in dst by copying out, and try again.
continue
case err == ErrShortSrc && r.src1-r.src0 != len(r.src) && r.err == nil:
// Read more bytes into src via the code below, and try again.
default:
r.transformComplete = true
// The reader error (r.err) takes precedence over the
// transformer error (err) unless r.err is nil or io.EOF.
if r.err == nil || r.err == io.EOF {
r.err = err
}
continue
}
}
// Move any untransformed source bytes to the start of the buffer
// and read more bytes.
if r.src0 != 0 {
r.src0, r.src1 = 0, copy(r.src, r.src[r.src0:r.src1])
}
n, r.err = r.r.Read(r.src[r.src1:])
r.src1 += n
}
}
// TODO: implement ReadByte (and ReadRune??).
// Writer wraps another io.Writer by transforming the bytes read.
// The user needs to call Close to flush unwritten bytes that may
// be buffered.
type Writer struct {
w io.Writer
t Transformer
dst []byte
// src[:n] contains bytes that have not yet passed through t.
src []byte
n int
}
// NewWriter returns a new Writer that wraps w by transforming the bytes written
// via t. It calls Reset on t.
func NewWriter(w io.Writer, t Transformer) *Writer {
t.Reset()
return &Writer{
w: w,
t: t,
dst: make([]byte, defaultBufSize),
src: make([]byte, defaultBufSize),
}
}
// Write implements the io.Writer interface. If there are not enough
// bytes available to complete a Transform, the bytes will be buffered
// for the next write. Call Close to convert the remaining bytes.
func (w *Writer) Write(data []byte) (n int, err error) {
src := data
if w.n > 0 {
// Append bytes from data to the last remainder.
// TODO: limit the amount copied on first try.
n = copy(w.src[w.n:], data)
w.n += n
src = w.src[:w.n]
}
for {
nDst, nSrc, err := w.t.Transform(w.dst, src, false)
if _, werr := w.w.Write(w.dst[:nDst]); werr != nil {
return n, werr
}
src = src[nSrc:]
if w.n == 0 {
n += nSrc
} else if len(src) <= n {
// Enough bytes from w.src have been consumed. We make src point
// to data instead to reduce the copying.
w.n = 0
n -= len(src)
src = data[n:]
if n < len(data) && (err == nil || err == ErrShortSrc) {
continue
}
}
switch err {
case ErrShortDst:
// This error is okay as long as we are making progress.
if nDst > 0 || nSrc > 0 {
continue
}
case ErrShortSrc:
if len(src) < len(w.src) {
m := copy(w.src, src)
// If w.n > 0, bytes from data were already copied to w.src and n
// was already set to the number of bytes consumed.
if w.n == 0 {
n += m
}
w.n = m
err = nil
} else if nDst > 0 || nSrc > 0 {
// Not enough buffer to store the remainder. Keep processing as
// long as there is progress. Without this case, transforms that
// require a lookahead larger than the buffer may result in an
// error. This is not something one may expect to be common in
// practice, but it may occur when buffers are set to small
// sizes during testing.
continue
}
case nil:
if w.n > 0 {
err = errInconsistentByteCount
}
}
return n, err
}
}
// Close implements the io.Closer interface.
func (w *Writer) Close() error {
src := w.src[:w.n]
for {
nDst, nSrc, err := w.t.Transform(w.dst, src, true)
if _, werr := w.w.Write(w.dst[:nDst]); werr != nil {
return werr
}
if err != ErrShortDst {
return err
}
src = src[nSrc:]
}
}
type nop struct{ NopResetter }
func (nop) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
n := copy(dst, src)
if n < len(src) {
err = ErrShortDst
}
return n, n, err
}
func (nop) Span(src []byte, atEOF bool) (n int, err error) {
return len(src), nil
}
type discard struct{ NopResetter }
func (discard) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
return 0, len(src), nil
}
var (
// Discard is a Transformer for which all Transform calls succeed
// by consuming all bytes and writing nothing.
Discard Transformer = discard{}
// Nop is a SpanningTransformer that copies src to dst.
Nop SpanningTransformer = nop{}
)
// chain is a sequence of links. A chain with N Transformers has N+1 links and
// N+1 buffers. Of those N+1 buffers, the first and last are the src and dst
// buffers given to chain.Transform and the middle N-1 buffers are intermediate
// buffers owned by the chain. The i'th link transforms bytes from the i'th
// buffer chain.link[i].b at read offset chain.link[i].p to the i+1'th buffer
// chain.link[i+1].b at write offset chain.link[i+1].n, for i in [0, N).
type chain struct {
link []link
err error
// errStart is the index at which the error occurred plus 1. Processing
// errStart at this level at the next call to Transform. As long as
// errStart > 0, chain will not consume any more source bytes.
errStart int
}
func (c *chain) fatalError(errIndex int, err error) {
if i := errIndex + 1; i > c.errStart {
c.errStart = i
c.err = err
}
}
type link struct {
t Transformer
// b[p:n] holds the bytes to be transformed by t.
b []byte
p int
n int
}
func (l *link) src() []byte {
return l.b[l.p:l.n]
}
func (l *link) dst() []byte {
return l.b[l.n:]
}
// Chain returns a Transformer that applies t in sequence.
func Chain(t ...Transformer) Transformer {
if len(t) == 0 {
return nop{}
}
c := &chain{link: make([]link, len(t)+1)}
for i, tt := range t {
c.link[i].t = tt
}
// Allocate intermediate buffers.
b := make([][defaultBufSize]byte, len(t)-1)
for i := range b {
c.link[i+1].b = b[i][:]
}
return c
}
// Reset resets the state of Chain. It calls Reset on all the Transformers.
func (c *chain) Reset() {
for i, l := range c.link {
if l.t != nil {
l.t.Reset()
}
c.link[i].p, c.link[i].n = 0, 0
}
}
// TODO: make chain use Span (is going to be fun to implement!)
// Transform applies the transformers of c in sequence.
func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
// Set up src and dst in the chain.
srcL := &c.link[0]
dstL := &c.link[len(c.link)-1]
srcL.b, srcL.p, srcL.n = src, 0, len(src)
dstL.b, dstL.n = dst, 0
var lastFull, needProgress bool // for detecting progress
// i is the index of the next Transformer to apply, for i in [low, high].
// low is the lowest index for which c.link[low] may still produce bytes.
// high is the highest index for which c.link[high] has a Transformer.
// The error returned by Transform determines whether to increase or
// decrease i. We try to completely fill a buffer before converting it.
for low, i, high := c.errStart, c.errStart, len(c.link)-2; low <= i && i <= high; {
in, out := &c.link[i], &c.link[i+1]
nDst, nSrc, err0 := in.t.Transform(out.dst(), in.src(), atEOF && low == i)
out.n += nDst
in.p += nSrc
if i > 0 && in.p == in.n {
in.p, in.n = 0, 0
}
needProgress, lastFull = lastFull, false
switch err0 {
case ErrShortDst:
// Process the destination buffer next. Return if we are already
// at the high index.
if i == high {
return dstL.n, srcL.p, ErrShortDst
}
if out.n != 0 {
i++
// If the Transformer at the next index is not able to process any
// source bytes there is nothing that can be done to make progress
// and the bytes will remain unprocessed. lastFull is used to
// detect this and break out of the loop with a fatal error.
lastFull = true
continue
}
// The destination buffer was too small, but is completely empty.
// Return a fatal error as this transformation can never complete.
c.fatalError(i, errShortInternal)
case ErrShortSrc:
if i == 0 {
// Save ErrShortSrc in err. All other errors take precedence.
err = ErrShortSrc
break
}
// Source bytes were depleted before filling up the destination buffer.
// Verify we made some progress, move the remaining bytes to the errStart
// and try to get more source bytes.
if needProgress && nSrc == 0 || in.n-in.p == len(in.b) {
// There were not enough source bytes to proceed while the source
// buffer cannot hold any more bytes. Return a fatal error as this
// transformation can never complete.
c.fatalError(i, errShortInternal)
break
}
// in.b is an internal buffer and we can make progress.
in.p, in.n = 0, copy(in.b, in.src())
fallthrough
case nil:
// if i == low, we have depleted the bytes at index i or any lower levels.
// In that case we increase low and i. In all other cases we decrease i to
// fetch more bytes before proceeding to the next index.
if i > low {
i--
continue
}
default:
c.fatalError(i, err0)
}
// Exhausted level low or fatal error: increase low and continue
// to process the bytes accepted so far.
i++
low = i
}
// If c.errStart > 0, this means we found a fatal error. We will clear
// all upstream buffers. At this point, no more progress can be made
// downstream, as Transform would have bailed while handling ErrShortDst.
if c.errStart > 0 {
for i := 1; i < c.errStart; i++ {
c.link[i].p, c.link[i].n = 0, 0
}
err, c.errStart, c.err = c.err, 0, nil
}
return dstL.n, srcL.p, err
}
// Deprecated: Use runes.Remove instead.
func RemoveFunc(f func(r rune) bool) Transformer {
return removeF(f)
}
type removeF func(r rune) bool
func (removeF) Reset() {}
// Transform implements the Transformer interface.
func (t removeF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for r, sz := rune(0), 0; len(src) > 0; src = src[sz:] {
if r = rune(src[0]); r < utf8.RuneSelf {
sz = 1
} else {
r, sz = utf8.DecodeRune(src)
if sz == 1 {
// Invalid rune.
if !atEOF && !utf8.FullRune(src) {
err = ErrShortSrc
break
}
// We replace illegal bytes with RuneError. Not doing so might
// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
// The resulting byte sequence may subsequently contain runes
// for which t(r) is true that were passed unnoticed.
if !t(r) {
if nDst+3 > len(dst) {
err = ErrShortDst
break
}
nDst += copy(dst[nDst:], "\uFFFD")
}
nSrc++
continue
}
}
if !t(r) {
if nDst+sz > len(dst) {
err = ErrShortDst
break
}
nDst += copy(dst[nDst:], src[:sz])
}
nSrc += sz
}
return
}
// grow returns a new []byte that is longer than b, and copies the first n bytes
// of b to the start of the new slice.
func grow(b []byte, n int) []byte {
m := len(b)
if m <= 32 {
m = 64
} else if m <= 256 {
m *= 2
} else {
m += m >> 1
}
buf := make([]byte, m)
copy(buf, b[:n])
return buf
}
const initialBufSize = 128
// String returns a string with the result of converting s[:n] using t, where
// n <= len(s). If err == nil, n will be len(s). It calls Reset on t.
func String(t Transformer, s string) (result string, n int, err error) {
t.Reset()
if s == "" {
// Fast path for the common case for empty input. Results in about a
// 86% reduction of running time for BenchmarkStringLowerEmpty.
if _, _, err := t.Transform(nil, nil, true); err == nil {
return "", 0, nil
}
}
// Allocate only once. Note that both dst and src escape when passed to
// Transform.
buf := [2 * initialBufSize]byte{}
dst := buf[:initialBufSize:initialBufSize]
src := buf[initialBufSize : 2*initialBufSize]
// The input string s is transformed in multiple chunks (starting with a
// chunk size of initialBufSize). nDst and nSrc are per-chunk (or
// per-Transform-call) indexes, pDst and pSrc are overall indexes.
nDst, nSrc := 0, 0
pDst, pSrc := 0, 0
// pPrefix is the length of a common prefix: the first pPrefix bytes of the
// result will equal the first pPrefix bytes of s. It is not guaranteed to
// be the largest such value, but if pPrefix, len(result) and len(s) are
// all equal after the final transform (i.e. calling Transform with atEOF
// being true returned nil error) then we don't need to allocate a new
// result string.
pPrefix := 0
for {
// Invariant: pDst == pPrefix && pSrc == pPrefix.
n := copy(src, s[pSrc:])
nDst, nSrc, err = t.Transform(dst, src[:n], pSrc+n == len(s))
pDst += nDst
pSrc += nSrc
// TODO: let transformers implement an optional Spanner interface, akin
// to norm's QuickSpan. This would even allow us to avoid any allocation.
if !bytes.Equal(dst[:nDst], src[:nSrc]) {
break
}
pPrefix = pSrc
if err == ErrShortDst {
// A buffer can only be short if a transformer modifies its input.
break
} else if err == ErrShortSrc {
if nSrc == 0 {
// No progress was made.
break
}
// Equal so far and !atEOF, so continue checking.
} else if err != nil || pPrefix == len(s) {
return string(s[:pPrefix]), pPrefix, err
}
}
// Post-condition: pDst == pPrefix + nDst && pSrc == pPrefix + nSrc.
// We have transformed the first pSrc bytes of the input s to become pDst
// transformed bytes. Those transformed bytes are discontiguous: the first
// pPrefix of them equal s[:pPrefix] and the last nDst of them equal
// dst[:nDst]. We copy them around, into a new dst buffer if necessary, so
// that they become one contiguous slice: dst[:pDst].
if pPrefix != 0 {
newDst := dst
if pDst > len(newDst) {
newDst = make([]byte, len(s)+nDst-nSrc)
}
copy(newDst[pPrefix:pDst], dst[:nDst])
copy(newDst[:pPrefix], s[:pPrefix])
dst = newDst
}
// Prevent duplicate Transform calls with atEOF being true at the end of
// the input. Also return if we have an unrecoverable error.
if (err == nil && pSrc == len(s)) ||
(err != nil && err != ErrShortDst && err != ErrShortSrc) {
return string(dst[:pDst]), pSrc, err
}
// Transform the remaining input, growing dst and src buffers as necessary.
for {
n := copy(src, s[pSrc:])
atEOF := pSrc+n == len(s)
nDst, nSrc, err := t.Transform(dst[pDst:], src[:n], atEOF)
pDst += nDst
pSrc += nSrc
// If we got ErrShortDst or ErrShortSrc, do not grow as long as we can
// make progress. This may avoid excessive allocations.
if err == ErrShortDst {
if nDst == 0 {
dst = grow(dst, pDst)
}
} else if err == ErrShortSrc {
if atEOF {
return string(dst[:pDst]), pSrc, err
}
if nSrc == 0 {
src = grow(src, 0)
}
} else if err != nil || pSrc == len(s) {
return string(dst[:pDst]), pSrc, err
}
}
}
// Bytes returns a new byte slice with the result of converting b[:n] using t,
// where n <= len(b). If err == nil, n will be len(b). It calls Reset on t.
func Bytes(t Transformer, b []byte) (result []byte, n int, err error) {
return doAppend(t, 0, make([]byte, len(b)), b)
}
// Append appends the result of converting src[:n] using t to dst, where
// n <= len(src), If err == nil, n will be len(src). It calls Reset on t.
func Append(t Transformer, dst, src []byte) (result []byte, n int, err error) {
if len(dst) == cap(dst) {
n := len(src) + len(dst) // It is okay for this to be 0.
b := make([]byte, n)
dst = b[:copy(b, dst)]
}
return doAppend(t, len(dst), dst[:cap(dst)], src)
}
func doAppend(t Transformer, pDst int, dst, src []byte) (result []byte, n int, err error) {
t.Reset()
pSrc := 0
for {
nDst, nSrc, err := t.Transform(dst[pDst:], src[pSrc:], true)
pDst += nDst
pSrc += nSrc
if err != ErrShortDst {
return dst[:pDst], pSrc, err
}
// Grow the destination buffer, but do not grow as long as we can make
// progress. This may avoid excessive allocations.
if nDst == 0 {
dst = grow(dst, pDst)
}
}
}

25
vendor/modules.txt vendored

@ -0,0 +1,25 @@
# github.com/dtapps/gostring v1.0.0
## explicit
github.com/dtapps/gostring
# github.com/saracen/go7z v0.0.0-20191010121135-9c09b6bd7fda
## explicit
github.com/saracen/go7z
github.com/saracen/go7z/filters
github.com/saracen/go7z/headers
# github.com/saracen/go7z-fixtures v0.0.0-20190623165746-aa6b8fba1d2f
## explicit
# github.com/saracen/solidblock v0.0.0-20190426153529-45df20abab6f
## explicit
github.com/saracen/solidblock
# github.com/ulikunitz/xz v0.5.10
## explicit; go 1.12
github.com/ulikunitz/xz/internal/hash
github.com/ulikunitz/xz/internal/xlog
github.com/ulikunitz/xz/lzma
# golang.org/x/text v0.3.7
## explicit; go 1.17
golang.org/x/text/encoding
golang.org/x/text/encoding/internal
golang.org/x/text/encoding/internal/identifier
golang.org/x/text/encoding/simplifiedchinese
golang.org/x/text/transform
Loading…
Cancel
Save