Add support for snappy and lz4 compression
parent 2b6aa85302
commit 965d3deb8d
@@ -16,7 +16,7 @@ import (
 
 var (
     ErrInvalidAdminPrefix    = errors.New("admin path must match the pattern " + AllowedCharacters)
-    ErrInvalidCompression    = errors.New("supported compression formats: flate, gzip, lzw, none, zlib, zstd")
+    ErrInvalidCompression    = errors.New("supported compression formats: flate, gzip, lz4, lzw, none, snappy, zlib, zstd")
     ErrInvalidConcurrency    = errors.New("concurrency limit must be a positive integer")
     ErrInvalidFileCountRange = errors.New("maximum file count limit must be greater than or equal to minimum file count limit")
     ErrInvalidFileCountValue = errors.New("file count limits must be non-negative integers no greater than 2147483647")
cmd/index.go (23 changes)
@@ -17,8 +17,10 @@ import (
     "sync"
     "time"
 
+    "github.com/golang/snappy"
     "github.com/julienschmidt/httprouter"
     "github.com/klauspost/compress/zstd"
+    lz4 "github.com/pierrec/lz4/v4"
     "seedno.de/seednode/roulette/types"
 )
 
@@ -91,16 +93,20 @@ func (index *fileIndex) isEmpty() bool {
     return length == 0
 }
 
-func getReader(format string, file io.Reader) (io.ReadCloser, error) {
+func getReader(format string, file io.Reader) (io.Reader, error) {
     switch format {
     case "flate":
         return flate.NewReader(file), nil
     case "gzip":
         return gzip.NewReader(file)
+    case "lz4":
+        return lz4.NewReader(file), nil
     case "lzw":
         return lzw.NewReader(file, lzw.LSB, 8), nil
     case "none":
         return io.NopCloser(file), nil
+    case "snappy":
+        return snappy.NewReader(file), nil
     case "zlib":
         return zlib.NewReader(file)
     case "zstd":
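The return type above loosens from io.ReadCloser to io.Reader, presumably because the snappy and lz4 stream readers expose no Close method. A minimal standalone sketch, not from this repository and with illustrative data, showing that *snappy.Reader is usable as a plain io.Reader:

package main

import (
    "bytes"
    "fmt"
    "io"

    "github.com/golang/snappy"
)

// Compile-time check: *snappy.Reader satisfies io.Reader, but it has no Close
// method, so it could not have been returned as an io.ReadCloser.
var _ io.Reader = (*snappy.Reader)(nil)

func main() {
    var buf bytes.Buffer

    // Write a framed snappy stream, then read it back through the plain io.Reader.
    writer := snappy.NewBufferedWriter(&buf)
    if _, err := writer.Write([]byte("hello, index")); err != nil {
        panic(err)
    }
    if err := writer.Close(); err != nil { // Close flushes the final frame
        panic(err)
    }

    decoded, err := io.ReadAll(snappy.NewReader(&buf))
    if err != nil {
        panic(err)
    }
    fmt.Println(string(decoded))
}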
@@ -117,11 +123,22 @@ func getWriter(format string, file io.WriteCloser) (io.WriteCloser, error) {
     case "flate":
         return flate.NewWriter(file, flate.DefaultCompression)
     case "gzip":
-        return gzip.NewWriter(file), nil
+        return gzip.NewWriterLevel(file, gzip.BestCompression)
+    case "lz4":
+        encoder := lz4.NewWriter(file)
+
+        err := encoder.Apply(lz4.CompressionLevelOption(lz4.Level9))
+        if err != nil {
+            return file, err
+        }
+
+        return encoder, nil
     case "lzw":
         return lzw.NewWriter(file, lzw.LSB, 8), nil
     case "none":
         return file, nil
+    case "snappy":
+        return snappy.NewBufferedWriter(file), nil
     case "zlib":
         return zlib.NewWriter(file), nil
     case "zstd":
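The new lz4 case configures the encoder through the library's functional-option API. A small round-trip sketch, illustrative rather than part of the commit, using the same pierrec/lz4 calls; Close is what finalizes the lz4 frame:

package main

import (
    "bytes"
    "fmt"
    "io"

    lz4 "github.com/pierrec/lz4/v4"
)

func main() {
    var buf bytes.Buffer

    // Same pattern as getWriter's "lz4" case: apply the compression level, then write.
    encoder := lz4.NewWriter(&buf)
    if err := encoder.Apply(lz4.CompressionLevelOption(lz4.Level9)); err != nil {
        panic(err)
    }
    if _, err := encoder.Write([]byte("hello, index")); err != nil {
        panic(err)
    }
    if err := encoder.Close(); err != nil { // flushes and closes the lz4 frame
        panic(err)
    }

    decoded, err := io.ReadAll(lz4.NewReader(&buf))
    if err != nil {
        panic(err)
    }
    fmt.Println(string(decoded))
}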
@@ -202,7 +219,7 @@ func (index *fileIndex) Import(path string) error {
     if err != nil {
         return err
     }
-    defer reader.Close()
+    //defer reader.Close()
 
     dec := gob.NewDecoder(reader)
 
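Commenting out the deferred Close follows from the new io.Reader return type. As an aside, and not something this commit does, a caller could keep deterministic cleanup for formats whose readers do implement io.Closer by asserting at run time; the helper name below is hypothetical:

package main

import (
    "io"
    "strings"
)

// closeIfCloser closes a reader only when its concrete type implements io.Closer
// (the gzip and zlib readers do; the snappy and lz4 stream readers do not, which
// is why the deferred Close above was dropped).
func closeIfCloser(r io.Reader) error {
    if closer, ok := r.(io.Closer); ok {
        return closer.Close()
    }
    return nil
}

func main() {
    // io.NopCloser wraps a plain reader in an io.ReadCloser, so this call closes it;
    // passing a bare *strings.Reader instead would be a no-op.
    _ = closeIfCloser(io.NopCloser(strings.NewReader("example")))
}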
@@ -15,7 +15,7 @@ import (
 
 const (
     AllowedCharacters string = `^[A-z0-9.\-_]+$`
-    ReleaseVersion    string = "3.10.0"
+    ReleaseVersion    string = "3.11.0"
 )
 
 var (
@@ -62,8 +62,10 @@ var (
     CompressionFormats = []string{
         "flate",
         "gzip",
+        "lz4",
         "lzw",
         "none",
+        "snappy",
         "zlib",
         "zstd",
     }
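For reference, a hedged sketch, not taken from this repository, of how a list like CompressionFormats can back the ErrInvalidCompression check; the helper name validateCompression is an assumption rather than the project's actual code:

package main

import (
    "errors"
    "fmt"
    "slices"
)

var CompressionFormats = []string{"flate", "gzip", "lz4", "lzw", "none", "snappy", "zlib", "zstd"}

var ErrInvalidCompression = errors.New("supported compression formats: flate, gzip, lz4, lzw, none, snappy, zlib, zstd")

// validateCompression rejects any value not present in CompressionFormats before
// the indexer tries to build a reader or writer for it.
func validateCompression(format string) error {
    if !slices.Contains(CompressionFormats, format) {
        return ErrInvalidCompression
    }
    return nil
}

func main() {
    fmt.Println(validateCompression("snappy")) // <nil>
    fmt.Println(validateCompression("lzma"))   // supported compression formats: ...
}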
@@ -133,7 +135,7 @@ func init() {
     rootCmd.Flags().BoolVar(&CaseSensitive, "case-sensitive", false, "use case-sensitive matching for filters")
     rootCmd.Flags().BoolVar(&Code, "code", false, "enable support for source code files")
     rootCmd.Flags().StringVar(&CodeTheme, "code-theme", "solarized-dark256", "theme for source code syntax highlighting")
-    rootCmd.Flags().StringVar(&Compression, "compression", "zstd", "compression format to use for index (flate, gzip, lzw, none, zlib, zstd)")
+    rootCmd.Flags().StringVar(&Compression, "compression", "zstd", "compression format to use for index (flate, gzip, lz4, lzw, none, snappy, zlib, zstd)")
     rootCmd.Flags().IntVar(&Concurrency, "concurrency", 8192, "maximum concurrency for scan threads")
     rootCmd.Flags().BoolVar(&DisableButtons, "disable-buttons", false, "disable first/prev/next/last buttons")
     rootCmd.Flags().BoolVar(&ExitOnError, "exit-on-error", false, "shut down webserver on error, instead of just printing error")
go.mod (2 changes)
@@ -4,8 +4,10 @@ go 1.21
 
 require (
     github.com/alecthomas/chroma/v2 v2.12.0
+    github.com/golang/snappy v0.0.4
    github.com/julienschmidt/httprouter v1.3.0
     github.com/klauspost/compress v1.17.4
+    github.com/pierrec/lz4/v4 v4.1.19
     github.com/spf13/cobra v1.8.0
     github.com/yosssi/gohtml v0.0.0-20201013000340-ee4748c638f4
     golang.org/x/image v0.14.0
go.sum (4 changes)
@@ -7,6 +7,8 @@ github.com/alecthomas/repr v0.2.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW5
 github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
 github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
+github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
 github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
 github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
@@ -15,6 +17,8 @@ github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4d
 github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
 github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
 github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
+github.com/pierrec/lz4/v4 v4.1.19 h1:tYLzDnjDXh9qIxSTKHwXwOYmm9d887Y7Y1ZkyXYHAN4=
+github.com/pierrec/lz4/v4 v4.1.19/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0=
 github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho=
@@ -0,0 +1,16 @@
+cmd/snappytool/snappytool
+testdata/bench
+
+# These explicitly listed benchmark data files are for an obsolete version of
+# snappy_test.go.
+testdata/alice29.txt
+testdata/asyoulik.txt
+testdata/fireworks.jpeg
+testdata/geo.protodata
+testdata/html
+testdata/html_x_4
+testdata/kppkn.gtb
+testdata/lcet10.txt
+testdata/paper-100k.pdf
+testdata/plrabn12.txt
+testdata/urls.10K
@@ -0,0 +1,18 @@
+# This is the official list of Snappy-Go authors for copyright purposes.
+# This file is distinct from the CONTRIBUTORS files.
+# See the latter for an explanation.
+
+# Names should be added to this file as
+# Name or Organization <email address>
+# The email address is not required for organizations.
+
+# Please keep the list sorted.
+
+Amazon.com, Inc
+Damian Gryski <dgryski@gmail.com>
+Eric Buth <eric@topos.com>
+Google Inc.
+Jan Mercl <0xjnml@gmail.com>
+Klaus Post <klauspost@gmail.com>
+Rodolfo Carvalho <rhcarvalho@gmail.com>
+Sebastien Binet <seb.binet@gmail.com>
@@ -0,0 +1,41 @@
+# This is the official list of people who can contribute
+# (and typically have contributed) code to the Snappy-Go repository.
+# The AUTHORS file lists the copyright holders; this file
+# lists people. For example, Google employees are listed here
+# but not in AUTHORS, because Google holds the copyright.
+#
+# The submission process automatically checks to make sure
+# that people submitting code are listed in this file (by email address).
+#
+# Names should be added to this file only after verifying that
+# the individual or the individual's organization has agreed to
+# the appropriate Contributor License Agreement, found here:
+#
+# http://code.google.com/legal/individual-cla-v1.0.html
+# http://code.google.com/legal/corporate-cla-v1.0.html
+#
+# The agreement for individuals can be filled out on the web.
+#
+# When adding J Random Contributor's name to this file,
+# either J's name or J's organization's name should be
+# added to the AUTHORS file, depending on whether the
+# individual or corporate CLA was used.
+
+# Names should be added to this file like so:
+# Name <email address>
+
+# Please keep the list sorted.
+
+Alex Legg <alexlegg@google.com>
+Damian Gryski <dgryski@gmail.com>
+Eric Buth <eric@topos.com>
+Jan Mercl <0xjnml@gmail.com>
+Jonathan Swinney <jswinney@amazon.com>
+Kai Backman <kaib@golang.org>
+Klaus Post <klauspost@gmail.com>
+Marc-Antoine Ruel <maruel@chromium.org>
+Nigel Tao <nigeltao@golang.org>
+Rob Pike <r@golang.org>
+Rodolfo Carvalho <rhcarvalho@gmail.com>
+Russ Cox <rsc@golang.org>
+Sebastien Binet <seb.binet@gmail.com>
@@ -0,0 +1,27 @@
+Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+* Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@ -0,0 +1,107 @@
|
|||
The Snappy compression format in the Go programming language.
|
||||
|
||||
To download and install from source:
|
||||
$ go get github.com/golang/snappy
|
||||
|
||||
Unless otherwise noted, the Snappy-Go source files are distributed
|
||||
under the BSD-style license found in the LICENSE file.
|
||||
|
||||
|
||||
|
||||
Benchmarks.
|
||||
|
||||
The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten
|
||||
or so files, the same set used by the C++ Snappy code (github.com/google/snappy
|
||||
and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @
|
||||
3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29:
|
||||
|
||||
"go test -test.bench=."
|
||||
|
||||
_UFlat0-8 2.19GB/s ± 0% html
|
||||
_UFlat1-8 1.41GB/s ± 0% urls
|
||||
_UFlat2-8 23.5GB/s ± 2% jpg
|
||||
_UFlat3-8 1.91GB/s ± 0% jpg_200
|
||||
_UFlat4-8 14.0GB/s ± 1% pdf
|
||||
_UFlat5-8 1.97GB/s ± 0% html4
|
||||
_UFlat6-8 814MB/s ± 0% txt1
|
||||
_UFlat7-8 785MB/s ± 0% txt2
|
||||
_UFlat8-8 857MB/s ± 0% txt3
|
||||
_UFlat9-8 719MB/s ± 1% txt4
|
||||
_UFlat10-8 2.84GB/s ± 0% pb
|
||||
_UFlat11-8 1.05GB/s ± 0% gaviota
|
||||
|
||||
_ZFlat0-8 1.04GB/s ± 0% html
|
||||
_ZFlat1-8 534MB/s ± 0% urls
|
||||
_ZFlat2-8 15.7GB/s ± 1% jpg
|
||||
_ZFlat3-8 740MB/s ± 3% jpg_200
|
||||
_ZFlat4-8 9.20GB/s ± 1% pdf
|
||||
_ZFlat5-8 991MB/s ± 0% html4
|
||||
_ZFlat6-8 379MB/s ± 0% txt1
|
||||
_ZFlat7-8 352MB/s ± 0% txt2
|
||||
_ZFlat8-8 396MB/s ± 1% txt3
|
||||
_ZFlat9-8 327MB/s ± 1% txt4
|
||||
_ZFlat10-8 1.33GB/s ± 1% pb
|
||||
_ZFlat11-8 605MB/s ± 1% gaviota
|
||||
|
||||
|
||||
|
||||
"go test -test.bench=. -tags=noasm"
|
||||
|
||||
_UFlat0-8 621MB/s ± 2% html
|
||||
_UFlat1-8 494MB/s ± 1% urls
|
||||
_UFlat2-8 23.2GB/s ± 1% jpg
|
||||
_UFlat3-8 1.12GB/s ± 1% jpg_200
|
||||
_UFlat4-8 4.35GB/s ± 1% pdf
|
||||
_UFlat5-8 609MB/s ± 0% html4
|
||||
_UFlat6-8 296MB/s ± 0% txt1
|
||||
_UFlat7-8 288MB/s ± 0% txt2
|
||||
_UFlat8-8 309MB/s ± 1% txt3
|
||||
_UFlat9-8 280MB/s ± 1% txt4
|
||||
_UFlat10-8 753MB/s ± 0% pb
|
||||
_UFlat11-8 400MB/s ± 0% gaviota
|
||||
|
||||
_ZFlat0-8 409MB/s ± 1% html
|
||||
_ZFlat1-8 250MB/s ± 1% urls
|
||||
_ZFlat2-8 12.3GB/s ± 1% jpg
|
||||
_ZFlat3-8 132MB/s ± 0% jpg_200
|
||||
_ZFlat4-8 2.92GB/s ± 0% pdf
|
||||
_ZFlat5-8 405MB/s ± 1% html4
|
||||
_ZFlat6-8 179MB/s ± 1% txt1
|
||||
_ZFlat7-8 170MB/s ± 1% txt2
|
||||
_ZFlat8-8 189MB/s ± 1% txt3
|
||||
_ZFlat9-8 164MB/s ± 1% txt4
|
||||
_ZFlat10-8 479MB/s ± 1% pb
|
||||
_ZFlat11-8 270MB/s ± 1% gaviota
|
||||
|
||||
|
||||
|
||||
For comparison (Go's encoded output is byte-for-byte identical to C++'s), here
|
||||
are the numbers from C++ Snappy's
|
||||
|
||||
make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log
|
||||
|
||||
BM_UFlat/0 2.4GB/s html
|
||||
BM_UFlat/1 1.4GB/s urls
|
||||
BM_UFlat/2 21.8GB/s jpg
|
||||
BM_UFlat/3 1.5GB/s jpg_200
|
||||
BM_UFlat/4 13.3GB/s pdf
|
||||
BM_UFlat/5 2.1GB/s html4
|
||||
BM_UFlat/6 1.0GB/s txt1
|
||||
BM_UFlat/7 959.4MB/s txt2
|
||||
BM_UFlat/8 1.0GB/s txt3
|
||||
BM_UFlat/9 864.5MB/s txt4
|
||||
BM_UFlat/10 2.9GB/s pb
|
||||
BM_UFlat/11 1.2GB/s gaviota
|
||||
|
||||
BM_ZFlat/0 944.3MB/s html (22.31 %)
|
||||
BM_ZFlat/1 501.6MB/s urls (47.78 %)
|
||||
BM_ZFlat/2 14.3GB/s jpg (99.95 %)
|
||||
BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %)
|
||||
BM_ZFlat/4 8.3GB/s pdf (83.30 %)
|
||||
BM_ZFlat/5 903.5MB/s html4 (22.52 %)
|
||||
BM_ZFlat/6 336.0MB/s txt1 (57.88 %)
|
||||
BM_ZFlat/7 312.3MB/s txt2 (61.91 %)
|
||||
BM_ZFlat/8 353.1MB/s txt3 (54.99 %)
|
||||
BM_ZFlat/9 289.9MB/s txt4 (66.26 %)
|
||||
BM_ZFlat/10 1.2GB/s pb (19.68 %)
|
||||
BM_ZFlat/11 527.4MB/s gaviota (37.72 %)
|
|
@ -0,0 +1,264 @@
|
|||
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package snappy
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrCorrupt reports that the input is invalid.
|
||||
ErrCorrupt = errors.New("snappy: corrupt input")
|
||||
// ErrTooLarge reports that the uncompressed length is too large.
|
||||
ErrTooLarge = errors.New("snappy: decoded block is too large")
|
||||
// ErrUnsupported reports that the input isn't supported.
|
||||
ErrUnsupported = errors.New("snappy: unsupported input")
|
||||
|
||||
errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
|
||||
)
|
||||
|
||||
// DecodedLen returns the length of the decoded block.
|
||||
func DecodedLen(src []byte) (int, error) {
|
||||
v, _, err := decodedLen(src)
|
||||
return v, err
|
||||
}
|
||||
|
||||
// decodedLen returns the length of the decoded block and the number of bytes
|
||||
// that the length header occupied.
|
||||
func decodedLen(src []byte) (blockLen, headerLen int, err error) {
|
||||
v, n := binary.Uvarint(src)
|
||||
if n <= 0 || v > 0xffffffff {
|
||||
return 0, 0, ErrCorrupt
|
||||
}
|
||||
|
||||
const wordSize = 32 << (^uint(0) >> 32 & 1)
|
||||
if wordSize == 32 && v > 0x7fffffff {
|
||||
return 0, 0, ErrTooLarge
|
||||
}
|
||||
return int(v), n, nil
|
||||
}
|
||||
|
||||
const (
|
||||
decodeErrCodeCorrupt = 1
|
||||
decodeErrCodeUnsupportedLiteralLength = 2
|
||||
)
|
||||
|
||||
// Decode returns the decoded form of src. The returned slice may be a sub-
|
||||
// slice of dst if dst was large enough to hold the entire decoded block.
|
||||
// Otherwise, a newly allocated slice will be returned.
|
||||
//
|
||||
// The dst and src must not overlap. It is valid to pass a nil dst.
|
||||
//
|
||||
// Decode handles the Snappy block format, not the Snappy stream format.
|
||||
func Decode(dst, src []byte) ([]byte, error) {
|
||||
dLen, s, err := decodedLen(src)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if dLen <= len(dst) {
|
||||
dst = dst[:dLen]
|
||||
} else {
|
||||
dst = make([]byte, dLen)
|
||||
}
|
||||
switch decode(dst, src[s:]) {
|
||||
case 0:
|
||||
return dst, nil
|
||||
case decodeErrCodeUnsupportedLiteralLength:
|
||||
return nil, errUnsupportedLiteralLength
|
||||
}
|
||||
return nil, ErrCorrupt
|
||||
}
|
||||
|
||||
// NewReader returns a new Reader that decompresses from r, using the framing
|
||||
// format described at
|
||||
// https://github.com/google/snappy/blob/master/framing_format.txt
|
||||
func NewReader(r io.Reader) *Reader {
|
||||
return &Reader{
|
||||
r: r,
|
||||
decoded: make([]byte, maxBlockSize),
|
||||
buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize),
|
||||
}
|
||||
}
|
||||
|
||||
// Reader is an io.Reader that can read Snappy-compressed bytes.
|
||||
//
|
||||
// Reader handles the Snappy stream format, not the Snappy block format.
|
||||
type Reader struct {
|
||||
r io.Reader
|
||||
err error
|
||||
decoded []byte
|
||||
buf []byte
|
||||
// decoded[i:j] contains decoded bytes that have not yet been passed on.
|
||||
i, j int
|
||||
readHeader bool
|
||||
}
|
||||
|
||||
// Reset discards any buffered data, resets all state, and switches the Snappy
|
||||
// reader to read from r. This permits reusing a Reader rather than allocating
|
||||
// a new one.
|
||||
func (r *Reader) Reset(reader io.Reader) {
|
||||
r.r = reader
|
||||
r.err = nil
|
||||
r.i = 0
|
||||
r.j = 0
|
||||
r.readHeader = false
|
||||
}
|
||||
|
||||
func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) {
|
||||
if _, r.err = io.ReadFull(r.r, p); r.err != nil {
|
||||
if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
|
||||
r.err = ErrCorrupt
|
||||
}
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (r *Reader) fill() error {
|
||||
for r.i >= r.j {
|
||||
if !r.readFull(r.buf[:4], true) {
|
||||
return r.err
|
||||
}
|
||||
chunkType := r.buf[0]
|
||||
if !r.readHeader {
|
||||
if chunkType != chunkTypeStreamIdentifier {
|
||||
r.err = ErrCorrupt
|
||||
return r.err
|
||||
}
|
||||
r.readHeader = true
|
||||
}
|
||||
chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
|
||||
if chunkLen > len(r.buf) {
|
||||
r.err = ErrUnsupported
|
||||
return r.err
|
||||
}
|
||||
|
||||
// The chunk types are specified at
|
||||
// https://github.com/google/snappy/blob/master/framing_format.txt
|
||||
switch chunkType {
|
||||
case chunkTypeCompressedData:
|
||||
// Section 4.2. Compressed data (chunk type 0x00).
|
||||
if chunkLen < checksumSize {
|
||||
r.err = ErrCorrupt
|
||||
return r.err
|
||||
}
|
||||
buf := r.buf[:chunkLen]
|
||||
if !r.readFull(buf, false) {
|
||||
return r.err
|
||||
}
|
||||
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
|
||||
buf = buf[checksumSize:]
|
||||
|
||||
n, err := DecodedLen(buf)
|
||||
if err != nil {
|
||||
r.err = err
|
||||
return r.err
|
||||
}
|
||||
if n > len(r.decoded) {
|
||||
r.err = ErrCorrupt
|
||||
return r.err
|
||||
}
|
||||
if _, err := Decode(r.decoded, buf); err != nil {
|
||||
r.err = err
|
||||
return r.err
|
||||
}
|
||||
if crc(r.decoded[:n]) != checksum {
|
||||
r.err = ErrCorrupt
|
||||
return r.err
|
||||
}
|
||||
r.i, r.j = 0, n
|
||||
continue
|
||||
|
||||
case chunkTypeUncompressedData:
|
||||
// Section 4.3. Uncompressed data (chunk type 0x01).
|
||||
if chunkLen < checksumSize {
|
||||
r.err = ErrCorrupt
|
||||
return r.err
|
||||
}
|
||||
buf := r.buf[:checksumSize]
|
||||
if !r.readFull(buf, false) {
|
||||
return r.err
|
||||
}
|
||||
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
|
||||
// Read directly into r.decoded instead of via r.buf.
|
||||
n := chunkLen - checksumSize
|
||||
if n > len(r.decoded) {
|
||||
r.err = ErrCorrupt
|
||||
return r.err
|
||||
}
|
||||
if !r.readFull(r.decoded[:n], false) {
|
||||
return r.err
|
||||
}
|
||||
if crc(r.decoded[:n]) != checksum {
|
||||
r.err = ErrCorrupt
|
||||
return r.err
|
||||
}
|
||||
r.i, r.j = 0, n
|
||||
continue
|
||||
|
||||
case chunkTypeStreamIdentifier:
|
||||
// Section 4.1. Stream identifier (chunk type 0xff).
|
||||
if chunkLen != len(magicBody) {
|
||||
r.err = ErrCorrupt
|
||||
return r.err
|
||||
}
|
||||
if !r.readFull(r.buf[:len(magicBody)], false) {
|
||||
return r.err
|
||||
}
|
||||
for i := 0; i < len(magicBody); i++ {
|
||||
if r.buf[i] != magicBody[i] {
|
||||
r.err = ErrCorrupt
|
||||
return r.err
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if chunkType <= 0x7f {
|
||||
// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
|
||||
r.err = ErrUnsupported
|
||||
return r.err
|
||||
}
|
||||
// Section 4.4 Padding (chunk type 0xfe).
|
||||
// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
|
||||
if !r.readFull(r.buf[:chunkLen], false) {
|
||||
return r.err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read satisfies the io.Reader interface.
|
||||
func (r *Reader) Read(p []byte) (int, error) {
|
||||
if r.err != nil {
|
||||
return 0, r.err
|
||||
}
|
||||
|
||||
if err := r.fill(); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
n := copy(p, r.decoded[r.i:r.j])
|
||||
r.i += n
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// ReadByte satisfies the io.ByteReader interface.
|
||||
func (r *Reader) ReadByte() (byte, error) {
|
||||
if r.err != nil {
|
||||
return 0, r.err
|
||||
}
|
||||
|
||||
if err := r.fill(); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
c := r.decoded[r.i]
|
||||
r.i++
|
||||
return c, nil
|
||||
}
|
|
@ -0,0 +1,490 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// The asm code generally follows the pure Go code in decode_other.go, except
|
||||
// where marked with a "!!!".
|
||||
|
||||
// func decode(dst, src []byte) int
|
||||
//
|
||||
// All local variables fit into registers. The non-zero stack size is only to
|
||||
// spill registers and push args when issuing a CALL. The register allocation:
|
||||
// - AX scratch
|
||||
// - BX scratch
|
||||
// - CX length or x
|
||||
// - DX offset
|
||||
// - SI &src[s]
|
||||
// - DI &dst[d]
|
||||
// + R8 dst_base
|
||||
// + R9 dst_len
|
||||
// + R10 dst_base + dst_len
|
||||
// + R11 src_base
|
||||
// + R12 src_len
|
||||
// + R13 src_base + src_len
|
||||
// - R14 used by doCopy
|
||||
// - R15 used by doCopy
|
||||
//
|
||||
// The registers R8-R13 (marked with a "+") are set at the start of the
|
||||
// function, and after a CALL returns, and are not otherwise modified.
|
||||
//
|
||||
// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI.
|
||||
// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI.
|
||||
TEXT ·decode(SB), NOSPLIT, $48-56
|
||||
// Initialize SI, DI and R8-R13.
|
||||
MOVQ dst_base+0(FP), R8
|
||||
MOVQ dst_len+8(FP), R9
|
||||
MOVQ R8, DI
|
||||
MOVQ R8, R10
|
||||
ADDQ R9, R10
|
||||
MOVQ src_base+24(FP), R11
|
||||
MOVQ src_len+32(FP), R12
|
||||
MOVQ R11, SI
|
||||
MOVQ R11, R13
|
||||
ADDQ R12, R13
|
||||
|
||||
loop:
|
||||
// for s < len(src)
|
||||
CMPQ SI, R13
|
||||
JEQ end
|
||||
|
||||
// CX = uint32(src[s])
|
||||
//
|
||||
// switch src[s] & 0x03
|
||||
MOVBLZX (SI), CX
|
||||
MOVL CX, BX
|
||||
ANDL $3, BX
|
||||
CMPL BX, $1
|
||||
JAE tagCopy
|
||||
|
||||
// ----------------------------------------
|
||||
// The code below handles literal tags.
|
||||
|
||||
// case tagLiteral:
|
||||
// x := uint32(src[s] >> 2)
|
||||
// switch
|
||||
SHRL $2, CX
|
||||
CMPL CX, $60
|
||||
JAE tagLit60Plus
|
||||
|
||||
// case x < 60:
|
||||
// s++
|
||||
INCQ SI
|
||||
|
||||
doLit:
|
||||
// This is the end of the inner "switch", when we have a literal tag.
|
||||
//
|
||||
// We assume that CX == x and x fits in a uint32, where x is the variable
|
||||
// used in the pure Go decode_other.go code.
|
||||
|
||||
// length = int(x) + 1
|
||||
//
|
||||
// Unlike the pure Go code, we don't need to check if length <= 0 because
|
||||
// CX can hold 64 bits, so the increment cannot overflow.
|
||||
INCQ CX
|
||||
|
||||
// Prepare to check if copying length bytes will run past the end of dst or
|
||||
// src.
|
||||
//
|
||||
// AX = len(dst) - d
|
||||
// BX = len(src) - s
|
||||
MOVQ R10, AX
|
||||
SUBQ DI, AX
|
||||
MOVQ R13, BX
|
||||
SUBQ SI, BX
|
||||
|
||||
// !!! Try a faster technique for short (16 or fewer bytes) copies.
|
||||
//
|
||||
// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
|
||||
// goto callMemmove // Fall back on calling runtime·memmove.
|
||||
// }
|
||||
//
|
||||
// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
|
||||
// against 21 instead of 16, because it cannot assume that all of its input
|
||||
// is contiguous in memory and so it needs to leave enough source bytes to
|
||||
// read the next tag without refilling buffers, but Go's Decode assumes
|
||||
// contiguousness (the src argument is a []byte).
|
||||
CMPQ CX, $16
|
||||
JGT callMemmove
|
||||
CMPQ AX, $16
|
||||
JLT callMemmove
|
||||
CMPQ BX, $16
|
||||
JLT callMemmove
|
||||
|
||||
// !!! Implement the copy from src to dst as a 16-byte load and store.
|
||||
// (Decode's documentation says that dst and src must not overlap.)
|
||||
//
|
||||
// This always copies 16 bytes, instead of only length bytes, but that's
|
||||
// OK. If the input is a valid Snappy encoding then subsequent iterations
|
||||
// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
|
||||
// non-nil error), so the overrun will be ignored.
|
||||
//
|
||||
// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
|
||||
// 16-byte loads and stores. This technique probably wouldn't be as
|
||||
// effective on architectures that are fussier about alignment.
|
||||
MOVOU 0(SI), X0
|
||||
MOVOU X0, 0(DI)
|
||||
|
||||
// d += length
|
||||
// s += length
|
||||
ADDQ CX, DI
|
||||
ADDQ CX, SI
|
||||
JMP loop
|
||||
|
||||
callMemmove:
|
||||
// if length > len(dst)-d || length > len(src)-s { etc }
|
||||
CMPQ CX, AX
|
||||
JGT errCorrupt
|
||||
CMPQ CX, BX
|
||||
JGT errCorrupt
|
||||
|
||||
// copy(dst[d:], src[s:s+length])
|
||||
//
|
||||
// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
|
||||
// DI, SI and CX as arguments. Coincidentally, we also need to spill those
|
||||
// three registers to the stack, to save local variables across the CALL.
|
||||
MOVQ DI, 0(SP)
|
||||
MOVQ SI, 8(SP)
|
||||
MOVQ CX, 16(SP)
|
||||
MOVQ DI, 24(SP)
|
||||
MOVQ SI, 32(SP)
|
||||
MOVQ CX, 40(SP)
|
||||
CALL runtime·memmove(SB)
|
||||
|
||||
// Restore local variables: unspill registers from the stack and
|
||||
// re-calculate R8-R13.
|
||||
MOVQ 24(SP), DI
|
||||
MOVQ 32(SP), SI
|
||||
MOVQ 40(SP), CX
|
||||
MOVQ dst_base+0(FP), R8
|
||||
MOVQ dst_len+8(FP), R9
|
||||
MOVQ R8, R10
|
||||
ADDQ R9, R10
|
||||
MOVQ src_base+24(FP), R11
|
||||
MOVQ src_len+32(FP), R12
|
||||
MOVQ R11, R13
|
||||
ADDQ R12, R13
|
||||
|
||||
// d += length
|
||||
// s += length
|
||||
ADDQ CX, DI
|
||||
ADDQ CX, SI
|
||||
JMP loop
|
||||
|
||||
tagLit60Plus:
|
||||
// !!! This fragment does the
|
||||
//
|
||||
// s += x - 58; if uint(s) > uint(len(src)) { etc }
|
||||
//
|
||||
// checks. In the asm version, we code it once instead of once per switch case.
|
||||
ADDQ CX, SI
|
||||
SUBQ $58, SI
|
||||
MOVQ SI, BX
|
||||
SUBQ R11, BX
|
||||
CMPQ BX, R12
|
||||
JA errCorrupt
|
||||
|
||||
// case x == 60:
|
||||
CMPL CX, $61
|
||||
JEQ tagLit61
|
||||
JA tagLit62Plus
|
||||
|
||||
// x = uint32(src[s-1])
|
||||
MOVBLZX -1(SI), CX
|
||||
JMP doLit
|
||||
|
||||
tagLit61:
|
||||
// case x == 61:
|
||||
// x = uint32(src[s-2]) | uint32(src[s-1])<<8
|
||||
MOVWLZX -2(SI), CX
|
||||
JMP doLit
|
||||
|
||||
tagLit62Plus:
|
||||
CMPL CX, $62
|
||||
JA tagLit63
|
||||
|
||||
// case x == 62:
|
||||
// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
|
||||
MOVWLZX -3(SI), CX
|
||||
MOVBLZX -1(SI), BX
|
||||
SHLL $16, BX
|
||||
ORL BX, CX
|
||||
JMP doLit
|
||||
|
||||
tagLit63:
|
||||
// case x == 63:
|
||||
// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
|
||||
MOVL -4(SI), CX
|
||||
JMP doLit
|
||||
|
||||
// The code above handles literal tags.
|
||||
// ----------------------------------------
|
||||
// The code below handles copy tags.
|
||||
|
||||
tagCopy4:
|
||||
// case tagCopy4:
|
||||
// s += 5
|
||||
ADDQ $5, SI
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
MOVQ SI, BX
|
||||
SUBQ R11, BX
|
||||
CMPQ BX, R12
|
||||
JA errCorrupt
|
||||
|
||||
// length = 1 + int(src[s-5])>>2
|
||||
SHRQ $2, CX
|
||||
INCQ CX
|
||||
|
||||
// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
|
||||
MOVLQZX -4(SI), DX
|
||||
JMP doCopy
|
||||
|
||||
tagCopy2:
|
||||
// case tagCopy2:
|
||||
// s += 3
|
||||
ADDQ $3, SI
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
MOVQ SI, BX
|
||||
SUBQ R11, BX
|
||||
CMPQ BX, R12
|
||||
JA errCorrupt
|
||||
|
||||
// length = 1 + int(src[s-3])>>2
|
||||
SHRQ $2, CX
|
||||
INCQ CX
|
||||
|
||||
// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
|
||||
MOVWQZX -2(SI), DX
|
||||
JMP doCopy
|
||||
|
||||
tagCopy:
|
||||
// We have a copy tag. We assume that:
|
||||
// - BX == src[s] & 0x03
|
||||
// - CX == src[s]
|
||||
CMPQ BX, $2
|
||||
JEQ tagCopy2
|
||||
JA tagCopy4
|
||||
|
||||
// case tagCopy1:
|
||||
// s += 2
|
||||
ADDQ $2, SI
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
MOVQ SI, BX
|
||||
SUBQ R11, BX
|
||||
CMPQ BX, R12
|
||||
JA errCorrupt
|
||||
|
||||
// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
|
||||
MOVQ CX, DX
|
||||
ANDQ $0xe0, DX
|
||||
SHLQ $3, DX
|
||||
MOVBQZX -1(SI), BX
|
||||
ORQ BX, DX
|
||||
|
||||
// length = 4 + int(src[s-2])>>2&0x7
|
||||
SHRQ $2, CX
|
||||
ANDQ $7, CX
|
||||
ADDQ $4, CX
|
||||
|
||||
doCopy:
|
||||
// This is the end of the outer "switch", when we have a copy tag.
|
||||
//
|
||||
// We assume that:
|
||||
// - CX == length && CX > 0
|
||||
// - DX == offset
|
||||
|
||||
// if offset <= 0 { etc }
|
||||
CMPQ DX, $0
|
||||
JLE errCorrupt
|
||||
|
||||
// if d < offset { etc }
|
||||
MOVQ DI, BX
|
||||
SUBQ R8, BX
|
||||
CMPQ BX, DX
|
||||
JLT errCorrupt
|
||||
|
||||
// if length > len(dst)-d { etc }
|
||||
MOVQ R10, BX
|
||||
SUBQ DI, BX
|
||||
CMPQ CX, BX
|
||||
JGT errCorrupt
|
||||
|
||||
// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
|
||||
//
|
||||
// Set:
|
||||
// - R14 = len(dst)-d
|
||||
// - R15 = &dst[d-offset]
|
||||
MOVQ R10, R14
|
||||
SUBQ DI, R14
|
||||
MOVQ DI, R15
|
||||
SUBQ DX, R15
|
||||
|
||||
// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
|
||||
//
|
||||
// First, try using two 8-byte load/stores, similar to the doLit technique
|
||||
// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
|
||||
// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
|
||||
// and not one 16-byte load/store, and the first store has to be before the
|
||||
// second load, due to the overlap if offset is in the range [8, 16).
|
||||
//
|
||||
// if length > 16 || offset < 8 || len(dst)-d < 16 {
|
||||
// goto slowForwardCopy
|
||||
// }
|
||||
// copy 16 bytes
|
||||
// d += length
|
||||
CMPQ CX, $16
|
||||
JGT slowForwardCopy
|
||||
CMPQ DX, $8
|
||||
JLT slowForwardCopy
|
||||
CMPQ R14, $16
|
||||
JLT slowForwardCopy
|
||||
MOVQ 0(R15), AX
|
||||
MOVQ AX, 0(DI)
|
||||
MOVQ 8(R15), BX
|
||||
MOVQ BX, 8(DI)
|
||||
ADDQ CX, DI
|
||||
JMP loop
|
||||
|
||||
slowForwardCopy:
|
||||
// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
|
||||
// can still try 8-byte load stores, provided we can overrun up to 10 extra
|
||||
// bytes. As above, the overrun will be fixed up by subsequent iterations
|
||||
// of the outermost loop.
|
||||
//
|
||||
// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
|
||||
// commentary says:
|
||||
//
|
||||
// ----
|
||||
//
|
||||
// The main part of this loop is a simple copy of eight bytes at a time
|
||||
// until we've copied (at least) the requested amount of bytes. However,
|
||||
// if d and d-offset are less than eight bytes apart (indicating a
|
||||
// repeating pattern of length < 8), we first need to expand the pattern in
|
||||
// order to get the correct results. For instance, if the buffer looks like
|
||||
// this, with the eight-byte <d-offset> and <d> patterns marked as
|
||||
// intervals:
|
||||
//
|
||||
// abxxxxxxxxxxxx
|
||||
// [------] d-offset
|
||||
// [------] d
|
||||
//
|
||||
// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
|
||||
// once, after which we can move <d> two bytes without moving <d-offset>:
|
||||
//
|
||||
// ababxxxxxxxxxx
|
||||
// [------] d-offset
|
||||
// [------] d
|
||||
//
|
||||
// and repeat the exercise until the two no longer overlap.
|
||||
//
|
||||
// This allows us to do very well in the special case of one single byte
|
||||
// repeated many times, without taking a big hit for more general cases.
|
||||
//
|
||||
// The worst case of extra writing past the end of the match occurs when
|
||||
// offset == 1 and length == 1; the last copy will read from byte positions
|
||||
// [0..7] and write to [4..11], whereas it was only supposed to write to
|
||||
// position 1. Thus, ten excess bytes.
|
||||
//
|
||||
// ----
|
||||
//
|
||||
// That "10 byte overrun" worst case is confirmed by Go's
|
||||
// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
|
||||
// and finishSlowForwardCopy algorithm.
|
||||
//
|
||||
// if length > len(dst)-d-10 {
|
||||
// goto verySlowForwardCopy
|
||||
// }
|
||||
SUBQ $10, R14
|
||||
CMPQ CX, R14
|
||||
JGT verySlowForwardCopy
|
||||
|
||||
makeOffsetAtLeast8:
|
||||
// !!! As above, expand the pattern so that offset >= 8 and we can use
|
||||
// 8-byte load/stores.
|
||||
//
|
||||
// for offset < 8 {
|
||||
// copy 8 bytes from dst[d-offset:] to dst[d:]
|
||||
// length -= offset
|
||||
// d += offset
|
||||
// offset += offset
|
||||
// // The two previous lines together means that d-offset, and therefore
|
||||
// // R15, is unchanged.
|
||||
// }
|
||||
CMPQ DX, $8
|
||||
JGE fixUpSlowForwardCopy
|
||||
MOVQ (R15), BX
|
||||
MOVQ BX, (DI)
|
||||
SUBQ DX, CX
|
||||
ADDQ DX, DI
|
||||
ADDQ DX, DX
|
||||
JMP makeOffsetAtLeast8
|
||||
|
||||
fixUpSlowForwardCopy:
|
||||
// !!! Add length (which might be negative now) to d (implied by DI being
|
||||
// &dst[d]) so that d ends up at the right place when we jump back to the
|
||||
// top of the loop. Before we do that, though, we save DI to AX so that, if
|
||||
// length is positive, copying the remaining length bytes will write to the
|
||||
// right place.
|
||||
MOVQ DI, AX
|
||||
ADDQ CX, DI
|
||||
|
||||
finishSlowForwardCopy:
|
||||
// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
|
||||
// length means that we overrun, but as above, that will be fixed up by
|
||||
// subsequent iterations of the outermost loop.
|
||||
CMPQ CX, $0
|
||||
JLE loop
|
||||
MOVQ (R15), BX
|
||||
MOVQ BX, (AX)
|
||||
ADDQ $8, R15
|
||||
ADDQ $8, AX
|
||||
SUBQ $8, CX
|
||||
JMP finishSlowForwardCopy
|
||||
|
||||
verySlowForwardCopy:
|
||||
// verySlowForwardCopy is a simple implementation of forward copy. In C
|
||||
// parlance, this is a do/while loop instead of a while loop, since we know
|
||||
// that length > 0. In Go syntax:
|
||||
//
|
||||
// for {
|
||||
// dst[d] = dst[d - offset]
|
||||
// d++
|
||||
// length--
|
||||
// if length == 0 {
|
||||
// break
|
||||
// }
|
||||
// }
|
||||
MOVB (R15), BX
|
||||
MOVB BX, (DI)
|
||||
INCQ R15
|
||||
INCQ DI
|
||||
DECQ CX
|
||||
JNZ verySlowForwardCopy
|
||||
JMP loop
|
||||
|
||||
// The code above handles copy tags.
|
||||
// ----------------------------------------
|
||||
|
||||
end:
|
||||
// This is the end of the "for s < len(src)".
|
||||
//
|
||||
// if d != len(dst) { etc }
|
||||
CMPQ DI, R10
|
||||
JNE errCorrupt
|
||||
|
||||
// return 0
|
||||
MOVQ $0, ret+48(FP)
|
||||
RET
|
||||
|
||||
errCorrupt:
|
||||
// return decodeErrCodeCorrupt
|
||||
MOVQ $1, ret+48(FP)
|
||||
RET
|
|
@ -0,0 +1,494 @@
|
|||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// The asm code generally follows the pure Go code in decode_other.go, except
|
||||
// where marked with a "!!!".
|
||||
|
||||
// func decode(dst, src []byte) int
|
||||
//
|
||||
// All local variables fit into registers. The non-zero stack size is only to
|
||||
// spill registers and push args when issuing a CALL. The register allocation:
|
||||
// - R2 scratch
|
||||
// - R3 scratch
|
||||
// - R4 length or x
|
||||
// - R5 offset
|
||||
// - R6 &src[s]
|
||||
// - R7 &dst[d]
|
||||
// + R8 dst_base
|
||||
// + R9 dst_len
|
||||
// + R10 dst_base + dst_len
|
||||
// + R11 src_base
|
||||
// + R12 src_len
|
||||
// + R13 src_base + src_len
|
||||
// - R14 used by doCopy
|
||||
// - R15 used by doCopy
|
||||
//
|
||||
// The registers R8-R13 (marked with a "+") are set at the start of the
|
||||
// function, and after a CALL returns, and are not otherwise modified.
|
||||
//
|
||||
// The d variable is implicitly R7 - R8, and len(dst)-d is R10 - R7.
|
||||
// The s variable is implicitly R6 - R11, and len(src)-s is R13 - R6.
|
||||
TEXT ·decode(SB), NOSPLIT, $56-56
|
||||
// Initialize R6, R7 and R8-R13.
|
||||
MOVD dst_base+0(FP), R8
|
||||
MOVD dst_len+8(FP), R9
|
||||
MOVD R8, R7
|
||||
MOVD R8, R10
|
||||
ADD R9, R10, R10
|
||||
MOVD src_base+24(FP), R11
|
||||
MOVD src_len+32(FP), R12
|
||||
MOVD R11, R6
|
||||
MOVD R11, R13
|
||||
ADD R12, R13, R13
|
||||
|
||||
loop:
|
||||
// for s < len(src)
|
||||
CMP R13, R6
|
||||
BEQ end
|
||||
|
||||
// R4 = uint32(src[s])
|
||||
//
|
||||
// switch src[s] & 0x03
|
||||
MOVBU (R6), R4
|
||||
MOVW R4, R3
|
||||
ANDW $3, R3
|
||||
MOVW $1, R1
|
||||
CMPW R1, R3
|
||||
BGE tagCopy
|
||||
|
||||
// ----------------------------------------
|
||||
// The code below handles literal tags.
|
||||
|
||||
// case tagLiteral:
|
||||
// x := uint32(src[s] >> 2)
|
||||
// switch
|
||||
MOVW $60, R1
|
||||
LSRW $2, R4, R4
|
||||
CMPW R4, R1
|
||||
BLS tagLit60Plus
|
||||
|
||||
// case x < 60:
|
||||
// s++
|
||||
ADD $1, R6, R6
|
||||
|
||||
doLit:
|
||||
// This is the end of the inner "switch", when we have a literal tag.
|
||||
//
|
||||
// We assume that R4 == x and x fits in a uint32, where x is the variable
|
||||
// used in the pure Go decode_other.go code.
|
||||
|
||||
// length = int(x) + 1
|
||||
//
|
||||
// Unlike the pure Go code, we don't need to check if length <= 0 because
|
||||
// R4 can hold 64 bits, so the increment cannot overflow.
|
||||
ADD $1, R4, R4
|
||||
|
||||
// Prepare to check if copying length bytes will run past the end of dst or
|
||||
// src.
|
||||
//
|
||||
// R2 = len(dst) - d
|
||||
// R3 = len(src) - s
|
||||
MOVD R10, R2
|
||||
SUB R7, R2, R2
|
||||
MOVD R13, R3
|
||||
SUB R6, R3, R3
|
||||
|
||||
// !!! Try a faster technique for short (16 or fewer bytes) copies.
|
||||
//
|
||||
// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
|
||||
// goto callMemmove // Fall back on calling runtime·memmove.
|
||||
// }
|
||||
//
|
||||
// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
|
||||
// against 21 instead of 16, because it cannot assume that all of its input
|
||||
// is contiguous in memory and so it needs to leave enough source bytes to
|
||||
// read the next tag without refilling buffers, but Go's Decode assumes
|
||||
// contiguousness (the src argument is a []byte).
|
||||
CMP $16, R4
|
||||
BGT callMemmove
|
||||
CMP $16, R2
|
||||
BLT callMemmove
|
||||
CMP $16, R3
|
||||
BLT callMemmove
|
||||
|
||||
// !!! Implement the copy from src to dst as a 16-byte load and store.
|
||||
// (Decode's documentation says that dst and src must not overlap.)
|
||||
//
|
||||
// This always copies 16 bytes, instead of only length bytes, but that's
|
||||
// OK. If the input is a valid Snappy encoding then subsequent iterations
|
||||
// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
|
||||
// non-nil error), so the overrun will be ignored.
|
||||
//
|
||||
// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
|
||||
// 16-byte loads and stores. This technique probably wouldn't be as
|
||||
// effective on architectures that are fussier about alignment.
|
||||
LDP 0(R6), (R14, R15)
|
||||
STP (R14, R15), 0(R7)
|
||||
|
||||
// d += length
|
||||
// s += length
|
||||
ADD R4, R7, R7
|
||||
ADD R4, R6, R6
|
||||
B loop
|
||||
|
||||
callMemmove:
|
||||
// if length > len(dst)-d || length > len(src)-s { etc }
|
||||
CMP R2, R4
|
||||
BGT errCorrupt
|
||||
CMP R3, R4
|
||||
BGT errCorrupt
|
||||
|
||||
// copy(dst[d:], src[s:s+length])
|
||||
//
|
||||
// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
|
||||
// R7, R6 and R4 as arguments. Coincidentally, we also need to spill those
|
||||
// three registers to the stack, to save local variables across the CALL.
|
||||
MOVD R7, 8(RSP)
|
||||
MOVD R6, 16(RSP)
|
||||
MOVD R4, 24(RSP)
|
||||
MOVD R7, 32(RSP)
|
||||
MOVD R6, 40(RSP)
|
||||
MOVD R4, 48(RSP)
|
||||
CALL runtime·memmove(SB)
|
||||
|
||||
// Restore local variables: unspill registers from the stack and
|
||||
// re-calculate R8-R13.
|
||||
MOVD 32(RSP), R7
|
||||
MOVD 40(RSP), R6
|
||||
MOVD 48(RSP), R4
|
||||
MOVD dst_base+0(FP), R8
|
||||
MOVD dst_len+8(FP), R9
|
||||
MOVD R8, R10
|
||||
ADD R9, R10, R10
|
||||
MOVD src_base+24(FP), R11
|
||||
MOVD src_len+32(FP), R12
|
||||
MOVD R11, R13
|
||||
ADD R12, R13, R13
|
||||
|
||||
// d += length
|
||||
// s += length
|
||||
ADD R4, R7, R7
|
||||
ADD R4, R6, R6
|
||||
B loop
|
||||
|
||||
tagLit60Plus:
|
||||
// !!! This fragment does the
|
||||
//
|
||||
// s += x - 58; if uint(s) > uint(len(src)) { etc }
|
||||
//
|
||||
// checks. In the asm version, we code it once instead of once per switch case.
|
||||
ADD R4, R6, R6
|
||||
SUB $58, R6, R6
|
||||
MOVD R6, R3
|
||||
SUB R11, R3, R3
|
||||
CMP R12, R3
|
||||
BGT errCorrupt
|
||||
|
||||
// case x == 60:
|
||||
MOVW $61, R1
|
||||
CMPW R1, R4
|
||||
BEQ tagLit61
|
||||
BGT tagLit62Plus
|
||||
|
||||
// x = uint32(src[s-1])
|
||||
MOVBU -1(R6), R4
|
||||
B doLit
|
||||
|
||||
tagLit61:
|
||||
// case x == 61:
|
||||
// x = uint32(src[s-2]) | uint32(src[s-1])<<8
|
||||
MOVHU -2(R6), R4
|
||||
B doLit
|
||||
|
||||
tagLit62Plus:
|
||||
CMPW $62, R4
|
||||
BHI tagLit63
|
||||
|
||||
// case x == 62:
|
||||
// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
|
||||
MOVHU -3(R6), R4
|
||||
MOVBU -1(R6), R3
|
||||
ORR R3<<16, R4
|
||||
B doLit
|
||||
|
||||
tagLit63:
|
||||
// case x == 63:
|
||||
// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
|
||||
MOVWU -4(R6), R4
|
||||
B doLit
|
||||
|
||||
// The code above handles literal tags.
|
||||
// ----------------------------------------
|
||||
// The code below handles copy tags.
|
||||
|
||||
tagCopy4:
|
||||
// case tagCopy4:
|
||||
// s += 5
|
||||
ADD $5, R6, R6
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
MOVD R6, R3
|
||||
SUB R11, R3, R3
|
||||
CMP R12, R3
|
||||
BGT errCorrupt
|
||||
|
||||
// length = 1 + int(src[s-5])>>2
|
||||
MOVD $1, R1
|
||||
ADD R4>>2, R1, R4
|
||||
|
||||
// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
|
||||
MOVWU -4(R6), R5
|
||||
B doCopy
|
||||
|
||||
tagCopy2:
|
||||
// case tagCopy2:
|
||||
// s += 3
|
||||
ADD $3, R6, R6
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
MOVD R6, R3
|
||||
SUB R11, R3, R3
|
||||
CMP R12, R3
|
||||
BGT errCorrupt
|
||||
|
||||
// length = 1 + int(src[s-3])>>2
|
||||
MOVD $1, R1
|
||||
ADD R4>>2, R1, R4
|
||||
|
||||
// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
|
||||
MOVHU -2(R6), R5
|
||||
B doCopy
|
||||
|
||||
tagCopy:
|
||||
// We have a copy tag. We assume that:
|
||||
// - R3 == src[s] & 0x03
|
||||
// - R4 == src[s]
|
||||
CMP $2, R3
|
||||
BEQ tagCopy2
|
||||
BGT tagCopy4
|
||||
|
||||
// case tagCopy1:
|
||||
// s += 2
|
||||
ADD $2, R6, R6
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
MOVD R6, R3
|
||||
SUB R11, R3, R3
|
||||
CMP R12, R3
|
||||
BGT errCorrupt
|
||||
|
||||
// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
|
||||
MOVD R4, R5
|
||||
AND $0xe0, R5
|
||||
MOVBU -1(R6), R3
|
||||
ORR R5<<3, R3, R5
|
||||
|
||||
// length = 4 + int(src[s-2])>>2&0x7
|
||||
MOVD $7, R1
|
||||
AND R4>>2, R1, R4
|
||||
ADD $4, R4, R4
|
||||
|
||||
doCopy:
|
||||
// This is the end of the outer "switch", when we have a copy tag.
|
||||
//
|
||||
// We assume that:
|
||||
// - R4 == length && R4 > 0
|
||||
// - R5 == offset
|
||||
|
||||
// if offset <= 0 { etc }
|
||||
MOVD $0, R1
|
||||
CMP R1, R5
|
||||
BLE errCorrupt
|
||||
|
||||
// if d < offset { etc }
|
||||
MOVD R7, R3
|
||||
SUB R8, R3, R3
|
||||
CMP R5, R3
|
||||
BLT errCorrupt
|
||||
|
||||
// if length > len(dst)-d { etc }
|
||||
MOVD R10, R3
|
||||
SUB R7, R3, R3
|
||||
CMP R3, R4
|
||||
BGT errCorrupt
|
||||
|
||||
// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
|
||||
//
|
||||
// Set:
|
||||
// - R14 = len(dst)-d
|
||||
// - R15 = &dst[d-offset]
|
||||
MOVD R10, R14
|
||||
SUB R7, R14, R14
|
||||
MOVD R7, R15
|
||||
SUB R5, R15, R15
|
||||
|
||||
// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
|
||||
//
|
||||
// First, try using two 8-byte load/stores, similar to the doLit technique
|
||||
// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
|
||||
// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
|
||||
// and not one 16-byte load/store, and the first store has to be before the
|
||||
// second load, due to the overlap if offset is in the range [8, 16).
|
||||
//
|
||||
// if length > 16 || offset < 8 || len(dst)-d < 16 {
|
||||
// goto slowForwardCopy
|
||||
// }
|
||||
// copy 16 bytes
|
||||
// d += length
|
||||
CMP $16, R4
|
||||
BGT slowForwardCopy
|
||||
CMP $8, R5
|
||||
BLT slowForwardCopy
|
||||
CMP $16, R14
|
||||
BLT slowForwardCopy
|
||||
MOVD 0(R15), R2
|
||||
MOVD R2, 0(R7)
|
||||
MOVD 8(R15), R3
|
||||
MOVD R3, 8(R7)
|
||||
ADD R4, R7, R7
|
||||
B loop
|
||||
|
||||
slowForwardCopy:
|
||||
// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
|
||||
// can still try 8-byte load stores, provided we can overrun up to 10 extra
|
||||
// bytes. As above, the overrun will be fixed up by subsequent iterations
|
||||
// of the outermost loop.
|
||||
//
|
||||
// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
|
||||
// commentary says:
|
||||
//
|
||||
// ----
|
||||
//
|
||||
// The main part of this loop is a simple copy of eight bytes at a time
|
||||
// until we've copied (at least) the requested amount of bytes. However,
|
||||
// if d and d-offset are less than eight bytes apart (indicating a
|
||||
// repeating pattern of length < 8), we first need to expand the pattern in
|
||||
// order to get the correct results. For instance, if the buffer looks like
|
||||
// this, with the eight-byte <d-offset> and <d> patterns marked as
|
||||
// intervals:
|
||||
//
|
||||
// abxxxxxxxxxxxx
|
||||
// [------] d-offset
|
||||
// [------] d
|
||||
//
|
||||
// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
|
||||
// once, after which we can move <d> two bytes without moving <d-offset>:
|
||||
//
|
||||
// ababxxxxxxxxxx
|
||||
// [------] d-offset
|
||||
// [------] d
|
||||
//
|
||||
// and repeat the exercise until the two no longer overlap.
|
||||
//
|
||||
// This allows us to do very well in the special case of one single byte
|
||||
// repeated many times, without taking a big hit for more general cases.
|
||||
//
|
||||
// The worst case of extra writing past the end of the match occurs when
|
||||
// offset == 1 and length == 1; the last copy will read from byte positions
|
||||
// [0..7] and write to [4..11], whereas it was only supposed to write to
|
||||
// position 1. Thus, ten excess bytes.
|
||||
//
|
||||
// ----
|
||||
//
|
||||
// That "10 byte overrun" worst case is confirmed by Go's
|
||||
// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
|
||||
// and finishSlowForwardCopy algorithm.
|
||||
//
|
||||
// if length > len(dst)-d-10 {
|
||||
// goto verySlowForwardCopy
|
||||
// }
|
||||
SUB $10, R14, R14
|
||||
CMP R14, R4
|
||||
BGT verySlowForwardCopy
|
||||
|
||||
makeOffsetAtLeast8:
|
||||
// !!! As above, expand the pattern so that offset >= 8 and we can use
|
||||
// 8-byte load/stores.
|
||||
//
|
||||
// for offset < 8 {
|
||||
// copy 8 bytes from dst[d-offset:] to dst[d:]
|
||||
// length -= offset
|
||||
// d += offset
|
||||
// offset += offset
|
||||
// // The two previous lines together means that d-offset, and therefore
|
||||
// // R15, is unchanged.
|
||||
// }
|
||||
CMP $8, R5
|
||||
BGE fixUpSlowForwardCopy
|
||||
MOVD (R15), R3
|
||||
MOVD R3, (R7)
|
||||
SUB R5, R4, R4
|
||||
ADD R5, R7, R7
|
||||
ADD R5, R5, R5
|
||||
B makeOffsetAtLeast8
|
||||
|
||||
fixUpSlowForwardCopy:
|
||||
// !!! Add length (which might be negative now) to d (implied by R7 being
|
||||
// &dst[d]) so that d ends up at the right place when we jump back to the
|
||||
// top of the loop. Before we do that, though, we save R7 to R2 so that, if
|
||||
// length is positive, copying the remaining length bytes will write to the
|
||||
// right place.
|
||||
MOVD R7, R2
|
||||
ADD R4, R7, R7
|
||||
|
||||
finishSlowForwardCopy:
|
||||
// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
|
||||
// length means that we overrun, but as above, that will be fixed up by
|
||||
// subsequent iterations of the outermost loop.
|
||||
MOVD $0, R1
|
||||
CMP R1, R4
|
||||
BLE loop
|
||||
MOVD (R15), R3
|
||||
MOVD R3, (R2)
|
||||
ADD $8, R15, R15
|
||||
ADD $8, R2, R2
|
||||
SUB $8, R4, R4
|
||||
B finishSlowForwardCopy
|
||||
|
||||
verySlowForwardCopy:
|
||||
// verySlowForwardCopy is a simple implementation of forward copy. In C
|
||||
// parlance, this is a do/while loop instead of a while loop, since we know
|
||||
// that length > 0. In Go syntax:
|
||||
//
|
||||
// for {
|
||||
// dst[d] = dst[d - offset]
|
||||
// d++
|
||||
// length--
|
||||
// if length == 0 {
|
||||
// break
|
||||
// }
|
||||
// }
|
||||
MOVB (R15), R3
|
||||
MOVB R3, (R7)
|
||||
ADD $1, R15, R15
|
||||
ADD $1, R7, R7
|
||||
SUB $1, R4, R4
|
||||
CBNZ R4, verySlowForwardCopy
|
||||
B loop
|
||||
|
||||
// The code above handles copy tags.
|
||||
// ----------------------------------------
|
||||
|
||||
end:
|
||||
// This is the end of the "for s < len(src)".
|
||||
//
|
||||
// if d != len(dst) { etc }
|
||||
CMP R10, R7
|
||||
BNE errCorrupt
|
||||
|
||||
// return 0
|
||||
MOVD $0, ret+48(FP)
|
||||
RET
|
||||
|
||||
errCorrupt:
|
||||
// return decodeErrCodeCorrupt
|
||||
MOVD $1, R2
|
||||
MOVD R2, ret+48(FP)
|
||||
RET
|
|
@ -0,0 +1,15 @@
|
|||
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
// +build amd64 arm64
|
||||
|
||||
package snappy
|
||||
|
||||
// decode has the same semantics as in decode_other.go.
|
||||
//
|
||||
//go:noescape
|
||||
func decode(dst, src []byte) int
|
|
@ -0,0 +1,115 @@
|
|||
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !amd64,!arm64 appengine !gc noasm
|
||||
|
||||
package snappy
|
||||
|
||||
// decode writes the decoding of src to dst. It assumes that the varint-encoded
|
||||
// length of the decompressed bytes has already been read, and that len(dst)
|
||||
// equals that length.
|
||||
//
|
||||
// It returns 0 on success or a decodeErrCodeXxx error code on failure.
|
||||
func decode(dst, src []byte) int {
|
||||
var d, s, offset, length int
|
||||
for s < len(src) {
|
||||
switch src[s] & 0x03 {
|
||||
case tagLiteral:
|
||||
x := uint32(src[s] >> 2)
|
||||
switch {
|
||||
case x < 60:
|
||||
s++
|
||||
case x == 60:
|
||||
s += 2
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
x = uint32(src[s-1])
|
||||
case x == 61:
|
||||
s += 3
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
x = uint32(src[s-2]) | uint32(src[s-1])<<8
|
||||
case x == 62:
|
||||
s += 4
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
|
||||
case x == 63:
|
||||
s += 5
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
|
||||
}
|
||||
length = int(x) + 1
|
||||
if length <= 0 {
|
||||
return decodeErrCodeUnsupportedLiteralLength
|
||||
}
|
||||
if length > len(dst)-d || length > len(src)-s {
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
copy(dst[d:], src[s:s+length])
|
||||
d += length
|
||||
s += length
|
||||
continue
|
||||
|
||||
case tagCopy1:
|
||||
s += 2
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
length = 4 + int(src[s-2])>>2&0x7
|
||||
offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
|
||||
|
||||
case tagCopy2:
|
||||
s += 3
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
length = 1 + int(src[s-3])>>2
|
||||
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
|
||||
|
||||
case tagCopy4:
|
||||
s += 5
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
length = 1 + int(src[s-5])>>2
|
||||
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
|
||||
}
|
||||
|
||||
if offset <= 0 || d < offset || length > len(dst)-d {
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
// Copy from an earlier sub-slice of dst to a later sub-slice.
|
||||
// If no overlap, use the built-in copy:
|
||||
if offset >= length {
|
||||
copy(dst[d:d+length], dst[d-offset:])
|
||||
d += length
|
||||
continue
|
||||
}
|
||||
|
||||
// Unlike the built-in copy function, this byte-by-byte copy always runs
|
||||
// forwards, even if the slices overlap. Conceptually, this is:
|
||||
//
|
||||
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
|
||||
//
|
||||
// We align the slices into a and b and show the compiler they are the same size.
|
||||
// This allows the loop to run without bounds checks.
|
||||
a := dst[d : d+length]
|
||||
b := dst[d-offset:]
|
||||
b = b[:len(a)]
|
||||
for i := range a {
|
||||
a[i] = b[i]
|
||||
}
|
||||
d += length
|
||||
}
|
||||
if d != len(dst) {
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
return 0
|
||||
}
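For readers skimming the switch above, here is a hedged Go sketch of just the literal-tag header handling (literalHeader, length and headerLen are illustrative names, not part of the vendored file):

// literalHeader unpacks the header of a snappy literal element (tag&0x03 == 0),
// mirroring the tagLiteral branch above: tag values 0-59 carry length-1 in the
// tag byte itself, while 60-63 mean length-1 follows in 1-4 little-endian bytes.
// It returns the literal length, the header size, and ok=false for a truncated
// or non-literal header. Illustrative only.
func literalHeader(src []byte) (length, headerLen int, ok bool) {
	if len(src) == 0 || src[0]&0x03 != 0x00 { // 0x00 is the literal tag
		return 0, 0, false
	}
	x := int(src[0] >> 2)
	if x < 60 {
		return x + 1, 1, true
	}
	extra := x - 59 // 1, 2, 3 or 4 trailing length bytes
	if len(src) < 1+extra {
		return 0, 0, false
	}
	n := 0
	for i := extra; i > 0; i-- {
		n = n<<8 | int(src[i])
	}
	return n + 1, 1 + extra, true
}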
@ -0,0 +1,289 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package snappy
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
||||
// Encode returns the encoded form of src. The returned slice may be a sub-
|
||||
// slice of dst if dst was large enough to hold the entire encoded block.
|
||||
// Otherwise, a newly allocated slice will be returned.
|
||||
//
|
||||
// The dst and src must not overlap. It is valid to pass a nil dst.
|
||||
//
|
||||
// Encode handles the Snappy block format, not the Snappy stream format.
|
||||
func Encode(dst, src []byte) []byte {
|
||||
if n := MaxEncodedLen(len(src)); n < 0 {
|
||||
panic(ErrTooLarge)
|
||||
} else if len(dst) < n {
|
||||
dst = make([]byte, n)
|
||||
}
|
||||
|
||||
// The block starts with the varint-encoded length of the decompressed bytes.
|
||||
d := binary.PutUvarint(dst, uint64(len(src)))
|
||||
|
||||
for len(src) > 0 {
|
||||
p := src
|
||||
src = nil
|
||||
if len(p) > maxBlockSize {
|
||||
p, src = p[:maxBlockSize], p[maxBlockSize:]
|
||||
}
|
||||
if len(p) < minNonLiteralBlockSize {
|
||||
d += emitLiteral(dst[d:], p)
|
||||
} else {
|
||||
d += encodeBlock(dst[d:], p)
|
||||
}
|
||||
}
|
||||
return dst[:d]
|
||||
}
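A minimal block-format round trip, assuming the package is vendored under its usual import path github.com/golang/snappy (a sketch for orientation, not code added by this commit):

package main

import (
	"bytes"
	"fmt"
	"log"

	"github.com/golang/snappy"
)

func main() {
	src := bytes.Repeat([]byte("0123456789abcdef"), 256)

	// Encode emits the block format: a uvarint decompressed length followed
	// by literal/copy elements. Passing nil lets it allocate the buffer.
	block := snappy.Encode(nil, src)

	// Decode reverses it, failing if the block is corrupt or the announced
	// length does not match.
	out, err := snappy.Decode(nil, block)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%d -> %d bytes, round trip ok: %t\n", len(src), len(block), bytes.Equal(src, out))
}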
// inputMargin is the minimum number of extra input bytes to keep, inside
|
||||
// encodeBlock's inner loop. On some architectures, this margin lets us
|
||||
// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
|
||||
// literals can be implemented as a single load to and store from a 16-byte
|
||||
// register. That literal's actual length can be as short as 1 byte, so this
|
||||
// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
|
||||
// the encoding loop will fix up the copy overrun, and this inputMargin ensures
|
||||
// that we don't overrun the dst and src buffers.
|
||||
const inputMargin = 16 - 1
|
||||
|
||||
// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
|
||||
// could be encoded with a copy tag. This is the minimum with respect to the
|
||||
// algorithm used by encodeBlock, not a minimum enforced by the file format.
|
||||
//
|
||||
// The encoded output must start with at least a 1 byte literal, as there are
|
||||
// no previous bytes to copy. A minimal (1 byte) copy after that, generated
|
||||
// from an emitCopy call in encodeBlock's main loop, would require at least
|
||||
// another inputMargin bytes, for the reason above: we want any emitLiteral
|
||||
// calls inside encodeBlock's main loop to use the fast path if possible, which
|
||||
// requires being able to overrun by inputMargin bytes. Thus,
|
||||
// minNonLiteralBlockSize equals 1 + 1 + inputMargin.
|
||||
//
|
||||
// The C++ code doesn't use this exact threshold, but it could, as discussed at
|
||||
// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion
|
||||
// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an
|
||||
// optimization. It should not affect the encoded form. This is tested by
|
||||
// TestSameEncodingAsCppShortCopies.
|
||||
const minNonLiteralBlockSize = 1 + 1 + inputMargin
|
||||
|
||||
// MaxEncodedLen returns the maximum length of a snappy block, given its
|
||||
// uncompressed length.
|
||||
//
|
||||
// It will return a negative value if srcLen is too large to encode.
|
||||
func MaxEncodedLen(srcLen int) int {
|
||||
n := uint64(srcLen)
|
||||
if n > 0xffffffff {
|
||||
return -1
|
||||
}
|
||||
// Compressed data can be defined as:
|
||||
// compressed := item* literal*
|
||||
// item := literal* copy
|
||||
//
|
||||
// The trailing literal sequence has a space blowup of at most 62/60
|
||||
// since a literal of length 60 needs one tag byte + one extra byte
|
||||
// for length information.
|
||||
//
|
||||
// Item blowup is trickier to measure. Suppose the "copy" op copies
|
||||
// 4 bytes of data. Because of a special check in the encoding code,
|
||||
// we produce a 4-byte copy only if the offset is < 65536. Therefore
|
||||
// the copy op takes 3 bytes to encode, and this type of item leads
|
||||
// to at most the 62/60 blowup for representing literals.
|
||||
//
|
||||
// Suppose the "copy" op copies 5 bytes of data. If the offset is big
|
||||
// enough, it will take 5 bytes to encode the copy op. Therefore the
|
||||
// worst case here is a one-byte literal followed by a five-byte copy.
|
||||
// That is, 6 bytes of input turn into 7 bytes of "compressed" data.
|
||||
//
|
||||
// This last factor dominates the blowup, so the final estimate is:
|
||||
n = 32 + n + n/6
|
||||
if n > 0xffffffff {
|
||||
return -1
|
||||
}
|
||||
return int(n)
|
||||
}
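To make the bound concrete, a hedged sketch of a caller that reuses one scratch buffer and treats the negative sentinel as an error; compressInto is a hypothetical helper and assumes this package is imported as snappy:

// compressInto encodes src as one snappy block, reusing buf when it is already
// large enough. It returns the encoded block, the (possibly grown) scratch
// buffer, and an error when the input exceeds the block-format size limit.
func compressInto(buf, src []byte) (block, scratch []byte, err error) {
	n := snappy.MaxEncodedLen(len(src))
	if n < 0 {
		return nil, buf, snappy.ErrTooLarge
	}
	if cap(buf) < n {
		buf = make([]byte, n)
	}
	return snappy.Encode(buf[:cap(buf)], src), buf, nil
}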
var errClosed = errors.New("snappy: Writer is closed")
|
||||
|
||||
// NewWriter returns a new Writer that compresses to w.
|
||||
//
|
||||
// The Writer returned does not buffer writes. There is no need to Flush or
|
||||
// Close such a Writer.
|
||||
//
|
||||
// Deprecated: the Writer returned is not suitable for many small writes, only
|
||||
// for few large writes. Use NewBufferedWriter instead, which is efficient
|
||||
// regardless of the frequency and shape of the writes, and remember to Close
|
||||
// that Writer when done.
|
||||
func NewWriter(w io.Writer) *Writer {
|
||||
return &Writer{
|
||||
w: w,
|
||||
obuf: make([]byte, obufLen),
|
||||
}
|
||||
}
|
||||
|
||||
// NewBufferedWriter returns a new Writer that compresses to w, using the
|
||||
// framing format described at
|
||||
// https://github.com/google/snappy/blob/master/framing_format.txt
|
||||
//
|
||||
// The Writer returned buffers writes. Users must call Close to guarantee all
|
||||
// data has been forwarded to the underlying io.Writer. They may also call
|
||||
// Flush zero or more times before calling Close.
|
||||
func NewBufferedWriter(w io.Writer) *Writer {
|
||||
return &Writer{
|
||||
w: w,
|
||||
ibuf: make([]byte, 0, maxBlockSize),
|
||||
obuf: make([]byte, obufLen),
|
||||
}
|
||||
}
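A hedged usage sketch for the framing format, assuming the standard io package and an import of this package as snappy; copyCompressed and copyDecompressed are hypothetical helpers, and the decompressing side relies on snappy.NewReader from the same package:

// copyCompressed frames src into dst using the stream format. Close flushes the
// final chunk, so dropping it would truncate the stream.
func copyCompressed(dst io.Writer, src io.Reader) error {
	w := snappy.NewBufferedWriter(dst)
	if _, err := io.Copy(w, src); err != nil {
		w.Close()
		return err
	}
	return w.Close()
}

// copyDecompressed reverses it with the matching framed-stream reader.
func copyDecompressed(dst io.Writer, src io.Reader) error {
	_, err := io.Copy(dst, snappy.NewReader(src))
	return err
}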
// Writer is an io.Writer that can write Snappy-compressed bytes.
|
||||
//
|
||||
// Writer handles the Snappy stream format, not the Snappy block format.
|
||||
type Writer struct {
|
||||
w io.Writer
|
||||
err error
|
||||
|
||||
// ibuf is a buffer for the incoming (uncompressed) bytes.
|
||||
//
|
||||
// Its use is optional. For backwards compatibility, Writers created by the
|
||||
// NewWriter function have ibuf == nil, do not buffer incoming bytes, and
|
||||
// therefore do not need to be Flush'ed or Close'd.
|
||||
ibuf []byte
|
||||
|
||||
// obuf is a buffer for the outgoing (compressed) bytes.
|
||||
obuf []byte
|
||||
|
||||
// wroteStreamHeader is whether we have written the stream header.
|
||||
wroteStreamHeader bool
|
||||
}
|
||||
|
||||
// Reset discards the writer's state and switches the Snappy writer to write to
|
||||
// w. This permits reusing a Writer rather than allocating a new one.
|
||||
func (w *Writer) Reset(writer io.Writer) {
|
||||
w.w = writer
|
||||
w.err = nil
|
||||
if w.ibuf != nil {
|
||||
w.ibuf = w.ibuf[:0]
|
||||
}
|
||||
w.wroteStreamHeader = false
|
||||
}
|
||||
|
||||
// Write satisfies the io.Writer interface.
|
||||
func (w *Writer) Write(p []byte) (nRet int, errRet error) {
|
||||
if w.ibuf == nil {
|
||||
// Do not buffer incoming bytes. This does not perform or compress well
|
||||
// if the caller of Writer.Write writes many small slices. This
|
||||
// behavior is therefore deprecated, but still supported for backwards
|
||||
// compatibility with code that doesn't explicitly Flush or Close.
|
||||
return w.write(p)
|
||||
}
|
||||
|
||||
// The remainder of this method is based on bufio.Writer.Write from the
|
||||
// standard library.
|
||||
|
||||
for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil {
|
||||
var n int
|
||||
if len(w.ibuf) == 0 {
|
||||
// Large write, empty buffer.
|
||||
// Write directly from p to avoid copy.
|
||||
n, _ = w.write(p)
|
||||
} else {
|
||||
n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
|
||||
w.ibuf = w.ibuf[:len(w.ibuf)+n]
|
||||
w.Flush()
|
||||
}
|
||||
nRet += n
|
||||
p = p[n:]
|
||||
}
|
||||
if w.err != nil {
|
||||
return nRet, w.err
|
||||
}
|
||||
n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
|
||||
w.ibuf = w.ibuf[:len(w.ibuf)+n]
|
||||
nRet += n
|
||||
return nRet, nil
|
||||
}
|
||||
|
||||
func (w *Writer) write(p []byte) (nRet int, errRet error) {
|
||||
if w.err != nil {
|
||||
return 0, w.err
|
||||
}
|
||||
for len(p) > 0 {
|
||||
obufStart := len(magicChunk)
|
||||
if !w.wroteStreamHeader {
|
||||
w.wroteStreamHeader = true
|
||||
copy(w.obuf, magicChunk)
|
||||
obufStart = 0
|
||||
}
|
||||
|
||||
var uncompressed []byte
|
||||
if len(p) > maxBlockSize {
|
||||
uncompressed, p = p[:maxBlockSize], p[maxBlockSize:]
|
||||
} else {
|
||||
uncompressed, p = p, nil
|
||||
}
|
||||
checksum := crc(uncompressed)
|
||||
|
||||
// Compress the buffer, discarding the result if the improvement
|
||||
// isn't at least 12.5%.
|
||||
compressed := Encode(w.obuf[obufHeaderLen:], uncompressed)
|
||||
chunkType := uint8(chunkTypeCompressedData)
|
||||
chunkLen := 4 + len(compressed)
|
||||
obufEnd := obufHeaderLen + len(compressed)
|
||||
if len(compressed) >= len(uncompressed)-len(uncompressed)/8 {
|
||||
chunkType = chunkTypeUncompressedData
|
||||
chunkLen = 4 + len(uncompressed)
|
||||
obufEnd = obufHeaderLen
|
||||
}
|
||||
|
||||
// Fill in the per-chunk header that comes before the body.
|
||||
w.obuf[len(magicChunk)+0] = chunkType
|
||||
w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0)
|
||||
w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8)
|
||||
w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16)
|
||||
w.obuf[len(magicChunk)+4] = uint8(checksum >> 0)
|
||||
w.obuf[len(magicChunk)+5] = uint8(checksum >> 8)
|
||||
w.obuf[len(magicChunk)+6] = uint8(checksum >> 16)
|
||||
w.obuf[len(magicChunk)+7] = uint8(checksum >> 24)
|
||||
|
||||
if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil {
|
||||
w.err = err
|
||||
return nRet, err
|
||||
}
|
||||
if chunkType == chunkTypeUncompressedData {
|
||||
if _, err := w.w.Write(uncompressed); err != nil {
|
||||
w.err = err
|
||||
return nRet, err
|
||||
}
|
||||
}
|
||||
nRet += len(uncompressed)
|
||||
}
|
||||
return nRet, nil
|
||||
}
|
||||
|
||||
// Flush flushes the Writer to its underlying io.Writer.
|
||||
func (w *Writer) Flush() error {
|
||||
if w.err != nil {
|
||||
return w.err
|
||||
}
|
||||
if len(w.ibuf) == 0 {
|
||||
return nil
|
||||
}
|
||||
w.write(w.ibuf)
|
||||
w.ibuf = w.ibuf[:0]
|
||||
return w.err
|
||||
}
|
||||
|
||||
// Close calls Flush and then closes the Writer.
|
||||
func (w *Writer) Close() error {
|
||||
w.Flush()
|
||||
ret := w.err
|
||||
if w.err == nil {
|
||||
w.err = errClosed
|
||||
}
|
||||
return ret
|
||||
}
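One plausible way to amortise allocations when producing many streams, again assuming the io package and an import of this package as snappy (compressAll is a hypothetical helper, not something this commit adds), is to reuse a single Writer via Reset:

// compressAll writes one framed snappy stream per destination, reusing the
// Writer (and its internal buffers) across iterations. Reset also clears the
// errClosed state left behind by the previous Close.
func compressAll(payload []byte, dsts []io.Writer) error {
	w := snappy.NewBufferedWriter(nil)
	for _, dst := range dsts {
		w.Reset(dst)
		if _, err := w.Write(payload); err != nil {
			return err
		}
		if err := w.Close(); err != nil {
			return err
		}
	}
	return nil
}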
@ -0,0 +1,730 @@
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a
|
||||
// Go toolchain regression. See https://github.com/golang/go/issues/15426 and
|
||||
// https://github.com/golang/snappy/issues/29
|
||||
//
|
||||
// As a workaround, the package was built with a known good assembler, and
|
||||
// those instructions were disassembled by "objdump -d" to yield the
|
||||
// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
|
||||
// style comments, in AT&T asm syntax. Note that rsp here is a physical
|
||||
// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm).
|
||||
// The instructions were then encoded as "BYTE $0x.." sequences, which assemble
|
||||
// fine on Go 1.6.
|
||||
|
||||
// The asm code generally follows the pure Go code in encode_other.go, except
|
||||
// where marked with a "!!!".
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// func emitLiteral(dst, lit []byte) int
|
||||
//
|
||||
// All local variables fit into registers. The register allocation:
|
||||
// - AX len(lit)
|
||||
// - BX n
|
||||
// - DX return value
|
||||
// - DI &dst[i]
|
||||
// - R10 &lit[0]
|
||||
//
|
||||
// The 24 bytes of stack space is to call runtime·memmove.
|
||||
//
|
||||
// The unusual register allocation of local variables, such as R10 for the
|
||||
// source pointer, matches the allocation used at the call site in encodeBlock,
|
||||
// which makes it easier to manually inline this function.
|
||||
TEXT ·emitLiteral(SB), NOSPLIT, $24-56
|
||||
MOVQ dst_base+0(FP), DI
|
||||
MOVQ lit_base+24(FP), R10
|
||||
MOVQ lit_len+32(FP), AX
|
||||
MOVQ AX, DX
|
||||
MOVL AX, BX
|
||||
SUBL $1, BX
|
||||
|
||||
CMPL BX, $60
|
||||
JLT oneByte
|
||||
CMPL BX, $256
|
||||
JLT twoBytes
|
||||
|
||||
threeBytes:
|
||||
MOVB $0xf4, 0(DI)
|
||||
MOVW BX, 1(DI)
|
||||
ADDQ $3, DI
|
||||
ADDQ $3, DX
|
||||
JMP memmove
|
||||
|
||||
twoBytes:
|
||||
MOVB $0xf0, 0(DI)
|
||||
MOVB BX, 1(DI)
|
||||
ADDQ $2, DI
|
||||
ADDQ $2, DX
|
||||
JMP memmove
|
||||
|
||||
oneByte:
|
||||
SHLB $2, BX
|
||||
MOVB BX, 0(DI)
|
||||
ADDQ $1, DI
|
||||
ADDQ $1, DX
|
||||
|
||||
memmove:
|
||||
MOVQ DX, ret+48(FP)
|
||||
|
||||
// copy(dst[i:], lit)
|
||||
//
|
||||
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
|
||||
// DI, R10 and AX as arguments.
|
||||
MOVQ DI, 0(SP)
|
||||
MOVQ R10, 8(SP)
|
||||
MOVQ AX, 16(SP)
|
||||
CALL runtime·memmove(SB)
|
||||
RET
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// func emitCopy(dst []byte, offset, length int) int
|
||||
//
|
||||
// All local variables fit into registers. The register allocation:
|
||||
// - AX length
|
||||
// - SI &dst[0]
|
||||
// - DI &dst[i]
|
||||
// - R11 offset
|
||||
//
|
||||
// The unusual register allocation of local variables, such as R11 for the
|
||||
// offset, matches the allocation used at the call site in encodeBlock, which
|
||||
// makes it easier to manually inline this function.
|
||||
TEXT ·emitCopy(SB), NOSPLIT, $0-48
|
||||
MOVQ dst_base+0(FP), DI
|
||||
MOVQ DI, SI
|
||||
MOVQ offset+24(FP), R11
|
||||
MOVQ length+32(FP), AX
|
||||
|
||||
loop0:
|
||||
// for length >= 68 { etc }
|
||||
CMPL AX, $68
|
||||
JLT step1
|
||||
|
||||
// Emit a length 64 copy, encoded as 3 bytes.
|
||||
MOVB $0xfe, 0(DI)
|
||||
MOVW R11, 1(DI)
|
||||
ADDQ $3, DI
|
||||
SUBL $64, AX
|
||||
JMP loop0
|
||||
|
||||
step1:
|
||||
// if length > 64 { etc }
|
||||
CMPL AX, $64
|
||||
JLE step2
|
||||
|
||||
// Emit a length 60 copy, encoded as 3 bytes.
|
||||
MOVB $0xee, 0(DI)
|
||||
MOVW R11, 1(DI)
|
||||
ADDQ $3, DI
|
||||
SUBL $60, AX
|
||||
|
||||
step2:
|
||||
// if length >= 12 || offset >= 2048 { goto step3 }
|
||||
CMPL AX, $12
|
||||
JGE step3
|
||||
CMPL R11, $2048
|
||||
JGE step3
|
||||
|
||||
// Emit the remaining copy, encoded as 2 bytes.
|
||||
MOVB R11, 1(DI)
|
||||
SHRL $8, R11
|
||||
SHLB $5, R11
|
||||
SUBB $4, AX
|
||||
SHLB $2, AX
|
||||
ORB AX, R11
|
||||
ORB $1, R11
|
||||
MOVB R11, 0(DI)
|
||||
ADDQ $2, DI
|
||||
|
||||
// Return the number of bytes written.
|
||||
SUBQ SI, DI
|
||||
MOVQ DI, ret+40(FP)
|
||||
RET
|
||||
|
||||
step3:
|
||||
// Emit the remaining copy, encoded as 3 bytes.
|
||||
SUBL $1, AX
|
||||
SHLB $2, AX
|
||||
ORB $2, AX
|
||||
MOVB AX, 0(DI)
|
||||
MOVW R11, 1(DI)
|
||||
ADDQ $3, DI
|
||||
|
||||
// Return the number of bytes written.
|
||||
SUBQ SI, DI
|
||||
MOVQ DI, ret+40(FP)
|
||||
RET
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// func extendMatch(src []byte, i, j int) int
|
||||
//
|
||||
// All local variables fit into registers. The register allocation:
|
||||
// - DX &src[0]
|
||||
// - SI &src[j]
|
||||
// - R13 &src[len(src) - 8]
|
||||
// - R14 &src[len(src)]
|
||||
// - R15 &src[i]
|
||||
//
|
||||
// The unusual register allocation of local variables, such as R15 for a source
|
||||
// pointer, matches the allocation used at the call site in encodeBlock, which
|
||||
// makes it easier to manually inline this function.
|
||||
TEXT ·extendMatch(SB), NOSPLIT, $0-48
|
||||
MOVQ src_base+0(FP), DX
|
||||
MOVQ src_len+8(FP), R14
|
||||
MOVQ i+24(FP), R15
|
||||
MOVQ j+32(FP), SI
|
||||
ADDQ DX, R14
|
||||
ADDQ DX, R15
|
||||
ADDQ DX, SI
|
||||
MOVQ R14, R13
|
||||
SUBQ $8, R13
|
||||
|
||||
cmp8:
|
||||
// As long as we are 8 or more bytes before the end of src, we can load and
|
||||
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
|
||||
CMPQ SI, R13
|
||||
JA cmp1
|
||||
MOVQ (R15), AX
|
||||
MOVQ (SI), BX
|
||||
CMPQ AX, BX
|
||||
JNE bsf
|
||||
ADDQ $8, R15
|
||||
ADDQ $8, SI
|
||||
JMP cmp8
|
||||
|
||||
bsf:
|
||||
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
|
||||
// the index of the first byte that differs. The BSF instruction finds the
|
||||
// least significant 1 bit, the amd64 architecture is little-endian, and
|
||||
// the shift by 3 converts a bit index to a byte index.
|
||||
XORQ AX, BX
|
||||
BSFQ BX, BX
|
||||
SHRQ $3, BX
|
||||
ADDQ BX, SI
|
||||
|
||||
// Convert from &src[ret] to ret.
|
||||
SUBQ DX, SI
|
||||
MOVQ SI, ret+40(FP)
|
||||
RET
|
||||
|
||||
cmp1:
|
||||
// In src's tail, compare 1 byte at a time.
|
||||
CMPQ SI, R14
|
||||
JAE extendMatchEnd
|
||||
MOVB (R15), AX
|
||||
MOVB (SI), BX
|
||||
CMPB AX, BX
|
||||
JNE extendMatchEnd
|
||||
ADDQ $1, R15
|
||||
ADDQ $1, SI
|
||||
JMP cmp1
|
||||
|
||||
extendMatchEnd:
|
||||
// Convert from &src[ret] to ret.
|
||||
SUBQ DX, SI
|
||||
MOVQ SI, ret+40(FP)
|
||||
RET
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// func encodeBlock(dst, src []byte) (d int)
|
||||
//
|
||||
// All local variables fit into registers, other than "var table". The register
|
||||
// allocation:
|
||||
// - AX . .
|
||||
// - BX . .
|
||||
// - CX 56 shift (note that amd64 shifts by non-immediates must use CX).
|
||||
// - DX 64 &src[0], tableSize
|
||||
// - SI 72 &src[s]
|
||||
// - DI 80 &dst[d]
|
||||
// - R9 88 sLimit
|
||||
// - R10 . &src[nextEmit]
|
||||
// - R11 96 prevHash, currHash, nextHash, offset
|
||||
// - R12 104 &src[base], skip
|
||||
// - R13 . &src[nextS], &src[len(src) - 8]
|
||||
// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x
|
||||
// - R15 112 candidate
|
||||
//
|
||||
// The second column (56, 64, etc) is the stack offset to spill the registers
|
||||
// when calling other functions. We could pack this slightly tighter, but it's
|
||||
// simpler to have a dedicated spill map independent of the function called.
|
||||
//
|
||||
// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
|
||||
// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill
|
||||
// local variables (registers) during calls gives 32768 + 56 + 64 = 32888.
|
||||
TEXT ·encodeBlock(SB), 0, $32888-56
|
||||
MOVQ dst_base+0(FP), DI
|
||||
MOVQ src_base+24(FP), SI
|
||||
MOVQ src_len+32(FP), R14
|
||||
|
||||
// shift, tableSize := uint32(32-8), 1<<8
|
||||
MOVQ $24, CX
|
||||
MOVQ $256, DX
|
||||
|
||||
calcShift:
|
||||
// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
|
||||
// shift--
|
||||
// }
|
||||
CMPQ DX, $16384
|
||||
JGE varTable
|
||||
CMPQ DX, R14
|
||||
JGE varTable
|
||||
SUBQ $1, CX
|
||||
SHLQ $1, DX
|
||||
JMP calcShift
|
||||
|
||||
varTable:
|
||||
// var table [maxTableSize]uint16
|
||||
//
|
||||
// In the asm code, unlike the Go code, we can zero-initialize only the
|
||||
// first tableSize elements. Each uint16 element is 2 bytes and each MOVOU
|
||||
// writes 16 bytes, so we can do only tableSize/8 writes instead of the
|
||||
// 2048 writes that would zero-initialize all of table's 32768 bytes.
|
||||
SHRQ $3, DX
|
||||
LEAQ table-32768(SP), BX
|
||||
PXOR X0, X0
|
||||
|
||||
memclr:
|
||||
MOVOU X0, 0(BX)
|
||||
ADDQ $16, BX
|
||||
SUBQ $1, DX
|
||||
JNZ memclr
|
||||
|
||||
// !!! DX = &src[0]
|
||||
MOVQ SI, DX
|
||||
|
||||
// sLimit := len(src) - inputMargin
|
||||
MOVQ R14, R9
|
||||
SUBQ $15, R9
|
||||
|
||||
// !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't
|
||||
// change for the rest of the function.
|
||||
MOVQ CX, 56(SP)
|
||||
MOVQ DX, 64(SP)
|
||||
MOVQ R9, 88(SP)
|
||||
|
||||
// nextEmit := 0
|
||||
MOVQ DX, R10
|
||||
|
||||
// s := 1
|
||||
ADDQ $1, SI
|
||||
|
||||
// nextHash := hash(load32(src, s), shift)
|
||||
MOVL 0(SI), R11
|
||||
IMULL $0x1e35a7bd, R11
|
||||
SHRL CX, R11
|
||||
|
||||
outer:
|
||||
// for { etc }
|
||||
|
||||
// skip := 32
|
||||
MOVQ $32, R12
|
||||
|
||||
// nextS := s
|
||||
MOVQ SI, R13
|
||||
|
||||
// candidate := 0
|
||||
MOVQ $0, R15
|
||||
|
||||
inner0:
|
||||
// for { etc }
|
||||
|
||||
// s := nextS
|
||||
MOVQ R13, SI
|
||||
|
||||
// bytesBetweenHashLookups := skip >> 5
|
||||
MOVQ R12, R14
|
||||
SHRQ $5, R14
|
||||
|
||||
// nextS = s + bytesBetweenHashLookups
|
||||
ADDQ R14, R13
|
||||
|
||||
// skip += bytesBetweenHashLookups
|
||||
ADDQ R14, R12
|
||||
|
||||
// if nextS > sLimit { goto emitRemainder }
|
||||
MOVQ R13, AX
|
||||
SUBQ DX, AX
|
||||
CMPQ AX, R9
|
||||
JA emitRemainder
|
||||
|
||||
// candidate = int(table[nextHash])
|
||||
// XXX: MOVWQZX table-32768(SP)(R11*2), R15
|
||||
// XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
|
||||
BYTE $0x4e
|
||||
BYTE $0x0f
|
||||
BYTE $0xb7
|
||||
BYTE $0x7c
|
||||
BYTE $0x5c
|
||||
BYTE $0x78
|
||||
|
||||
// table[nextHash] = uint16(s)
|
||||
MOVQ SI, AX
|
||||
SUBQ DX, AX
|
||||
|
||||
// XXX: MOVW AX, table-32768(SP)(R11*2)
|
||||
// XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
|
||||
BYTE $0x66
|
||||
BYTE $0x42
|
||||
BYTE $0x89
|
||||
BYTE $0x44
|
||||
BYTE $0x5c
|
||||
BYTE $0x78
|
||||
|
||||
// nextHash = hash(load32(src, nextS), shift)
|
||||
MOVL 0(R13), R11
|
||||
IMULL $0x1e35a7bd, R11
|
||||
SHRL CX, R11
|
||||
|
||||
// if load32(src, s) != load32(src, candidate) { continue } break
|
||||
MOVL 0(SI), AX
|
||||
MOVL (DX)(R15*1), BX
|
||||
CMPL AX, BX
|
||||
JNE inner0
|
||||
|
||||
fourByteMatch:
|
||||
// As per the encode_other.go code:
|
||||
//
|
||||
// A 4-byte match has been found. We'll later see etc.
|
||||
|
||||
// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
|
||||
// on inputMargin in encode.go.
|
||||
MOVQ SI, AX
|
||||
SUBQ R10, AX
|
||||
CMPQ AX, $16
|
||||
JLE emitLiteralFastPath
|
||||
|
||||
// ----------------------------------------
|
||||
// Begin inline of the emitLiteral call.
|
||||
//
|
||||
// d += emitLiteral(dst[d:], src[nextEmit:s])
|
||||
|
||||
MOVL AX, BX
|
||||
SUBL $1, BX
|
||||
|
||||
CMPL BX, $60
|
||||
JLT inlineEmitLiteralOneByte
|
||||
CMPL BX, $256
|
||||
JLT inlineEmitLiteralTwoBytes
|
||||
|
||||
inlineEmitLiteralThreeBytes:
|
||||
MOVB $0xf4, 0(DI)
|
||||
MOVW BX, 1(DI)
|
||||
ADDQ $3, DI
|
||||
JMP inlineEmitLiteralMemmove
|
||||
|
||||
inlineEmitLiteralTwoBytes:
|
||||
MOVB $0xf0, 0(DI)
|
||||
MOVB BX, 1(DI)
|
||||
ADDQ $2, DI
|
||||
JMP inlineEmitLiteralMemmove
|
||||
|
||||
inlineEmitLiteralOneByte:
|
||||
SHLB $2, BX
|
||||
MOVB BX, 0(DI)
|
||||
ADDQ $1, DI
|
||||
|
||||
inlineEmitLiteralMemmove:
|
||||
// Spill local variables (registers) onto the stack; call; unspill.
|
||||
//
|
||||
// copy(dst[i:], lit)
|
||||
//
|
||||
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
|
||||
// DI, R10 and AX as arguments.
|
||||
MOVQ DI, 0(SP)
|
||||
MOVQ R10, 8(SP)
|
||||
MOVQ AX, 16(SP)
|
||||
ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)".
|
||||
MOVQ SI, 72(SP)
|
||||
MOVQ DI, 80(SP)
|
||||
MOVQ R15, 112(SP)
|
||||
CALL runtime·memmove(SB)
|
||||
MOVQ 56(SP), CX
|
||||
MOVQ 64(SP), DX
|
||||
MOVQ 72(SP), SI
|
||||
MOVQ 80(SP), DI
|
||||
MOVQ 88(SP), R9
|
||||
MOVQ 112(SP), R15
|
||||
JMP inner1
|
||||
|
||||
inlineEmitLiteralEnd:
|
||||
// End inline of the emitLiteral call.
|
||||
// ----------------------------------------
|
||||
|
||||
emitLiteralFastPath:
|
||||
// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
|
||||
MOVB AX, BX
|
||||
SUBB $1, BX
|
||||
SHLB $2, BX
|
||||
MOVB BX, (DI)
|
||||
ADDQ $1, DI
|
||||
|
||||
// !!! Implement the copy from lit to dst as a 16-byte load and store.
|
||||
// (Encode's documentation says that dst and src must not overlap.)
|
||||
//
|
||||
// This always copies 16 bytes, instead of only len(lit) bytes, but that's
|
||||
// OK. Subsequent iterations will fix up the overrun.
|
||||
//
|
||||
// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
|
||||
// 16-byte loads and stores. This technique probably wouldn't be as
|
||||
// effective on architectures that are fussier about alignment.
|
||||
MOVOU 0(R10), X0
|
||||
MOVOU X0, 0(DI)
|
||||
ADDQ AX, DI
|
||||
|
||||
inner1:
|
||||
// for { etc }
|
||||
|
||||
// base := s
|
||||
MOVQ SI, R12
|
||||
|
||||
// !!! offset := base - candidate
|
||||
MOVQ R12, R11
|
||||
SUBQ R15, R11
|
||||
SUBQ DX, R11
|
||||
|
||||
// ----------------------------------------
|
||||
// Begin inline of the extendMatch call.
|
||||
//
|
||||
// s = extendMatch(src, candidate+4, s+4)
|
||||
|
||||
// !!! R14 = &src[len(src)]
|
||||
MOVQ src_len+32(FP), R14
|
||||
ADDQ DX, R14
|
||||
|
||||
// !!! R13 = &src[len(src) - 8]
|
||||
MOVQ R14, R13
|
||||
SUBQ $8, R13
|
||||
|
||||
// !!! R15 = &src[candidate + 4]
|
||||
ADDQ $4, R15
|
||||
ADDQ DX, R15
|
||||
|
||||
// !!! s += 4
|
||||
ADDQ $4, SI
|
||||
|
||||
inlineExtendMatchCmp8:
|
||||
// As long as we are 8 or more bytes before the end of src, we can load and
|
||||
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
|
||||
CMPQ SI, R13
|
||||
JA inlineExtendMatchCmp1
|
||||
MOVQ (R15), AX
|
||||
MOVQ (SI), BX
|
||||
CMPQ AX, BX
|
||||
JNE inlineExtendMatchBSF
|
||||
ADDQ $8, R15
|
||||
ADDQ $8, SI
|
||||
JMP inlineExtendMatchCmp8
|
||||
|
||||
inlineExtendMatchBSF:
|
||||
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
|
||||
// the index of the first byte that differs. The BSF instruction finds the
|
||||
// least significant 1 bit, the amd64 architecture is little-endian, and
|
||||
// the shift by 3 converts a bit index to a byte index.
|
||||
XORQ AX, BX
|
||||
BSFQ BX, BX
|
||||
SHRQ $3, BX
|
||||
ADDQ BX, SI
|
||||
JMP inlineExtendMatchEnd
|
||||
|
||||
inlineExtendMatchCmp1:
|
||||
// In src's tail, compare 1 byte at a time.
|
||||
CMPQ SI, R14
|
||||
JAE inlineExtendMatchEnd
|
||||
MOVB (R15), AX
|
||||
MOVB (SI), BX
|
||||
CMPB AX, BX
|
||||
JNE inlineExtendMatchEnd
|
||||
ADDQ $1, R15
|
||||
ADDQ $1, SI
|
||||
JMP inlineExtendMatchCmp1
|
||||
|
||||
inlineExtendMatchEnd:
|
||||
// End inline of the extendMatch call.
|
||||
// ----------------------------------------
|
||||
|
||||
// ----------------------------------------
|
||||
// Begin inline of the emitCopy call.
|
||||
//
|
||||
// d += emitCopy(dst[d:], base-candidate, s-base)
|
||||
|
||||
// !!! length := s - base
|
||||
MOVQ SI, AX
|
||||
SUBQ R12, AX
|
||||
|
||||
inlineEmitCopyLoop0:
|
||||
// for length >= 68 { etc }
|
||||
CMPL AX, $68
|
||||
JLT inlineEmitCopyStep1
|
||||
|
||||
// Emit a length 64 copy, encoded as 3 bytes.
|
||||
MOVB $0xfe, 0(DI)
|
||||
MOVW R11, 1(DI)
|
||||
ADDQ $3, DI
|
||||
SUBL $64, AX
|
||||
JMP inlineEmitCopyLoop0
|
||||
|
||||
inlineEmitCopyStep1:
|
||||
// if length > 64 { etc }
|
||||
CMPL AX, $64
|
||||
JLE inlineEmitCopyStep2
|
||||
|
||||
// Emit a length 60 copy, encoded as 3 bytes.
|
||||
MOVB $0xee, 0(DI)
|
||||
MOVW R11, 1(DI)
|
||||
ADDQ $3, DI
|
||||
SUBL $60, AX
|
||||
|
||||
inlineEmitCopyStep2:
|
||||
// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
|
||||
CMPL AX, $12
|
||||
JGE inlineEmitCopyStep3
|
||||
CMPL R11, $2048
|
||||
JGE inlineEmitCopyStep3
|
||||
|
||||
// Emit the remaining copy, encoded as 2 bytes.
|
||||
MOVB R11, 1(DI)
|
||||
SHRL $8, R11
|
||||
SHLB $5, R11
|
||||
SUBB $4, AX
|
||||
SHLB $2, AX
|
||||
ORB AX, R11
|
||||
ORB $1, R11
|
||||
MOVB R11, 0(DI)
|
||||
ADDQ $2, DI
|
||||
JMP inlineEmitCopyEnd
|
||||
|
||||
inlineEmitCopyStep3:
|
||||
// Emit the remaining copy, encoded as 3 bytes.
|
||||
SUBL $1, AX
|
||||
SHLB $2, AX
|
||||
ORB $2, AX
|
||||
MOVB AX, 0(DI)
|
||||
MOVW R11, 1(DI)
|
||||
ADDQ $3, DI
|
||||
|
||||
inlineEmitCopyEnd:
|
||||
// End inline of the emitCopy call.
|
||||
// ----------------------------------------
|
||||
|
||||
// nextEmit = s
|
||||
MOVQ SI, R10
|
||||
|
||||
// if s >= sLimit { goto emitRemainder }
|
||||
MOVQ SI, AX
|
||||
SUBQ DX, AX
|
||||
CMPQ AX, R9
|
||||
JAE emitRemainder
|
||||
|
||||
// As per the encode_other.go code:
|
||||
//
|
||||
// We could immediately etc.
|
||||
|
||||
// x := load64(src, s-1)
|
||||
MOVQ -1(SI), R14
|
||||
|
||||
// prevHash := hash(uint32(x>>0), shift)
|
||||
MOVL R14, R11
|
||||
IMULL $0x1e35a7bd, R11
|
||||
SHRL CX, R11
|
||||
|
||||
// table[prevHash] = uint16(s-1)
|
||||
MOVQ SI, AX
|
||||
SUBQ DX, AX
|
||||
SUBQ $1, AX
|
||||
|
||||
// XXX: MOVW AX, table-32768(SP)(R11*2)
|
||||
// XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
|
||||
BYTE $0x66
|
||||
BYTE $0x42
|
||||
BYTE $0x89
|
||||
BYTE $0x44
|
||||
BYTE $0x5c
|
||||
BYTE $0x78
|
||||
|
||||
// currHash := hash(uint32(x>>8), shift)
|
||||
SHRQ $8, R14
|
||||
MOVL R14, R11
|
||||
IMULL $0x1e35a7bd, R11
|
||||
SHRL CX, R11
|
||||
|
||||
// candidate = int(table[currHash])
|
||||
// XXX: MOVWQZX table-32768(SP)(R11*2), R15
|
||||
// XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
|
||||
BYTE $0x4e
|
||||
BYTE $0x0f
|
||||
BYTE $0xb7
|
||||
BYTE $0x7c
|
||||
BYTE $0x5c
|
||||
BYTE $0x78
|
||||
|
||||
// table[currHash] = uint16(s)
|
||||
ADDQ $1, AX
|
||||
|
||||
// XXX: MOVW AX, table-32768(SP)(R11*2)
|
||||
// XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
|
||||
BYTE $0x66
|
||||
BYTE $0x42
|
||||
BYTE $0x89
|
||||
BYTE $0x44
|
||||
BYTE $0x5c
|
||||
BYTE $0x78
|
||||
|
||||
// if uint32(x>>8) == load32(src, candidate) { continue }
|
||||
MOVL (DX)(R15*1), BX
|
||||
CMPL R14, BX
|
||||
JEQ inner1
|
||||
|
||||
// nextHash = hash(uint32(x>>16), shift)
|
||||
SHRQ $8, R14
|
||||
MOVL R14, R11
|
||||
IMULL $0x1e35a7bd, R11
|
||||
SHRL CX, R11
|
||||
|
||||
// s++
|
||||
ADDQ $1, SI
|
||||
|
||||
// break out of the inner1 for loop, i.e. continue the outer loop.
|
||||
JMP outer
|
||||
|
||||
emitRemainder:
|
||||
// if nextEmit < len(src) { etc }
|
||||
MOVQ src_len+32(FP), AX
|
||||
ADDQ DX, AX
|
||||
CMPQ R10, AX
|
||||
JEQ encodeBlockEnd
|
||||
|
||||
// d += emitLiteral(dst[d:], src[nextEmit:])
|
||||
//
|
||||
// Push args.
|
||||
MOVQ DI, 0(SP)
|
||||
MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative.
|
||||
MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative.
|
||||
MOVQ R10, 24(SP)
|
||||
SUBQ R10, AX
|
||||
MOVQ AX, 32(SP)
|
||||
MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative.
|
||||
|
||||
// Spill local variables (registers) onto the stack; call; unspill.
|
||||
MOVQ DI, 80(SP)
|
||||
CALL ·emitLiteral(SB)
|
||||
MOVQ 80(SP), DI
|
||||
|
||||
// Finish the "d +=" part of "d += emitLiteral(etc)".
|
||||
ADDQ 48(SP), DI
|
||||
|
||||
encodeBlockEnd:
|
||||
MOVQ dst_base+0(FP), AX
|
||||
SUBQ AX, DI
|
||||
MOVQ DI, d+48(FP)
|
||||
RET
@ -0,0 +1,722 @@
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// The asm code generally follows the pure Go code in encode_other.go, except
|
||||
// where marked with a "!!!".
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// func emitLiteral(dst, lit []byte) int
|
||||
//
|
||||
// All local variables fit into registers. The register allocation:
|
||||
// - R3 len(lit)
|
||||
// - R4 n
|
||||
// - R6 return value
|
||||
// - R8 &dst[i]
|
||||
// - R10 &lit[0]
|
||||
//
|
||||
// The 32 bytes of stack space is to call runtime·memmove.
|
||||
//
|
||||
// The unusual register allocation of local variables, such as R10 for the
|
||||
// source pointer, matches the allocation used at the call site in encodeBlock,
|
||||
// which makes it easier to manually inline this function.
|
||||
TEXT ·emitLiteral(SB), NOSPLIT, $32-56
|
||||
MOVD dst_base+0(FP), R8
|
||||
MOVD lit_base+24(FP), R10
|
||||
MOVD lit_len+32(FP), R3
|
||||
MOVD R3, R6
|
||||
MOVW R3, R4
|
||||
SUBW $1, R4, R4
|
||||
|
||||
CMPW $60, R4
|
||||
BLT oneByte
|
||||
CMPW $256, R4
|
||||
BLT twoBytes
|
||||
|
||||
threeBytes:
|
||||
MOVD $0xf4, R2
|
||||
MOVB R2, 0(R8)
|
||||
MOVW R4, 1(R8)
|
||||
ADD $3, R8, R8
|
||||
ADD $3, R6, R6
|
||||
B memmove
|
||||
|
||||
twoBytes:
|
||||
MOVD $0xf0, R2
|
||||
MOVB R2, 0(R8)
|
||||
MOVB R4, 1(R8)
|
||||
ADD $2, R8, R8
|
||||
ADD $2, R6, R6
|
||||
B memmove
|
||||
|
||||
oneByte:
|
||||
LSLW $2, R4, R4
|
||||
MOVB R4, 0(R8)
|
||||
ADD $1, R8, R8
|
||||
ADD $1, R6, R6
|
||||
|
||||
memmove:
|
||||
MOVD R6, ret+48(FP)
|
||||
|
||||
// copy(dst[i:], lit)
|
||||
//
|
||||
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
|
||||
// R8, R10 and R3 as arguments.
|
||||
MOVD R8, 8(RSP)
|
||||
MOVD R10, 16(RSP)
|
||||
MOVD R3, 24(RSP)
|
||||
CALL runtime·memmove(SB)
|
||||
RET
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// func emitCopy(dst []byte, offset, length int) int
|
||||
//
|
||||
// All local variables fit into registers. The register allocation:
|
||||
// - R3 length
|
||||
// - R7 &dst[0]
|
||||
// - R8 &dst[i]
|
||||
// - R11 offset
|
||||
//
|
||||
// The unusual register allocation of local variables, such as R11 for the
|
||||
// offset, matches the allocation used at the call site in encodeBlock, which
|
||||
// makes it easier to manually inline this function.
|
||||
TEXT ·emitCopy(SB), NOSPLIT, $0-48
|
||||
MOVD dst_base+0(FP), R8
|
||||
MOVD R8, R7
|
||||
MOVD offset+24(FP), R11
|
||||
MOVD length+32(FP), R3
|
||||
|
||||
loop0:
|
||||
// for length >= 68 { etc }
|
||||
CMPW $68, R3
|
||||
BLT step1
|
||||
|
||||
// Emit a length 64 copy, encoded as 3 bytes.
|
||||
MOVD $0xfe, R2
|
||||
MOVB R2, 0(R8)
|
||||
MOVW R11, 1(R8)
|
||||
ADD $3, R8, R8
|
||||
SUB $64, R3, R3
|
||||
B loop0
|
||||
|
||||
step1:
|
||||
// if length > 64 { etc }
|
||||
CMP $64, R3
|
||||
BLE step2
|
||||
|
||||
// Emit a length 60 copy, encoded as 3 bytes.
|
||||
MOVD $0xee, R2
|
||||
MOVB R2, 0(R8)
|
||||
MOVW R11, 1(R8)
|
||||
ADD $3, R8, R8
|
||||
SUB $60, R3, R3
|
||||
|
||||
step2:
|
||||
// if length >= 12 || offset >= 2048 { goto step3 }
|
||||
CMP $12, R3
|
||||
BGE step3
|
||||
CMPW $2048, R11
|
||||
BGE step3
|
||||
|
||||
// Emit the remaining copy, encoded as 2 bytes.
|
||||
MOVB R11, 1(R8)
|
||||
LSRW $3, R11, R11
|
||||
AND $0xe0, R11, R11
|
||||
SUB $4, R3, R3
|
||||
LSLW $2, R3
|
||||
AND $0xff, R3, R3
|
||||
ORRW R3, R11, R11
|
||||
ORRW $1, R11, R11
|
||||
MOVB R11, 0(R8)
|
||||
ADD $2, R8, R8
|
||||
|
||||
// Return the number of bytes written.
|
||||
SUB R7, R8, R8
|
||||
MOVD R8, ret+40(FP)
|
||||
RET
|
||||
|
||||
step3:
|
||||
// Emit the remaining copy, encoded as 3 bytes.
|
||||
SUB $1, R3, R3
|
||||
AND $0xff, R3, R3
|
||||
LSLW $2, R3, R3
|
||||
ORRW $2, R3, R3
|
||||
MOVB R3, 0(R8)
|
||||
MOVW R11, 1(R8)
|
||||
ADD $3, R8, R8
|
||||
|
||||
// Return the number of bytes written.
|
||||
SUB R7, R8, R8
|
||||
MOVD R8, ret+40(FP)
|
||||
RET
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// func extendMatch(src []byte, i, j int) int
|
||||
//
|
||||
// All local variables fit into registers. The register allocation:
|
||||
// - R6 &src[0]
|
||||
// - R7 &src[j]
|
||||
// - R13 &src[len(src) - 8]
|
||||
// - R14 &src[len(src)]
|
||||
// - R15 &src[i]
|
||||
//
|
||||
// The unusual register allocation of local variables, such as R15 for a source
|
||||
// pointer, matches the allocation used at the call site in encodeBlock, which
|
||||
// makes it easier to manually inline this function.
|
||||
TEXT ·extendMatch(SB), NOSPLIT, $0-48
|
||||
MOVD src_base+0(FP), R6
|
||||
MOVD src_len+8(FP), R14
|
||||
MOVD i+24(FP), R15
|
||||
MOVD j+32(FP), R7
|
||||
ADD R6, R14, R14
|
||||
ADD R6, R15, R15
|
||||
ADD R6, R7, R7
|
||||
MOVD R14, R13
|
||||
SUB $8, R13, R13
|
||||
|
||||
cmp8:
|
||||
// As long as we are 8 or more bytes before the end of src, we can load and
|
||||
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
|
||||
CMP R13, R7
|
||||
BHI cmp1
|
||||
MOVD (R15), R3
|
||||
MOVD (R7), R4
|
||||
CMP R4, R3
|
||||
BNE bsf
|
||||
ADD $8, R15, R15
|
||||
ADD $8, R7, R7
|
||||
B cmp8
|
||||
|
||||
bsf:
|
||||
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
|
||||
// the index of the first byte that differs.
|
||||
// RBIT reverses the bit order, then CLZ counts the leading zeros, the
|
||||
// combination of which finds the least significant bit which is set.
|
||||
// The arm64 architecture is little-endian, and the shift by 3 converts
|
||||
// a bit index to a byte index.
|
||||
EOR R3, R4, R4
|
||||
RBIT R4, R4
|
||||
CLZ R4, R4
|
||||
ADD R4>>3, R7, R7
|
||||
|
||||
// Convert from &src[ret] to ret.
|
||||
SUB R6, R7, R7
|
||||
MOVD R7, ret+40(FP)
|
||||
RET
|
||||
|
||||
cmp1:
|
||||
// In src's tail, compare 1 byte at a time.
|
||||
CMP R7, R14
|
||||
BLS extendMatchEnd
|
||||
MOVB (R15), R3
|
||||
MOVB (R7), R4
|
||||
CMP R4, R3
|
||||
BNE extendMatchEnd
|
||||
ADD $1, R15, R15
|
||||
ADD $1, R7, R7
|
||||
B cmp1
|
||||
|
||||
extendMatchEnd:
|
||||
// Convert from &src[ret] to ret.
|
||||
SUB R6, R7, R7
|
||||
MOVD R7, ret+40(FP)
|
||||
RET
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// func encodeBlock(dst, src []byte) (d int)
|
||||
//
|
||||
// All local variables fit into registers, other than "var table". The register
|
||||
// allocation:
|
||||
// - R3 . .
|
||||
// - R4 . .
|
||||
// - R5 64 shift
|
||||
// - R6 72 &src[0], tableSize
|
||||
// - R7 80 &src[s]
|
||||
// - R8 88 &dst[d]
|
||||
// - R9 96 sLimit
|
||||
// - R10 . &src[nextEmit]
|
||||
// - R11 104 prevHash, currHash, nextHash, offset
|
||||
// - R12 112 &src[base], skip
|
||||
// - R13 . &src[nextS], &src[len(src) - 8]
|
||||
// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x
|
||||
// - R15 120 candidate
|
||||
// - R16 . hash constant, 0x1e35a7bd
|
||||
// - R17 . &table
|
||||
// - . 128 table
|
||||
//
|
||||
// The second column (64, 72, etc) is the stack offset to spill the registers
|
||||
// when calling other functions. We could pack this slightly tighter, but it's
|
||||
// simpler to have a dedicated spill map independent of the function called.
|
||||
//
|
||||
// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
|
||||
// extra 64 bytes, to call other functions, and an extra 64 bytes, to spill
|
||||
// local variables (registers) during calls gives 32768 + 64 + 64 = 32896.
|
||||
TEXT ·encodeBlock(SB), 0, $32896-56
|
||||
MOVD dst_base+0(FP), R8
|
||||
MOVD src_base+24(FP), R7
|
||||
MOVD src_len+32(FP), R14
|
||||
|
||||
// shift, tableSize := uint32(32-8), 1<<8
|
||||
MOVD $24, R5
|
||||
MOVD $256, R6
|
||||
MOVW $0xa7bd, R16
|
||||
MOVKW $(0x1e35<<16), R16
|
||||
|
||||
calcShift:
|
||||
// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
|
||||
// shift--
|
||||
// }
|
||||
MOVD $16384, R2
|
||||
CMP R2, R6
|
||||
BGE varTable
|
||||
CMP R14, R6
|
||||
BGE varTable
|
||||
SUB $1, R5, R5
|
||||
LSL $1, R6, R6
|
||||
B calcShift
|
||||
|
||||
varTable:
|
||||
// var table [maxTableSize]uint16
|
||||
//
|
||||
// In the asm code, unlike the Go code, we can zero-initialize only the
|
||||
// first tableSize elements. Each uint16 element is 2 bytes and each
|
||||
// iteration writes 64 bytes, so we can do only tableSize/32 writes
|
||||
// instead of the 2048 writes that would zero-initialize all of table's
|
||||
// 32768 bytes. This clear could overrun the first tableSize elements, but
|
||||
// it won't overrun the allocated stack size.
|
||||
ADD $128, RSP, R17
|
||||
MOVD R17, R4
|
||||
|
||||
// !!! R6 = &src[tableSize]
|
||||
ADD R6<<1, R17, R6
|
||||
|
||||
memclr:
|
||||
STP.P (ZR, ZR), 64(R4)
|
||||
STP (ZR, ZR), -48(R4)
|
||||
STP (ZR, ZR), -32(R4)
|
||||
STP (ZR, ZR), -16(R4)
|
||||
CMP R4, R6
|
||||
BHI memclr
|
||||
|
||||
// !!! R6 = &src[0]
|
||||
MOVD R7, R6
|
||||
|
||||
// sLimit := len(src) - inputMargin
|
||||
MOVD R14, R9
|
||||
SUB $15, R9, R9
|
||||
|
||||
// !!! Pre-emptively spill R5, R6 and R9 to the stack. Their values don't
|
||||
// change for the rest of the function.
|
||||
MOVD R5, 64(RSP)
|
||||
MOVD R6, 72(RSP)
|
||||
MOVD R9, 96(RSP)
|
||||
|
||||
// nextEmit := 0
|
||||
MOVD R6, R10
|
||||
|
||||
// s := 1
|
||||
ADD $1, R7, R7
|
||||
|
||||
// nextHash := hash(load32(src, s), shift)
|
||||
MOVW 0(R7), R11
|
||||
MULW R16, R11, R11
|
||||
LSRW R5, R11, R11
|
||||
|
||||
outer:
|
||||
// for { etc }
|
||||
|
||||
// skip := 32
|
||||
MOVD $32, R12
|
||||
|
||||
// nextS := s
|
||||
MOVD R7, R13
|
||||
|
||||
// candidate := 0
|
||||
MOVD $0, R15
|
||||
|
||||
inner0:
|
||||
// for { etc }
|
||||
|
||||
// s := nextS
|
||||
MOVD R13, R7
|
||||
|
||||
// bytesBetweenHashLookups := skip >> 5
|
||||
MOVD R12, R14
|
||||
LSR $5, R14, R14
|
||||
|
||||
// nextS = s + bytesBetweenHashLookups
|
||||
ADD R14, R13, R13
|
||||
|
||||
// skip += bytesBetweenHashLookups
|
||||
ADD R14, R12, R12
|
||||
|
||||
// if nextS > sLimit { goto emitRemainder }
|
||||
MOVD R13, R3
|
||||
SUB R6, R3, R3
|
||||
CMP R9, R3
|
||||
BHI emitRemainder
|
||||
|
||||
// candidate = int(table[nextHash])
|
||||
MOVHU 0(R17)(R11<<1), R15
|
||||
|
||||
// table[nextHash] = uint16(s)
|
||||
MOVD R7, R3
|
||||
SUB R6, R3, R3
|
||||
|
||||
MOVH R3, 0(R17)(R11<<1)
|
||||
|
||||
// nextHash = hash(load32(src, nextS), shift)
|
||||
MOVW 0(R13), R11
|
||||
MULW R16, R11
|
||||
LSRW R5, R11, R11
|
||||
|
||||
// if load32(src, s) != load32(src, candidate) { continue } break
|
||||
MOVW 0(R7), R3
|
||||
MOVW (R6)(R15), R4
|
||||
CMPW R4, R3
|
||||
BNE inner0
|
||||
|
||||
fourByteMatch:
|
||||
// As per the encode_other.go code:
|
||||
//
|
||||
// A 4-byte match has been found. We'll later see etc.
|
||||
|
||||
// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
|
||||
// on inputMargin in encode.go.
|
||||
MOVD R7, R3
|
||||
SUB R10, R3, R3
|
||||
CMP $16, R3
|
||||
BLE emitLiteralFastPath
|
||||
|
||||
// ----------------------------------------
|
||||
// Begin inline of the emitLiteral call.
|
||||
//
|
||||
// d += emitLiteral(dst[d:], src[nextEmit:s])
|
||||
|
||||
MOVW R3, R4
|
||||
SUBW $1, R4, R4
|
||||
|
||||
MOVW $60, R2
|
||||
CMPW R2, R4
|
||||
BLT inlineEmitLiteralOneByte
|
||||
MOVW $256, R2
|
||||
CMPW R2, R4
|
||||
BLT inlineEmitLiteralTwoBytes
|
||||
|
||||
inlineEmitLiteralThreeBytes:
|
||||
MOVD $0xf4, R1
|
||||
MOVB R1, 0(R8)
|
||||
MOVW R4, 1(R8)
|
||||
ADD $3, R8, R8
|
||||
B inlineEmitLiteralMemmove
|
||||
|
||||
inlineEmitLiteralTwoBytes:
|
||||
MOVD $0xf0, R1
|
||||
MOVB R1, 0(R8)
|
||||
MOVB R4, 1(R8)
|
||||
ADD $2, R8, R8
|
||||
B inlineEmitLiteralMemmove
|
||||
|
||||
inlineEmitLiteralOneByte:
|
||||
LSLW $2, R4, R4
|
||||
MOVB R4, 0(R8)
|
||||
ADD $1, R8, R8
|
||||
|
||||
inlineEmitLiteralMemmove:
|
||||
// Spill local variables (registers) onto the stack; call; unspill.
|
||||
//
|
||||
// copy(dst[i:], lit)
|
||||
//
|
||||
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
|
||||
// R8, R10 and R3 as arguments.
|
||||
MOVD R8, 8(RSP)
|
||||
MOVD R10, 16(RSP)
|
||||
MOVD R3, 24(RSP)
|
||||
|
||||
// Finish the "d +=" part of "d += emitLiteral(etc)".
|
||||
ADD R3, R8, R8
|
||||
MOVD R7, 80(RSP)
|
||||
MOVD R8, 88(RSP)
|
||||
MOVD R15, 120(RSP)
|
||||
CALL runtime·memmove(SB)
|
||||
MOVD 64(RSP), R5
|
||||
MOVD 72(RSP), R6
|
||||
MOVD 80(RSP), R7
|
||||
MOVD 88(RSP), R8
|
||||
MOVD 96(RSP), R9
|
||||
MOVD 120(RSP), R15
|
||||
ADD $128, RSP, R17
|
||||
MOVW $0xa7bd, R16
|
||||
MOVKW $(0x1e35<<16), R16
|
||||
B inner1
|
||||
|
||||
inlineEmitLiteralEnd:
|
||||
// End inline of the emitLiteral call.
|
||||
// ----------------------------------------
|
||||
|
||||
emitLiteralFastPath:
|
||||
// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
|
||||
MOVB R3, R4
|
||||
SUBW $1, R4, R4
|
||||
AND $0xff, R4, R4
|
||||
LSLW $2, R4, R4
|
||||
MOVB R4, (R8)
|
||||
ADD $1, R8, R8
|
||||
|
||||
// !!! Implement the copy from lit to dst as a 16-byte load and store.
|
||||
// (Encode's documentation says that dst and src must not overlap.)
|
||||
//
|
||||
// This always copies 16 bytes, instead of only len(lit) bytes, but that's
|
||||
// OK. Subsequent iterations will fix up the overrun.
|
||||
//
|
||||
// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
|
||||
// 16-byte loads and stores. This technique probably wouldn't be as
|
||||
// effective on architectures that are fussier about alignment.
|
||||
LDP 0(R10), (R0, R1)
|
||||
STP (R0, R1), 0(R8)
|
||||
ADD R3, R8, R8
|
||||
|
||||
inner1:
|
||||
// for { etc }
|
||||
|
||||
// base := s
|
||||
MOVD R7, R12
|
||||
|
||||
// !!! offset := base - candidate
|
||||
MOVD R12, R11
|
||||
SUB R15, R11, R11
|
||||
SUB R6, R11, R11
|
||||
|
||||
// ----------------------------------------
|
||||
// Begin inline of the extendMatch call.
|
||||
//
|
||||
// s = extendMatch(src, candidate+4, s+4)
|
||||
|
||||
// !!! R14 = &src[len(src)]
|
||||
MOVD src_len+32(FP), R14
|
||||
ADD R6, R14, R14
|
||||
|
||||
// !!! R13 = &src[len(src) - 8]
|
||||
MOVD R14, R13
|
||||
SUB $8, R13, R13
|
||||
|
||||
// !!! R15 = &src[candidate + 4]
|
||||
ADD $4, R15, R15
|
||||
ADD R6, R15, R15
|
||||
|
||||
// !!! s += 4
|
||||
ADD $4, R7, R7
|
||||
|
||||
inlineExtendMatchCmp8:
|
||||
// As long as we are 8 or more bytes before the end of src, we can load and
|
||||
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
|
||||
CMP R13, R7
|
||||
BHI inlineExtendMatchCmp1
|
||||
MOVD (R15), R3
|
||||
MOVD (R7), R4
|
||||
CMP R4, R3
|
||||
BNE inlineExtendMatchBSF
|
||||
ADD $8, R15, R15
|
||||
ADD $8, R7, R7
|
||||
B inlineExtendMatchCmp8
|
||||
|
||||
inlineExtendMatchBSF:
|
||||
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
|
||||
// the index of the first byte that differs.
|
||||
// RBIT reverses the bit order, then CLZ counts the leading zeros, the
|
||||
// combination of which finds the least significant bit which is set.
|
||||
// The arm64 architecture is little-endian, and the shift by 3 converts
|
||||
// a bit index to a byte index.
|
||||
EOR R3, R4, R4
|
||||
RBIT R4, R4
|
||||
CLZ R4, R4
|
||||
ADD R4>>3, R7, R7
|
||||
B inlineExtendMatchEnd
|
||||
|
||||
inlineExtendMatchCmp1:
|
||||
// In src's tail, compare 1 byte at a time.
|
||||
CMP R7, R14
|
||||
BLS inlineExtendMatchEnd
|
||||
MOVB (R15), R3
|
||||
MOVB (R7), R4
|
||||
CMP R4, R3
|
||||
BNE inlineExtendMatchEnd
|
||||
ADD $1, R15, R15
|
||||
ADD $1, R7, R7
|
||||
B inlineExtendMatchCmp1
|
||||
|
||||
inlineExtendMatchEnd:
|
||||
// End inline of the extendMatch call.
|
||||
// ----------------------------------------
|
||||
|
||||
// ----------------------------------------
|
||||
// Begin inline of the emitCopy call.
|
||||
//
|
||||
// d += emitCopy(dst[d:], base-candidate, s-base)
|
||||
|
||||
// !!! length := s - base
|
||||
MOVD R7, R3
|
||||
SUB R12, R3, R3
|
||||
|
||||
inlineEmitCopyLoop0:
|
||||
// for length >= 68 { etc }
|
||||
MOVW $68, R2
|
||||
CMPW R2, R3
|
||||
BLT inlineEmitCopyStep1
|
||||
|
||||
// Emit a length 64 copy, encoded as 3 bytes.
|
||||
MOVD $0xfe, R1
|
||||
MOVB R1, 0(R8)
|
||||
MOVW R11, 1(R8)
|
||||
ADD $3, R8, R8
|
||||
SUBW $64, R3, R3
|
||||
B inlineEmitCopyLoop0
|
||||
|
||||
inlineEmitCopyStep1:
|
||||
// if length > 64 { etc }
|
||||
MOVW $64, R2
|
||||
CMPW R2, R3
|
||||
BLE inlineEmitCopyStep2
|
||||
|
||||
// Emit a length 60 copy, encoded as 3 bytes.
|
||||
MOVD $0xee, R1
|
||||
MOVB R1, 0(R8)
|
||||
MOVW R11, 1(R8)
|
||||
ADD $3, R8, R8
|
||||
SUBW $60, R3, R3
|
||||
|
||||
inlineEmitCopyStep2:
|
||||
// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
|
||||
MOVW $12, R2
|
||||
CMPW R2, R3
|
||||
BGE inlineEmitCopyStep3
|
||||
MOVW $2048, R2
|
||||
CMPW R2, R11
|
||||
BGE inlineEmitCopyStep3
|
||||
|
||||
// Emit the remaining copy, encoded as 2 bytes.
|
||||
MOVB R11, 1(R8)
|
||||
LSRW $8, R11, R11
|
||||
LSLW $5, R11, R11
|
||||
SUBW $4, R3, R3
|
||||
AND $0xff, R3, R3
|
||||
LSLW $2, R3, R3
|
||||
ORRW R3, R11, R11
|
||||
ORRW $1, R11, R11
|
||||
MOVB R11, 0(R8)
|
||||
ADD $2, R8, R8
|
||||
B inlineEmitCopyEnd
|
||||
|
||||
inlineEmitCopyStep3:
|
||||
// Emit the remaining copy, encoded as 3 bytes.
|
||||
SUBW $1, R3, R3
|
||||
LSLW $2, R3, R3
|
||||
ORRW $2, R3, R3
|
||||
MOVB R3, 0(R8)
|
||||
MOVW R11, 1(R8)
|
||||
ADD $3, R8, R8
|
||||
|
||||
inlineEmitCopyEnd:
|
||||
// End inline of the emitCopy call.
|
||||
// ----------------------------------------
|
||||
|
||||
// nextEmit = s
|
||||
MOVD R7, R10
|
||||
|
||||
// if s >= sLimit { goto emitRemainder }
|
||||
MOVD R7, R3
|
||||
SUB R6, R3, R3
|
||||
CMP R3, R9
|
||||
BLS emitRemainder
|
||||
|
||||
// As per the encode_other.go code:
|
||||
//
|
||||
// We could immediately etc.
|
||||
|
||||
// x := load64(src, s-1)
|
||||
MOVD -1(R7), R14
|
||||
|
||||
// prevHash := hash(uint32(x>>0), shift)
|
||||
MOVW R14, R11
|
||||
MULW R16, R11, R11
|
||||
LSRW R5, R11, R11
|
||||
|
||||
// table[prevHash] = uint16(s-1)
|
||||
MOVD R7, R3
|
||||
SUB R6, R3, R3
|
||||
SUB $1, R3, R3
|
||||
|
||||
MOVHU R3, 0(R17)(R11<<1)
|
||||
|
||||
// currHash := hash(uint32(x>>8), shift)
|
||||
LSR $8, R14, R14
|
||||
MOVW R14, R11
|
||||
MULW R16, R11, R11
|
||||
LSRW R5, R11, R11
|
||||
|
||||
// candidate = int(table[currHash])
|
||||
MOVHU 0(R17)(R11<<1), R15
|
||||
|
||||
// table[currHash] = uint16(s)
|
||||
ADD $1, R3, R3
|
||||
MOVHU R3, 0(R17)(R11<<1)
|
||||
|
||||
// if uint32(x>>8) == load32(src, candidate) { continue }
|
||||
MOVW (R6)(R15), R4
|
||||
CMPW R4, R14
|
||||
BEQ inner1
|
||||
|
||||
// nextHash = hash(uint32(x>>16), shift)
|
||||
LSR $8, R14, R14
|
||||
MOVW R14, R11
|
||||
MULW R16, R11, R11
|
||||
LSRW R5, R11, R11
|
||||
|
||||
// s++
|
||||
ADD $1, R7, R7
|
||||
|
||||
// break out of the inner1 for loop, i.e. continue the outer loop.
|
||||
B outer
|
||||
|
||||
emitRemainder:
|
||||
// if nextEmit < len(src) { etc }
|
||||
MOVD src_len+32(FP), R3
|
||||
ADD R6, R3, R3
|
||||
CMP R3, R10
|
||||
BEQ encodeBlockEnd
|
||||
|
||||
// d += emitLiteral(dst[d:], src[nextEmit:])
|
||||
//
|
||||
// Push args.
|
||||
MOVD R8, 8(RSP)
|
||||
MOVD $0, 16(RSP) // Unnecessary, as the callee ignores it, but conservative.
|
||||
MOVD $0, 24(RSP) // Unnecessary, as the callee ignores it, but conservative.
|
||||
MOVD R10, 32(RSP)
|
||||
SUB R10, R3, R3
|
||||
MOVD R3, 40(RSP)
|
||||
MOVD R3, 48(RSP) // Unnecessary, as the callee ignores it, but conservative.
|
||||
|
||||
// Spill local variables (registers) onto the stack; call; unspill.
|
||||
MOVD R8, 88(RSP)
|
||||
CALL ·emitLiteral(SB)
|
||||
MOVD 88(RSP), R8
|
||||
|
||||
// Finish the "d +=" part of "d += emitLiteral(etc)".
|
||||
MOVD 56(RSP), R1
|
||||
ADD R1, R8, R8
|
||||
|
||||
encodeBlockEnd:
|
||||
MOVD dst_base+0(FP), R3
|
||||
SUB R3, R8, R8
|
||||
MOVD R8, d+48(FP)
|
||||
RET
@ -0,0 +1,30 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
// +build amd64 arm64
|
||||
|
||||
package snappy
|
||||
|
||||
// emitLiteral has the same semantics as in encode_other.go.
|
||||
//
|
||||
//go:noescape
|
||||
func emitLiteral(dst, lit []byte) int
|
||||
|
||||
// emitCopy has the same semantics as in encode_other.go.
|
||||
//
|
||||
//go:noescape
|
||||
func emitCopy(dst []byte, offset, length int) int
|
||||
|
||||
// extendMatch has the same semantics as in encode_other.go.
|
||||
//
|
||||
//go:noescape
|
||||
func extendMatch(src []byte, i, j int) int
|
||||
|
||||
// encodeBlock has the same semantics as in encode_other.go.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeBlock(dst, src []byte) (d int)
@ -0,0 +1,238 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !amd64,!arm64 appengine !gc noasm
|
||||
|
||||
package snappy
|
||||
|
||||
func load32(b []byte, i int) uint32 {
|
||||
b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
|
||||
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
|
||||
}
|
||||
|
||||
func load64(b []byte, i int) uint64 {
|
||||
b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
|
||||
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
|
||||
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
|
||||
}
|
||||
|
||||
// emitLiteral writes a literal chunk and returns the number of bytes written.
|
||||
//
|
||||
// It assumes that:
|
||||
// dst is long enough to hold the encoded bytes
|
||||
// 1 <= len(lit) && len(lit) <= 65536
|
||||
func emitLiteral(dst, lit []byte) int {
|
||||
i, n := 0, uint(len(lit)-1)
|
||||
switch {
|
||||
case n < 60:
|
||||
dst[0] = uint8(n)<<2 | tagLiteral
|
||||
i = 1
|
||||
case n < 1<<8:
|
||||
dst[0] = 60<<2 | tagLiteral
|
||||
dst[1] = uint8(n)
|
||||
i = 2
|
||||
default:
|
||||
dst[0] = 61<<2 | tagLiteral
|
||||
dst[1] = uint8(n)
|
||||
dst[2] = uint8(n >> 8)
|
||||
i = 3
|
||||
}
|
||||
return i + copy(dst[i:], lit)
|
||||
}
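As a worked example of the header layout above (not additional code in the commit): a 5-byte literal is emitted as the single tag byte 4<<2|tagLiteral = 0x10 followed by the five literal bytes, while a 300-byte literal needs the three header bytes 61<<2|tagLiteral = 0xf4, 0x2b, 0x01 (since 299 = 0x012b, little-endian) before its data.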
// emitCopy writes a copy chunk and returns the number of bytes written.
|
||||
//
|
||||
// It assumes that:
|
||||
// dst is long enough to hold the encoded bytes
|
||||
// 1 <= offset && offset <= 65535
|
||||
// 4 <= length && length <= 65535
|
||||
func emitCopy(dst []byte, offset, length int) int {
|
||||
i := 0
|
||||
// The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The
|
||||
// threshold for this loop is a little higher (at 68 = 64 + 4), and the
|
||||
// length emitted down below is a little lower (at 60 = 64 - 4), because
|
||||
// it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed
|
||||
// by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as
|
||||
// a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as
|
||||
// 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a
|
||||
// tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an
|
||||
// encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1.
|
||||
for length >= 68 {
|
||||
// Emit a length 64 copy, encoded as 3 bytes.
|
||||
dst[i+0] = 63<<2 | tagCopy2
|
||||
dst[i+1] = uint8(offset)
|
||||
dst[i+2] = uint8(offset >> 8)
|
||||
i += 3
|
||||
length -= 64
|
||||
}
|
||||
if length > 64 {
|
||||
// Emit a length 60 copy, encoded as 3 bytes.
|
||||
dst[i+0] = 59<<2 | tagCopy2
|
||||
dst[i+1] = uint8(offset)
|
||||
dst[i+2] = uint8(offset >> 8)
|
||||
i += 3
|
||||
length -= 60
|
||||
}
|
||||
if length >= 12 || offset >= 2048 {
|
||||
// Emit the remaining copy, encoded as 3 bytes.
|
||||
dst[i+0] = uint8(length-1)<<2 | tagCopy2
|
||||
dst[i+1] = uint8(offset)
|
||||
dst[i+2] = uint8(offset >> 8)
|
||||
return i + 3
|
||||
}
|
||||
// Emit the remaining copy, encoded as 2 bytes.
|
||||
dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
|
||||
dst[i+1] = uint8(offset)
|
||||
return i + 2
|
||||
}
|
||||
|
||||
// extendMatch returns the largest k such that k <= len(src) and that
|
||||
// src[i:i+k-j] and src[j:k] have the same contents.
|
||||
//
|
||||
// It assumes that:
|
||||
// 0 <= i && i < j && j <= len(src)
|
||||
func extendMatch(src []byte, i, j int) int {
|
||||
for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
|
||||
}
|
||||
return j
|
||||
}
|
||||
|
||||
func hash(u, shift uint32) uint32 {
|
||||
return (u * 0x1e35a7bd) >> shift
|
||||
}
|
||||
|
||||
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||
// been written.
|
||||
//
|
||||
// It also assumes that:
|
||||
// len(dst) >= MaxEncodedLen(len(src)) &&
|
||||
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
|
||||
func encodeBlock(dst, src []byte) (d int) {
|
||||
// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
|
||||
// The table element type is uint16, as s < sLimit and sLimit < len(src)
|
||||
// and len(src) <= maxBlockSize and maxBlockSize == 65536.
|
||||
const (
|
||||
maxTableSize = 1 << 14
|
||||
// tableMask is redundant, but helps the compiler eliminate bounds
|
||||
// checks.
|
||||
tableMask = maxTableSize - 1
|
||||
)
|
||||
shift := uint32(32 - 8)
|
||||
for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
|
||||
shift--
|
||||
}
|
||||
// In Go, all array elements are zero-initialized, so there is no advantage
|
||||
// to a smaller tableSize per se. However, it matches the C++ algorithm,
|
||||
// and in the asm versions of this code, we can get away with zeroing only
|
||||
// the first tableSize elements.
|
||||
var table [maxTableSize]uint16
|
||||
|
||||
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
||||
// lets us use a fast path for emitLiteral in the main loop, while we are
|
||||
// looking for copies.
|
||||
sLimit := len(src) - inputMargin
|
||||
|
||||
// nextEmit is where in src the next emitLiteral should start from.
|
||||
nextEmit := 0
|
||||
|
||||
// The encoded form must start with a literal, as there are no previous
|
||||
// bytes to copy, so we start looking for hash matches at s == 1.
|
||||
s := 1
|
||||
nextHash := hash(load32(src, s), shift)
|
||||
|
||||
for {
|
||||
// Copied from the C++ snappy implementation:
|
||||
//
|
||||
// Heuristic match skipping: If 32 bytes are scanned with no matches
|
||||
// found, start looking only at every other byte. If 32 more bytes are
|
||||
// scanned (or skipped), look at every third byte, etc.. When a match
|
||||
// is found, immediately go back to looking at every byte. This is a
|
||||
// small loss (~5% performance, ~0.1% density) for compressible data
|
||||
// due to more bookkeeping, but for non-compressible data (such as
|
||||
// JPEG) it's a huge win since the compressor quickly "realizes" the
|
||||
// data is incompressible and doesn't bother looking for matches
|
||||
// everywhere.
|
||||
//
|
||||
// The "skip" variable keeps track of how many bytes there are since
|
||||
// the last match; dividing it by 32 (ie. right-shifting by five) gives
|
||||
// the number of bytes to move ahead for each iteration.
|
||||
skip := 32
|
||||
|
||||
nextS := s
|
||||
candidate := 0
|
||||
for {
|
||||
s = nextS
|
||||
bytesBetweenHashLookups := skip >> 5
|
||||
nextS = s + bytesBetweenHashLookups
|
||||
skip += bytesBetweenHashLookups
|
||||
if nextS > sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
candidate = int(table[nextHash&tableMask])
|
||||
table[nextHash&tableMask] = uint16(s)
|
||||
nextHash = hash(load32(src, nextS), shift)
|
||||
if load32(src, s) == load32(src, candidate) {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// A 4-byte match has been found. We'll later see if more than 4 bytes
|
||||
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
|
||||
// them as literal bytes.
|
||||
d += emitLiteral(dst[d:], src[nextEmit:s])
|
||||
|
||||
// Call emitCopy, and then see if another emitCopy could be our next
|
||||
// move. Repeat until we find no match for the input immediately after
|
||||
// what was consumed by the last emitCopy call.
|
||||
//
|
||||
// If we exit this loop normally then we need to call emitLiteral next,
|
||||
// though we don't yet know how big the literal will be. We handle that
|
||||
// by proceeding to the next iteration of the main loop. We also can
|
||||
// exit this loop via goto if we get close to exhausting the input.
|
||||
for {
|
||||
// Invariant: we have a 4-byte match at s, and no need to emit any
|
||||
// literal bytes prior to s.
|
||||
base := s
|
||||
|
||||
// Extend the 4-byte match as long as possible.
|
||||
//
|
||||
// This is an inlined version of:
|
||||
// s = extendMatch(src, candidate+4, s+4)
|
||||
s += 4
|
||||
for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 {
|
||||
}
|
||||
|
||||
d += emitCopy(dst[d:], base-candidate, s-base)
|
||||
nextEmit = s
|
||||
if s >= sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
|
||||
// We could immediately start working at s now, but to improve
|
||||
// compression we first update the hash table at s-1 and at s. If
|
||||
// another emitCopy is not our next move, also calculate nextHash
|
||||
// at s+1. At least on GOARCH=amd64, these three hash calculations
|
||||
// are faster as one load64 call (with some shifts) instead of
|
||||
// three load32 calls.
|
||||
x := load64(src, s-1)
|
||||
prevHash := hash(uint32(x>>0), shift)
|
||||
table[prevHash&tableMask] = uint16(s - 1)
|
||||
currHash := hash(uint32(x>>8), shift)
|
||||
candidate = int(table[currHash&tableMask])
|
||||
table[currHash&tableMask] = uint16(s)
|
||||
if uint32(x>>8) != load32(src, candidate) {
|
||||
nextHash = hash(uint32(x>>16), shift)
|
||||
s++
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
emitRemainder:
|
||||
if nextEmit < len(src) {
|
||||
d += emitLiteral(dst[d:], src[nextEmit:])
|
||||
}
|
||||
return d
|
||||
}
|
|
@@ -0,0 +1,98 @@
|
|||
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package snappy implements the Snappy compression format. It aims for very
|
||||
// high speeds and reasonable compression.
|
||||
//
|
||||
// There are actually two Snappy formats: block and stream. They are related,
|
||||
// but different: trying to decompress block-compressed data as a Snappy stream
|
||||
// will fail, and vice versa. The block format is the Decode and Encode
|
||||
// functions and the stream format is the Reader and Writer types.
|
||||
//
|
||||
// The block format, the more common case, is used when the complete size (the
|
||||
// number of bytes) of the original data is known upfront, at the time
|
||||
// compression starts. The stream format, also known as the framing format, is
|
||||
// for when that isn't always true.
|
||||
//
|
||||
// The canonical, C++ implementation is at https://github.com/google/snappy and
|
||||
// it only implements the block format.
|
||||
package snappy // import "github.com/golang/snappy"
|
||||
|
||||
import (
|
||||
"hash/crc32"
|
||||
)
|
||||
|
||||
/*
|
||||
Each encoded block begins with the varint-encoded length of the decoded data,
|
||||
followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
|
||||
first byte of each chunk is broken into its 2 least and 6 most significant bits
|
||||
called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
|
||||
Zero means a literal tag. All other values mean a copy tag.
|
||||
|
||||
For literal tags:
|
||||
- If m < 60, the next 1 + m bytes are literal bytes.
|
||||
- Otherwise, let n be the little-endian unsigned integer denoted by the next
|
||||
m - 59 bytes. The next 1 + n bytes after that are literal bytes.
|
||||
|
||||
For copy tags, length bytes are copied from offset bytes ago, in the style of
|
||||
Lempel-Ziv compression algorithms. In particular:
|
||||
- For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
|
||||
The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
|
||||
of the offset. The next byte is bits 0-7 of the offset.
|
||||
- For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
|
||||
The length is 1 + m. The offset is the little-endian unsigned integer
|
||||
denoted by the next 2 bytes.
|
||||
- For l == 3, this tag is a legacy format that is no longer issued by most
|
||||
encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in
|
||||
[1, 65). The length is 1 + m. The offset is the little-endian unsigned
|
||||
integer denoted by the next 4 bytes.
|
||||
*/
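// Worked example of the chunk layout described above (illustrative sketch;
// the values follow the emitLiteral and emitCopy encodings used by this
// package):
//
//	// Literal chunk for the two bytes "go": n = len(lit)-1 = 1 < 60, so the
//	// tag byte is uint8(n)<<2 | tagLiteral = 0x04 and the literal bytes follow.
//	literalChunk := []byte{0x04, 'g', 'o'}
//
//	// tagCopy1 chunk meaning "copy 4 bytes starting 5 bytes back":
//	// length = 4 + low 3 bits of m (0); offset bits 8-10 come from the high
//	// 3 bits of m (0) and offset bits 0-7 from the next byte (5).
//	copyChunk := []byte{0x01, 0x05}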
|
||||
const (
|
||||
tagLiteral = 0x00
|
||||
tagCopy1 = 0x01
|
||||
tagCopy2 = 0x02
|
||||
tagCopy4 = 0x03
|
||||
)
|
||||
|
||||
const (
|
||||
checksumSize = 4
|
||||
chunkHeaderSize = 4
|
||||
magicChunk = "\xff\x06\x00\x00" + magicBody
|
||||
magicBody = "sNaPpY"
|
||||
|
||||
// maxBlockSize is the maximum size of the input to encodeBlock. It is not
|
||||
// part of the wire format per se, but some parts of the encoder assume
|
||||
// that an offset fits into a uint16.
|
||||
//
|
||||
// Also, for the framing format (Writer type instead of Encode function),
|
||||
// https://github.com/google/snappy/blob/master/framing_format.txt says
|
||||
// that "the uncompressed data in a chunk must be no longer than 65536
|
||||
// bytes".
|
||||
maxBlockSize = 65536
|
||||
|
||||
// maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is
|
||||
// hard coded to be a const instead of a variable, so that obufLen can also
|
||||
// be a const. Their equivalence is confirmed by
|
||||
// TestMaxEncodedLenOfMaxBlockSize.
|
||||
maxEncodedLenOfMaxBlockSize = 76490
|
||||
|
||||
obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize
|
||||
obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize
|
||||
)
|
||||
|
||||
const (
|
||||
chunkTypeCompressedData = 0x00
|
||||
chunkTypeUncompressedData = 0x01
|
||||
chunkTypePadding = 0xfe
|
||||
chunkTypeStreamIdentifier = 0xff
|
||||
)
|
||||
|
||||
var crcTable = crc32.MakeTable(crc32.Castagnoli)
|
||||
|
||||
// crc implements the checksum specified in section 3 of
|
||||
// https://github.com/google/snappy/blob/master/framing_format.txt
|
||||
func crc(b []byte) uint32 {
|
||||
c := crc32.Update(0, crcTable, b)
|
||||
return uint32(c>>15|c<<17) + 0xa282ead8
|
||||
}
|
|
@@ -0,0 +1,36 @@
|
|||
# Created by https://www.gitignore.io/api/macos
|
||||
|
||||
### macOS ###
|
||||
*.DS_Store
|
||||
.AppleDouble
|
||||
.LSOverride
|
||||
|
||||
# Icon must end with two \r
|
||||
Icon
|
||||
|
||||
|
||||
# Thumbnails
|
||||
._*
|
||||
|
||||
# Files that might appear in the root of a volume
|
||||
.DocumentRevisions-V100
|
||||
.fseventsd
|
||||
.Spotlight-V100
|
||||
.TemporaryItems
|
||||
.Trashes
|
||||
.VolumeIcon.icns
|
||||
.com.apple.timemachine.donotpresent
|
||||
|
||||
# Directories potentially created on remote AFP share
|
||||
.AppleDB
|
||||
.AppleDesktop
|
||||
Network Trash Folder
|
||||
Temporary Items
|
||||
.apdisk
|
||||
|
||||
# End of https://www.gitignore.io/api/macos
|
||||
|
||||
cmd/*/*exe
|
||||
.idea
|
||||
|
||||
fuzz/*.zip
|
|
@@ -0,0 +1,28 @@
|
|||
Copyright (c) 2015, Pierre Curto
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of xxHash nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
|
@@ -0,0 +1,92 @@
|
|||
# lz4 : LZ4 compression in pure Go
|
||||
|
||||
[![Go Reference](https://pkg.go.dev/badge/github.com/pierrec/lz4/v4.svg)](https://pkg.go.dev/github.com/pierrec/lz4/v4)
|
||||
[![CI](https://github.com/pierrec/lz4/workflows/ci/badge.svg)](https://github.com/pierrec/lz4/actions)
|
||||
[![Go Report Card](https://goreportcard.com/badge/github.com/pierrec/lz4)](https://goreportcard.com/report/github.com/pierrec/lz4)
|
||||
[![GitHub tag (latest SemVer)](https://img.shields.io/github/tag/pierrec/lz4.svg?style=social)](https://github.com/pierrec/lz4/tags)
|
||||
|
||||
## Overview
|
||||
|
||||
This package provides a streaming interface to [LZ4 data streams](http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html) as well as low level compress and uncompress functions for LZ4 data blocks.
|
||||
The implementation is based on the reference C [one](https://github.com/lz4/lz4).
|
||||
|
||||
## Install
|
||||
|
||||
Assuming you have the go toolchain installed:
|
||||
|
||||
```
|
||||
go get github.com/pierrec/lz4/v4
|
||||
```
|
||||
|
||||
There is a command line interface tool to compress and decompress LZ4 files.
|
||||
|
||||
```
|
||||
go install github.com/pierrec/lz4/v4/cmd/lz4c
|
||||
```
|
||||
|
||||
Usage
|
||||
|
||||
```
|
||||
Usage of lz4c:
|
||||
-version
|
||||
print the program version
|
||||
|
||||
Subcommands:
|
||||
Compress the given files or from stdin to stdout.
|
||||
compress [arguments] [<file name> ...]
|
||||
-bc
|
||||
enable block checksum
|
||||
-l int
|
||||
compression level (0=fastest)
|
||||
-sc
|
||||
disable stream checksum
|
||||
-size string
|
||||
block max size [64K,256K,1M,4M] (default "4M")
|
||||
|
||||
Uncompress the given files or from stdin to stdout.
|
||||
uncompress [arguments] [<file name> ...]
|
||||
|
||||
```
|
||||
|
||||
|
||||
## Example
|
||||
|
||||
```
|
||||
// Compress and uncompress an input string.
|
||||
s := "hello world"
|
||||
r := strings.NewReader(s)
|
||||
|
||||
// The pipe will uncompress the data from the writer.
|
||||
pr, pw := io.Pipe()
|
||||
zw := lz4.NewWriter(pw)
|
||||
zr := lz4.NewReader(pr)
|
||||
|
||||
go func() {
|
||||
// Compress the input string.
|
||||
_, _ = io.Copy(zw, r)
|
||||
_ = zw.Close() // Make sure the writer is closed
|
||||
_ = pw.Close() // Terminate the pipe
|
||||
}()
|
||||
|
||||
_, _ = io.Copy(os.Stdout, zr)
|
||||
|
||||
// Output:
|
||||
// hello world
|
||||
```
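
The block functions mentioned in the overview can also be used directly, without the stream framing. The following sketch is illustrative only and assumes the package-level `CompressBlockBound`, `Compressor` and `UncompressBlock` helpers exported by recent v4 releases:

```
// Round-trip a single block (sketch; error handling elided).
data := []byte("hello world hello world hello world")

var c lz4.Compressor
buf := make([]byte, lz4.CompressBlockBound(len(data)))
n, _ := c.CompressBlock(data, buf)
if n == 0 {
	// Incompressible input: store data uncompressed instead.
}

out := make([]byte, len(data))
m, _ := lz4.UncompressBlock(buf[:n], out)
fmt.Println(string(out[:m]))
```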
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are very welcome for bug fixing, performance improvements...!
|
||||
|
||||
- Open an issue with a proper description
|
||||
- Send a pull request with appropriate test case(s)
|
||||
|
||||
## Contributors
|
||||
|
||||
Thanks to all [contributors](https://github.com/pierrec/lz4/graphs/contributors) so far!
|
||||
|
||||
Special thanks to [@Zariel](https://github.com/Zariel) for his asm implementation of the decoder.
|
||||
|
||||
Special thanks to [@greatroar](https://github.com/greatroar) for his work on the asm implementations of the decoder for amd64 and arm64.
|
||||
|
||||
Special thanks to [@klauspost](https://github.com/klauspost) for his work on optimizing the code.
|
|
@@ -0,0 +1,481 @@
|
|||
package lz4block
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"math/bits"
|
||||
"sync"
|
||||
|
||||
"github.com/pierrec/lz4/v4/internal/lz4errors"
|
||||
)
|
||||
|
||||
const (
|
||||
// The following constants are used to setup the compression algorithm.
|
||||
minMatch = 4 // the minimum size of the match sequence size (4 bytes)
|
||||
winSizeLog = 16 // LZ4 64Kb window size limit
|
||||
winSize = 1 << winSizeLog
|
||||
winMask = winSize - 1 // 64Kb window of previous data for dependent blocks
|
||||
|
||||
// hashLog determines the size of the hash table used to quickly find a previous match position.
|
||||
// Its value influences the compression speed and memory usage, the lower the faster,
|
||||
// but at the expense of the compression ratio.
|
||||
// 16 seems to be the best compromise for fast compression.
|
||||
hashLog = 16
|
||||
htSize = 1 << hashLog
|
||||
|
||||
mfLimit = 10 + minMatch // The last match cannot start within the last 14 bytes.
|
||||
)
|
||||
|
||||
func recoverBlock(e *error) {
|
||||
if r := recover(); r != nil && *e == nil {
|
||||
*e = lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
}
|
||||
|
||||
// blockHash hashes the lower 6 bytes into a value < htSize.
|
||||
func blockHash(x uint64) uint32 {
|
||||
const prime6bytes = 227718039650203
|
||||
return uint32(((x << (64 - 48)) * prime6bytes) >> (64 - hashLog))
|
||||
}
|
||||
|
||||
func CompressBlockBound(n int) int {
|
||||
return n + n/255 + 16
|
||||
}
|
||||
|
||||
func UncompressBlock(src, dst, dict []byte) (int, error) {
|
||||
if len(src) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
if di := decodeBlock(dst, src, dict); di >= 0 {
|
||||
return di, nil
|
||||
}
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
|
||||
type Compressor struct {
|
||||
// Offsets are at most 64kiB, so we can store only the lower 16 bits of
|
||||
// match positions: effectively, an offset from some 64kiB block boundary.
|
||||
//
|
||||
// When we retrieve such an offset, we interpret it as relative to the last
|
||||
// block boundary si &^ 0xffff, or the one before, (si &^ 0xffff) - 0x10000,
|
||||
// depending on which of these is inside the current window. If a table
|
||||
// entry was generated more than 64kiB back in the input, we find out by
|
||||
// inspecting the input stream.
|
||||
table [htSize]uint16
|
||||
|
||||
// Bitmap indicating which positions in the table are in use.
|
||||
// This allows us to quickly reset the table for reuse,
|
||||
// without having to zero everything.
|
||||
inUse [htSize / 32]uint32
|
||||
}
|
||||
|
||||
// Get returns the position of a presumptive match for the hash h.
|
||||
// The match may be a false positive due to a hash collision or an old entry.
|
||||
// If si < winSize, the return value may be negative.
|
||||
func (c *Compressor) get(h uint32, si int) int {
|
||||
h &= htSize - 1
|
||||
i := 0
|
||||
if c.inUse[h/32]&(1<<(h%32)) != 0 {
|
||||
i = int(c.table[h])
|
||||
}
|
||||
i += si &^ winMask
|
||||
if i >= si {
|
||||
// Try previous 64kiB block (negative when in first block).
|
||||
i -= winSize
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
func (c *Compressor) put(h uint32, si int) {
|
||||
h &= htSize - 1
|
||||
c.table[h] = uint16(si)
|
||||
c.inUse[h/32] |= 1 << (h % 32)
|
||||
}
|
||||
|
||||
func (c *Compressor) reset() { c.inUse = [htSize / 32]uint32{} }
|
||||
|
||||
var compressorPool = sync.Pool{New: func() interface{} { return new(Compressor) }}
|
||||
|
||||
func CompressBlock(src, dst []byte) (int, error) {
|
||||
c := compressorPool.Get().(*Compressor)
|
||||
n, err := c.CompressBlock(src, dst)
|
||||
compressorPool.Put(c)
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (c *Compressor) CompressBlock(src, dst []byte) (int, error) {
|
||||
// Zero out reused table to avoid non-deterministic output (issue #65).
|
||||
c.reset()
|
||||
|
||||
// Return 0, nil only if the destination buffer size is < CompressBlockBound.
|
||||
isNotCompressible := len(dst) < CompressBlockBound(len(src))
|
||||
|
||||
// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
|
||||
// This significantly speeds up incompressible data and usually has very small impact on compression.
|
||||
// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
|
||||
const adaptSkipLog = 7
|
||||
|
||||
// si: Current position of the search.
|
||||
// anchor: Position of the current literals.
|
||||
var si, di, anchor int
|
||||
sn := len(src) - mfLimit
|
||||
if sn <= 0 {
|
||||
goto lastLiterals
|
||||
}
|
||||
|
||||
// Fast scan strategy: the hash table only stores the last 4 bytes sequences.
|
||||
for si < sn {
|
||||
// Hash the next 6 bytes (sequence)...
|
||||
match := binary.LittleEndian.Uint64(src[si:])
|
||||
h := blockHash(match)
|
||||
h2 := blockHash(match >> 8)
|
||||
|
||||
// We check a match at s, s+1 and s+2 and pick the first one we get.
|
||||
// Checking 3 only requires us to load the source once.
|
||||
ref := c.get(h, si)
|
||||
ref2 := c.get(h2, si+1)
|
||||
c.put(h, si)
|
||||
c.put(h2, si+1)
|
||||
|
||||
offset := si - ref
|
||||
|
||||
if offset <= 0 || offset >= winSize || uint32(match) != binary.LittleEndian.Uint32(src[ref:]) {
|
||||
// No match. Start calculating another hash.
|
||||
// The processor can usually do this out-of-order.
|
||||
h = blockHash(match >> 16)
|
||||
ref3 := c.get(h, si+2)
|
||||
|
||||
// Check the second match at si+1
|
||||
si += 1
|
||||
offset = si - ref2
|
||||
|
||||
if offset <= 0 || offset >= winSize || uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) {
|
||||
// No match. Check the third match at si+2
|
||||
si += 1
|
||||
offset = si - ref3
|
||||
c.put(h, si)
|
||||
|
||||
if offset <= 0 || offset >= winSize || uint32(match>>16) != binary.LittleEndian.Uint32(src[ref3:]) {
|
||||
// Skip one extra byte (at si+3) before we check 3 matches again.
|
||||
si += 2 + (si-anchor)>>adaptSkipLog
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Match found.
|
||||
lLen := si - anchor // Literal length.
|
||||
// We already matched 4 bytes.
|
||||
mLen := 4
|
||||
|
||||
// Extend backwards if we can, reducing literals.
|
||||
tOff := si - offset - 1
|
||||
for lLen > 0 && tOff >= 0 && src[si-1] == src[tOff] {
|
||||
si--
|
||||
tOff--
|
||||
lLen--
|
||||
mLen++
|
||||
}
|
||||
|
||||
// Add the match length, so we continue search at the end.
|
||||
// Use mLen to store the offset base.
|
||||
si, mLen = si+mLen, si+minMatch
|
||||
|
||||
// Find the longest match by looking in batches of 8 bytes.
|
||||
for si+8 <= sn {
|
||||
x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:])
|
||||
if x == 0 {
|
||||
si += 8
|
||||
} else {
|
||||
// Stop at the first non-zero byte.
|
||||
si += bits.TrailingZeros64(x) >> 3
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
mLen = si - mLen
|
||||
if di >= len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
if mLen < 0xF {
|
||||
dst[di] = byte(mLen)
|
||||
} else {
|
||||
dst[di] = 0xF
|
||||
}
|
||||
|
||||
// Encode literals length.
|
||||
if lLen < 0xF {
|
||||
dst[di] |= byte(lLen << 4)
|
||||
} else {
|
||||
dst[di] |= 0xF0
|
||||
di++
|
||||
l := lLen - 0xF
|
||||
for ; l >= 0xFF && di < len(dst); l -= 0xFF {
|
||||
dst[di] = 0xFF
|
||||
di++
|
||||
}
|
||||
if di >= len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
dst[di] = byte(l)
|
||||
}
|
||||
di++
|
||||
|
||||
// Literals.
|
||||
if di+lLen > len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
copy(dst[di:di+lLen], src[anchor:anchor+lLen])
|
||||
di += lLen + 2
|
||||
anchor = si
|
||||
|
||||
// Encode offset.
|
||||
if di > len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
|
||||
|
||||
// Encode match length part 2.
|
||||
if mLen >= 0xF {
|
||||
for mLen -= 0xF; mLen >= 0xFF && di < len(dst); mLen -= 0xFF {
|
||||
dst[di] = 0xFF
|
||||
di++
|
||||
}
|
||||
if di >= len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
dst[di] = byte(mLen)
|
||||
di++
|
||||
}
|
||||
// Check if we can load next values.
|
||||
if si >= sn {
|
||||
break
|
||||
}
|
||||
// Hash match end-2
|
||||
h = blockHash(binary.LittleEndian.Uint64(src[si-2:]))
|
||||
c.put(h, si-2)
|
||||
}
|
||||
|
||||
lastLiterals:
|
||||
if isNotCompressible && anchor == 0 {
|
||||
// Incompressible.
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// Last literals.
|
||||
if di >= len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
lLen := len(src) - anchor
|
||||
if lLen < 0xF {
|
||||
dst[di] = byte(lLen << 4)
|
||||
} else {
|
||||
dst[di] = 0xF0
|
||||
di++
|
||||
for lLen -= 0xF; lLen >= 0xFF && di < len(dst); lLen -= 0xFF {
|
||||
dst[di] = 0xFF
|
||||
di++
|
||||
}
|
||||
if di >= len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
dst[di] = byte(lLen)
|
||||
}
|
||||
di++
|
||||
|
||||
// Write the last literals.
|
||||
if isNotCompressible && di >= anchor {
|
||||
// Incompressible.
|
||||
return 0, nil
|
||||
}
|
||||
if di+len(src)-anchor > len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
di += copy(dst[di:di+len(src)-anchor], src[anchor:])
|
||||
return di, nil
|
||||
}
|
||||
|
||||
// blockHash hashes 4 bytes into a value < winSize.
|
||||
func blockHashHC(x uint32) uint32 {
|
||||
const hasher uint32 = 2654435761 // Knuth multiplicative hash.
|
||||
return x * hasher >> (32 - winSizeLog)
|
||||
}
|
||||
|
||||
type CompressorHC struct {
|
||||
// hashTable: stores the last position found for a given hash
|
||||
// chainTable: stores previous positions for a given hash
|
||||
hashTable, chainTable [htSize]int
|
||||
needsReset bool
|
||||
}
|
||||
|
||||
var compressorHCPool = sync.Pool{New: func() interface{} { return new(CompressorHC) }}
|
||||
|
||||
func CompressBlockHC(src, dst []byte, depth CompressionLevel) (int, error) {
|
||||
c := compressorHCPool.Get().(*CompressorHC)
|
||||
n, err := c.CompressBlock(src, dst, depth)
|
||||
compressorHCPool.Put(c)
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (c *CompressorHC) CompressBlock(src, dst []byte, depth CompressionLevel) (_ int, err error) {
|
||||
if c.needsReset {
|
||||
// Zero out reused table to avoid non-deterministic output (issue #65).
|
||||
c.hashTable = [htSize]int{}
|
||||
c.chainTable = [htSize]int{}
|
||||
}
|
||||
c.needsReset = true // Only false on first call.
|
||||
|
||||
defer recoverBlock(&err)
|
||||
|
||||
// Return 0, nil only if the destination buffer size is < CompressBlockBound.
|
||||
isNotCompressible := len(dst) < CompressBlockBound(len(src))
|
||||
|
||||
// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
|
||||
// This significantly speeds up incompressible data and usually has very small impact on compression.
|
||||
// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
|
||||
const adaptSkipLog = 7
|
||||
|
||||
var si, di, anchor int
|
||||
sn := len(src) - mfLimit
|
||||
if sn <= 0 {
|
||||
goto lastLiterals
|
||||
}
|
||||
|
||||
if depth == 0 {
|
||||
depth = winSize
|
||||
}
|
||||
|
||||
for si < sn {
|
||||
// Hash the next 4 bytes (sequence).
|
||||
match := binary.LittleEndian.Uint32(src[si:])
|
||||
h := blockHashHC(match)
|
||||
|
||||
// Follow the chain until out of window and give the longest match.
|
||||
mLen := 0
|
||||
offset := 0
|
||||
for next, try := c.hashTable[h], depth; try > 0 && next > 0 && si-next < winSize; next, try = c.chainTable[next&winMask], try-1 {
|
||||
// The first (mLen==0) or next byte (mLen>=minMatch) at current match length
|
||||
// must match to improve on the match length.
|
||||
if src[next+mLen] != src[si+mLen] {
|
||||
continue
|
||||
}
|
||||
ml := 0
|
||||
// Compare the current position with a previous one with the same hash.
|
||||
for ml < sn-si {
|
||||
x := binary.LittleEndian.Uint64(src[next+ml:]) ^ binary.LittleEndian.Uint64(src[si+ml:])
|
||||
if x == 0 {
|
||||
ml += 8
|
||||
} else {
|
||||
// Stop at the first non-zero byte.
|
||||
ml += bits.TrailingZeros64(x) >> 3
|
||||
break
|
||||
}
|
||||
}
|
||||
if ml < minMatch || ml <= mLen {
|
||||
// Match too small (<minMatch) or smaller than the current match.
|
||||
continue
|
||||
}
|
||||
// Found a longer match, keep its position and length.
|
||||
mLen = ml
|
||||
offset = si - next
|
||||
// Try another previous position with the same hash.
|
||||
}
|
||||
c.chainTable[si&winMask] = c.hashTable[h]
|
||||
c.hashTable[h] = si
|
||||
|
||||
// No match found.
|
||||
if mLen == 0 {
|
||||
si += 1 + (si-anchor)>>adaptSkipLog
|
||||
continue
|
||||
}
|
||||
|
||||
// Match found.
|
||||
// Update hash/chain tables with overlapping bytes:
|
||||
// si already hashed, add everything from si+1 up to the match length.
|
||||
winStart := si + 1
|
||||
if ws := si + mLen - winSize; ws > winStart {
|
||||
winStart = ws
|
||||
}
|
||||
for si, ml := winStart, si+mLen; si < ml; {
|
||||
match >>= 8
|
||||
match |= uint32(src[si+3]) << 24
|
||||
h := blockHashHC(match)
|
||||
c.chainTable[si&winMask] = c.hashTable[h]
|
||||
c.hashTable[h] = si
|
||||
si++
|
||||
}
|
||||
|
||||
lLen := si - anchor
|
||||
si += mLen
|
||||
mLen -= minMatch // Match length does not include minMatch.
|
||||
|
||||
if mLen < 0xF {
|
||||
dst[di] = byte(mLen)
|
||||
} else {
|
||||
dst[di] = 0xF
|
||||
}
|
||||
|
||||
// Encode literals length.
|
||||
if lLen < 0xF {
|
||||
dst[di] |= byte(lLen << 4)
|
||||
} else {
|
||||
dst[di] |= 0xF0
|
||||
di++
|
||||
l := lLen - 0xF
|
||||
for ; l >= 0xFF; l -= 0xFF {
|
||||
dst[di] = 0xFF
|
||||
di++
|
||||
}
|
||||
dst[di] = byte(l)
|
||||
}
|
||||
di++
|
||||
|
||||
// Literals.
|
||||
copy(dst[di:di+lLen], src[anchor:anchor+lLen])
|
||||
di += lLen
|
||||
anchor = si
|
||||
|
||||
// Encode offset.
|
||||
di += 2
|
||||
dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
|
||||
|
||||
// Encode match length part 2.
|
||||
if mLen >= 0xF {
|
||||
for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
|
||||
dst[di] = 0xFF
|
||||
di++
|
||||
}
|
||||
dst[di] = byte(mLen)
|
||||
di++
|
||||
}
|
||||
}
|
||||
|
||||
if isNotCompressible && anchor == 0 {
|
||||
// Incompressible.
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// Last literals.
|
||||
lastLiterals:
|
||||
lLen := len(src) - anchor
|
||||
if lLen < 0xF {
|
||||
dst[di] = byte(lLen << 4)
|
||||
} else {
|
||||
dst[di] = 0xF0
|
||||
di++
|
||||
lLen -= 0xF
|
||||
for ; lLen >= 0xFF; lLen -= 0xFF {
|
||||
dst[di] = 0xFF
|
||||
di++
|
||||
}
|
||||
dst[di] = byte(lLen)
|
||||
}
|
||||
di++
|
||||
|
||||
// Write the last literals.
|
||||
if isNotCompressible && di >= anchor {
|
||||
// Incompressible.
|
||||
return 0, nil
|
||||
}
|
||||
di += copy(dst[di:di+len(src)-anchor], src[anchor:])
|
||||
return di, nil
|
||||
}
|
|
@@ -0,0 +1,90 @@
|
|||
// Package lz4block provides LZ4 BlockSize types and pools of buffers.
|
||||
package lz4block
|
||||
|
||||
import "sync"
|
||||
|
||||
const (
|
||||
Block64Kb uint32 = 1 << (16 + iota*2)
|
||||
Block256Kb
|
||||
Block1Mb
|
||||
Block4Mb
|
||||
)
|
||||
|
||||
// In legacy mode all blocks are compressed regardless
|
||||
// of the compressed size: use the bound size.
|
||||
var Block8Mb = uint32(CompressBlockBound(8 << 20))
|
||||
|
||||
var (
|
||||
BlockPool64K = sync.Pool{New: func() interface{} { return make([]byte, Block64Kb) }}
|
||||
BlockPool256K = sync.Pool{New: func() interface{} { return make([]byte, Block256Kb) }}
|
||||
BlockPool1M = sync.Pool{New: func() interface{} { return make([]byte, Block1Mb) }}
|
||||
BlockPool4M = sync.Pool{New: func() interface{} { return make([]byte, Block4Mb) }}
|
||||
BlockPool8M = sync.Pool{New: func() interface{} { return make([]byte, Block8Mb) }}
|
||||
)
|
||||
|
||||
func Index(b uint32) BlockSizeIndex {
|
||||
switch b {
|
||||
case Block64Kb:
|
||||
return 4
|
||||
case Block256Kb:
|
||||
return 5
|
||||
case Block1Mb:
|
||||
return 6
|
||||
case Block4Mb:
|
||||
return 7
|
||||
case Block8Mb: // only valid in legacy mode
|
||||
return 3
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func IsValid(b uint32) bool {
|
||||
return Index(b) > 0
|
||||
}
|
||||
|
||||
type BlockSizeIndex uint8
|
||||
|
||||
func (b BlockSizeIndex) IsValid() bool {
|
||||
switch b {
|
||||
case 4, 5, 6, 7:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (b BlockSizeIndex) Get() []byte {
|
||||
var buf interface{}
|
||||
switch b {
|
||||
case 4:
|
||||
buf = BlockPool64K.Get()
|
||||
case 5:
|
||||
buf = BlockPool256K.Get()
|
||||
case 6:
|
||||
buf = BlockPool1M.Get()
|
||||
case 7:
|
||||
buf = BlockPool4M.Get()
|
||||
case 3:
|
||||
buf = BlockPool8M.Get()
|
||||
}
|
||||
return buf.([]byte)
|
||||
}
|
||||
|
||||
func Put(buf []byte) {
|
||||
// Safeguard: do not allow invalid buffers.
|
||||
switch c := cap(buf); uint32(c) {
|
||||
case Block64Kb:
|
||||
BlockPool64K.Put(buf[:c])
|
||||
case Block256Kb:
|
||||
BlockPool256K.Put(buf[:c])
|
||||
case Block1Mb:
|
||||
BlockPool1M.Put(buf[:c])
|
||||
case Block4Mb:
|
||||
BlockPool4M.Put(buf[:c])
|
||||
case Block8Mb:
|
||||
BlockPool8M.Put(buf[:c])
|
||||
}
|
||||
}
|
||||
|
||||
type CompressionLevel uint32
|
||||
|
||||
const Fast CompressionLevel = 0
|
448
vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_amd64.s
generated
vendored
Normal file
|
@@ -0,0 +1,448 @@
|
|||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// AX scratch
|
||||
// BX scratch
|
||||
// CX literal and match lengths
|
||||
// DX token, match offset
|
||||
//
|
||||
// DI &dst
|
||||
// SI &src
|
||||
// R8 &dst + len(dst)
|
||||
// R9 &src + len(src)
|
||||
// R11 &dst
|
||||
// R12 short output end
|
||||
// R13 short input end
|
||||
// R14 &dict
|
||||
// R15 len(dict)
|
||||
|
||||
// func decodeBlock(dst, src, dict []byte) int
|
||||
TEXT ·decodeBlock(SB), NOSPLIT, $48-80
|
||||
MOVQ dst_base+0(FP), DI
|
||||
MOVQ DI, R11
|
||||
MOVQ dst_len+8(FP), R8
|
||||
ADDQ DI, R8
|
||||
|
||||
MOVQ src_base+24(FP), SI
|
||||
MOVQ src_len+32(FP), R9
|
||||
CMPQ R9, $0
|
||||
JE err_corrupt
|
||||
ADDQ SI, R9
|
||||
|
||||
MOVQ dict_base+48(FP), R14
|
||||
MOVQ dict_len+56(FP), R15
|
||||
|
||||
// shortcut ends
|
||||
// short output end
|
||||
MOVQ R8, R12
|
||||
SUBQ $32, R12
|
||||
// short input end
|
||||
MOVQ R9, R13
|
||||
SUBQ $16, R13
|
||||
|
||||
XORL CX, CX
|
||||
|
||||
loop:
|
||||
// token := uint32(src[si])
|
||||
MOVBLZX (SI), DX
|
||||
INCQ SI
|
||||
|
||||
// lit_len = token >> 4
|
||||
// if lit_len > 0
|
||||
// CX = lit_len
|
||||
MOVL DX, CX
|
||||
SHRL $4, CX
|
||||
|
||||
// if lit_len != 0xF
|
||||
CMPL CX, $0xF
|
||||
JEQ lit_len_loop
|
||||
CMPQ DI, R12
|
||||
JAE copy_literal
|
||||
CMPQ SI, R13
|
||||
JAE copy_literal
|
||||
|
||||
// copy shortcut
|
||||
|
||||
// A two-stage shortcut for the most common case:
|
||||
// 1) If the literal length is 0..14, and there is enough space,
|
||||
// enter the shortcut and copy 16 bytes on behalf of the literals
|
||||
// (in the fast mode, only 8 bytes can be safely copied this way).
|
||||
// 2) Further if the match length is 4..18, copy 18 bytes in a similar
|
||||
// manner; but we ensure that there's enough space in the output for
|
||||
// those 18 bytes earlier, upon entering the shortcut (in other words,
|
||||
// there is a combined check for both stages).
|
||||
|
||||
// copy literal
|
||||
MOVOU (SI), X0
|
||||
MOVOU X0, (DI)
|
||||
ADDQ CX, DI
|
||||
ADDQ CX, SI
|
||||
|
||||
MOVL DX, CX
|
||||
ANDL $0xF, CX
|
||||
|
||||
// The second stage: prepare for match copying, decode full info.
|
||||
// If it doesn't work out, the info won't be wasted.
|
||||
// offset := uint16(data[:2])
|
||||
MOVWLZX (SI), DX
|
||||
TESTL DX, DX
|
||||
JE err_corrupt
|
||||
ADDQ $2, SI
|
||||
JC err_short_buf
|
||||
|
||||
MOVQ DI, AX
|
||||
SUBQ DX, AX
|
||||
JC err_corrupt
|
||||
CMPQ AX, DI
|
||||
JA err_short_buf
|
||||
|
||||
// if we can't do the second stage then jump straight to read the
|
||||
// match length, we already have the offset.
|
||||
CMPL CX, $0xF
|
||||
JEQ match_len_loop_pre
|
||||
CMPL DX, $8
|
||||
JLT match_len_loop_pre
|
||||
CMPQ AX, R11
|
||||
JB match_len_loop_pre
|
||||
|
||||
// memcpy(op + 0, match + 0, 8);
|
||||
MOVQ (AX), BX
|
||||
MOVQ BX, (DI)
|
||||
// memcpy(op + 8, match + 8, 8);
|
||||
MOVQ 8(AX), BX
|
||||
MOVQ BX, 8(DI)
|
||||
// memcpy(op +16, match +16, 2);
|
||||
MOVW 16(AX), BX
|
||||
MOVW BX, 16(DI)
|
||||
|
||||
LEAQ const_minMatch(DI)(CX*1), DI
|
||||
|
||||
// shortcut complete, load next token
|
||||
JMP loopcheck
|
||||
|
||||
// Read the rest of the literal length:
|
||||
// do { BX = src[si++]; lit_len += BX } while (BX == 0xFF).
|
||||
lit_len_loop:
|
||||
CMPQ SI, R9
|
||||
JAE err_short_buf
|
||||
|
||||
MOVBLZX (SI), BX
|
||||
INCQ SI
|
||||
ADDQ BX, CX
|
||||
|
||||
CMPB BX, $0xFF
|
||||
JE lit_len_loop
|
||||
|
||||
copy_literal:
|
||||
// bounds check src and dst
|
||||
MOVQ SI, AX
|
||||
ADDQ CX, AX
|
||||
JC err_short_buf
|
||||
CMPQ AX, R9
|
||||
JA err_short_buf
|
||||
|
||||
MOVQ DI, BX
|
||||
ADDQ CX, BX
|
||||
JC err_short_buf
|
||||
CMPQ BX, R8
|
||||
JA err_short_buf
|
||||
|
||||
// Copy literals of <=48 bytes through the XMM registers.
|
||||
CMPQ CX, $48
|
||||
JGT memmove_lit
|
||||
|
||||
// if len(dst[di:]) < 48
|
||||
MOVQ R8, AX
|
||||
SUBQ DI, AX
|
||||
CMPQ AX, $48
|
||||
JLT memmove_lit
|
||||
|
||||
// if len(src[si:]) < 48
|
||||
MOVQ R9, BX
|
||||
SUBQ SI, BX
|
||||
CMPQ BX, $48
|
||||
JLT memmove_lit
|
||||
|
||||
MOVOU (SI), X0
|
||||
MOVOU 16(SI), X1
|
||||
MOVOU 32(SI), X2
|
||||
MOVOU X0, (DI)
|
||||
MOVOU X1, 16(DI)
|
||||
MOVOU X2, 32(DI)
|
||||
|
||||
ADDQ CX, SI
|
||||
ADDQ CX, DI
|
||||
|
||||
JMP finish_lit_copy
|
||||
|
||||
memmove_lit:
|
||||
// memmove(to, from, len)
|
||||
MOVQ DI, 0(SP)
|
||||
MOVQ SI, 8(SP)
|
||||
MOVQ CX, 16(SP)
|
||||
|
||||
// Spill registers. Increment SI, DI now so we don't need to save CX.
|
||||
ADDQ CX, DI
|
||||
ADDQ CX, SI
|
||||
MOVQ DI, 24(SP)
|
||||
MOVQ SI, 32(SP)
|
||||
MOVL DX, 40(SP)
|
||||
|
||||
CALL runtime·memmove(SB)
|
||||
|
||||
// restore registers
|
||||
MOVQ 24(SP), DI
|
||||
MOVQ 32(SP), SI
|
||||
MOVL 40(SP), DX
|
||||
|
||||
// recalc initial values
|
||||
MOVQ dst_base+0(FP), R8
|
||||
MOVQ R8, R11
|
||||
ADDQ dst_len+8(FP), R8
|
||||
MOVQ src_base+24(FP), R9
|
||||
ADDQ src_len+32(FP), R9
|
||||
MOVQ dict_base+48(FP), R14
|
||||
MOVQ dict_len+56(FP), R15
|
||||
MOVQ R8, R12
|
||||
SUBQ $32, R12
|
||||
MOVQ R9, R13
|
||||
SUBQ $16, R13
|
||||
|
||||
finish_lit_copy:
|
||||
// CX := mLen
|
||||
// free up DX to use for offset
|
||||
MOVL DX, CX
|
||||
ANDL $0xF, CX
|
||||
|
||||
CMPQ SI, R9
|
||||
JAE end
|
||||
|
||||
// offset
|
||||
// si += 2
|
||||
// DX := int(src[si-2]) | int(src[si-1])<<8
|
||||
ADDQ $2, SI
|
||||
JC err_short_buf
|
||||
CMPQ SI, R9
|
||||
JA err_short_buf
|
||||
MOVWQZX -2(SI), DX
|
||||
|
||||
// 0 offset is invalid
|
||||
TESTL DX, DX
|
||||
JEQ err_corrupt
|
||||
|
||||
match_len_loop_pre:
|
||||
// if mlen != 0xF
|
||||
CMPB CX, $0xF
|
||||
JNE copy_match
|
||||
|
||||
// do { BX = src[si++]; mlen += BX } while (BX == 0xFF).
|
||||
match_len_loop:
|
||||
CMPQ SI, R9
|
||||
JAE err_short_buf
|
||||
|
||||
MOVBLZX (SI), BX
|
||||
INCQ SI
|
||||
ADDQ BX, CX
|
||||
|
||||
CMPB BX, $0xFF
|
||||
JE match_len_loop
|
||||
|
||||
copy_match:
|
||||
ADDQ $const_minMatch, CX
|
||||
|
||||
// check we have match_len bytes left in dst
|
||||
// di+match_len < len(dst)
|
||||
MOVQ DI, AX
|
||||
ADDQ CX, AX
|
||||
JC err_short_buf
|
||||
CMPQ AX, R8
|
||||
JA err_short_buf
|
||||
|
||||
// DX = offset
|
||||
// CX = match_len
|
||||
// BX = &dst + (di - offset)
|
||||
MOVQ DI, BX
|
||||
SUBQ DX, BX
|
||||
|
||||
// check BX is within dst
|
||||
// if BX < &dst
|
||||
JC copy_match_from_dict
|
||||
CMPQ BX, R11
|
||||
JBE copy_match_from_dict
|
||||
|
||||
// if offset + match_len < di
|
||||
LEAQ (BX)(CX*1), AX
|
||||
CMPQ DI, AX
|
||||
JA copy_interior_match
|
||||
|
||||
// AX := len(dst[:di])
|
||||
// MOVQ DI, AX
|
||||
// SUBQ R11, AX
|
||||
|
||||
// copy 16 bytes at a time
|
||||
// if di-offset < 16 copy 16-(di-offset) bytes to di
|
||||
// then do the remaining
|
||||
|
||||
copy_match_loop:
|
||||
// for match_len >= 0
|
||||
// dst[di] = dst[i]
|
||||
// di++
|
||||
// i++
|
||||
MOVB (BX), AX
|
||||
MOVB AX, (DI)
|
||||
INCQ DI
|
||||
INCQ BX
|
||||
DECQ CX
|
||||
JNZ copy_match_loop
|
||||
|
||||
JMP loopcheck
|
||||
|
||||
copy_interior_match:
|
||||
CMPQ CX, $16
|
||||
JGT memmove_match
|
||||
|
||||
// if len(dst[di:]) < 16
|
||||
MOVQ R8, AX
|
||||
SUBQ DI, AX
|
||||
CMPQ AX, $16
|
||||
JLT memmove_match
|
||||
|
||||
MOVOU (BX), X0
|
||||
MOVOU X0, (DI)
|
||||
|
||||
ADDQ CX, DI
|
||||
XORL CX, CX
|
||||
JMP loopcheck
|
||||
|
||||
copy_match_from_dict:
|
||||
// CX = match_len
|
||||
// BX = &dst + (di - offset)
|
||||
|
||||
// AX = offset - di = dict_bytes_available => count of bytes potentially covered by the dictionary
|
||||
MOVQ R11, AX
|
||||
SUBQ BX, AX
|
||||
|
||||
// BX = len(dict) - dict_bytes_available
|
||||
MOVQ R15, BX
|
||||
SUBQ AX, BX
|
||||
JS err_short_dict
|
||||
|
||||
ADDQ R14, BX
|
||||
|
||||
// if match_len > dict_bytes_available, match fits entirely within external dictionary : just copy
|
||||
CMPQ CX, AX
|
||||
JLT memmove_match
|
||||
|
||||
// The match stretches over the dictionary and our block
|
||||
// 1) copy what comes from the dictionary
|
||||
// AX = dict_bytes_available = copy_size
|
||||
// BX = &dict_end - copy_size
|
||||
// CX = match_len
|
||||
|
||||
// memmove(to, from, len)
|
||||
MOVQ DI, 0(SP)
|
||||
MOVQ BX, 8(SP)
|
||||
MOVQ AX, 16(SP)
|
||||
// store extra stuff we want to recover
|
||||
// spill
|
||||
MOVQ DI, 24(SP)
|
||||
MOVQ SI, 32(SP)
|
||||
MOVQ CX, 40(SP)
|
||||
CALL runtime·memmove(SB)
|
||||
|
||||
// restore registers
|
||||
MOVQ 16(SP), AX // copy_size
|
||||
MOVQ 24(SP), DI
|
||||
MOVQ 32(SP), SI
|
||||
MOVQ 40(SP), CX // match_len
|
||||
|
||||
// recalc initial values
|
||||
MOVQ dst_base+0(FP), R8
|
||||
MOVQ R8, R11 // TODO: make these sensible numbers
|
||||
ADDQ dst_len+8(FP), R8
|
||||
MOVQ src_base+24(FP), R9
|
||||
ADDQ src_len+32(FP), R9
|
||||
MOVQ dict_base+48(FP), R14
|
||||
MOVQ dict_len+56(FP), R15
|
||||
MOVQ R8, R12
|
||||
SUBQ $32, R12
|
||||
MOVQ R9, R13
|
||||
SUBQ $16, R13
|
||||
|
||||
// di+=copy_size
|
||||
ADDQ AX, DI
|
||||
|
||||
// 2) copy the rest from the current block
|
||||
// CX = match_len - copy_size = rest_size
|
||||
SUBQ AX, CX
|
||||
MOVQ R11, BX
|
||||
|
||||
// check if we have a copy overlap
|
||||
// AX = &dst + rest_size
|
||||
MOVQ CX, AX
|
||||
ADDQ BX, AX
|
||||
// if &dst + rest_size > di, copy byte by byte
|
||||
CMPQ AX, DI
|
||||
|
||||
JA copy_match_loop
|
||||
|
||||
memmove_match:
|
||||
// memmove(to, from, len)
|
||||
MOVQ DI, 0(SP)
|
||||
MOVQ BX, 8(SP)
|
||||
MOVQ CX, 16(SP)
|
||||
|
||||
// Spill registers. Increment DI now so we don't need to save CX.
|
||||
ADDQ CX, DI
|
||||
MOVQ DI, 24(SP)
|
||||
MOVQ SI, 32(SP)
|
||||
|
||||
CALL runtime·memmove(SB)
|
||||
|
||||
// restore registers
|
||||
MOVQ 24(SP), DI
|
||||
MOVQ 32(SP), SI
|
||||
|
||||
// recalc initial values
|
||||
MOVQ dst_base+0(FP), R8
|
||||
MOVQ R8, R11 // TODO: make these sensible numbers
|
||||
ADDQ dst_len+8(FP), R8
|
||||
MOVQ src_base+24(FP), R9
|
||||
ADDQ src_len+32(FP), R9
|
||||
MOVQ R8, R12
|
||||
SUBQ $32, R12
|
||||
MOVQ R9, R13
|
||||
SUBQ $16, R13
|
||||
MOVQ dict_base+48(FP), R14
|
||||
MOVQ dict_len+56(FP), R15
|
||||
XORL CX, CX
|
||||
|
||||
loopcheck:
|
||||
// for si < len(src)
|
||||
CMPQ SI, R9
|
||||
JB loop
|
||||
|
||||
end:
|
||||
// Remaining length must be zero.
|
||||
TESTQ CX, CX
|
||||
JNE err_corrupt
|
||||
|
||||
SUBQ R11, DI
|
||||
MOVQ DI, ret+72(FP)
|
||||
RET
|
||||
|
||||
err_corrupt:
|
||||
MOVQ $-1, ret+72(FP)
|
||||
RET
|
||||
|
||||
err_short_buf:
|
||||
MOVQ $-2, ret+72(FP)
|
||||
RET
|
||||
|
||||
err_short_dict:
|
||||
MOVQ $-3, ret+72(FP)
|
||||
RET
|
|
@@ -0,0 +1,231 @@
|
|||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Register allocation.
|
||||
#define dst R0
|
||||
#define dstorig R1
|
||||
#define src R2
|
||||
#define dstend R3
|
||||
#define srcend R4
|
||||
#define match R5 // Match address.
|
||||
#define dictend R6
|
||||
#define token R7
|
||||
#define len R8 // Literal and match lengths.
|
||||
#define offset R7 // Match offset; overlaps with token.
|
||||
#define tmp1 R9
|
||||
#define tmp2 R11
|
||||
#define tmp3 R12
|
||||
|
||||
// func decodeBlock(dst, src, dict []byte) int
|
||||
TEXT ·decodeBlock(SB), NOFRAME+NOSPLIT, $-4-40
|
||||
MOVW dst_base +0(FP), dst
|
||||
MOVW dst_len +4(FP), dstend
|
||||
MOVW src_base +12(FP), src
|
||||
MOVW src_len +16(FP), srcend
|
||||
|
||||
CMP $0, srcend
|
||||
BEQ shortSrc
|
||||
|
||||
ADD dst, dstend
|
||||
ADD src, srcend
|
||||
|
||||
MOVW dst, dstorig
|
||||
|
||||
loop:
|
||||
// Read token. Extract literal length.
|
||||
MOVBU.P 1(src), token
|
||||
MOVW token >> 4, len
|
||||
CMP $15, len
|
||||
BNE readLitlenDone
|
||||
|
||||
readLitlenLoop:
|
||||
CMP src, srcend
|
||||
BEQ shortSrc
|
||||
MOVBU.P 1(src), tmp1
|
||||
ADD.S tmp1, len
|
||||
BVS shortDst
|
||||
CMP $255, tmp1
|
||||
BEQ readLitlenLoop
|
||||
|
||||
readLitlenDone:
|
||||
CMP $0, len
|
||||
BEQ copyLiteralDone
|
||||
|
||||
// Bounds check dst+len and src+len.
|
||||
ADD.S dst, len, tmp1
|
||||
ADD.CC.S src, len, tmp2
|
||||
BCS shortSrc
|
||||
CMP dstend, tmp1
|
||||
//BHI shortDst // Uncomment for distinct error codes.
|
||||
CMP.LS srcend, tmp2
|
||||
BHI shortSrc
|
||||
|
||||
// Copy literal.
|
||||
CMP $4, len
|
||||
BLO copyLiteralFinish
|
||||
|
||||
// Copy 0-3 bytes until src is aligned.
|
||||
TST $1, src
|
||||
MOVBU.NE.P 1(src), tmp1
|
||||
MOVB.NE.P tmp1, 1(dst)
|
||||
SUB.NE $1, len
|
||||
|
||||
TST $2, src
|
||||
MOVHU.NE.P 2(src), tmp2
|
||||
MOVB.NE.P tmp2, 1(dst)
|
||||
MOVW.NE tmp2 >> 8, tmp1
|
||||
MOVB.NE.P tmp1, 1(dst)
|
||||
SUB.NE $2, len
|
||||
|
||||
B copyLiteralLoopCond
|
||||
|
||||
copyLiteralLoop:
|
||||
// Aligned load, unaligned write.
|
||||
MOVW.P 4(src), tmp1
|
||||
MOVW tmp1 >> 8, tmp2
|
||||
MOVB tmp2, 1(dst)
|
||||
MOVW tmp1 >> 16, tmp3
|
||||
MOVB tmp3, 2(dst)
|
||||
MOVW tmp1 >> 24, tmp2
|
||||
MOVB tmp2, 3(dst)
|
||||
MOVB.P tmp1, 4(dst)
|
||||
copyLiteralLoopCond:
|
||||
// Loop until len-4 < 0.
|
||||
SUB.S $4, len
|
||||
BPL copyLiteralLoop
|
||||
|
||||
copyLiteralFinish:
|
||||
// Copy remaining 0-3 bytes.
|
||||
// At this point, len may be < 0, but len&3 is still accurate.
|
||||
TST $1, len
|
||||
MOVB.NE.P 1(src), tmp3
|
||||
MOVB.NE.P tmp3, 1(dst)
|
||||
TST $2, len
|
||||
MOVB.NE.P 2(src), tmp1
|
||||
MOVB.NE.P tmp1, 2(dst)
|
||||
MOVB.NE -1(src), tmp2
|
||||
MOVB.NE tmp2, -1(dst)
|
||||
|
||||
copyLiteralDone:
|
||||
// Initial part of match length.
|
||||
// This frees up the token register for reuse as offset.
|
||||
AND $15, token, len
|
||||
|
||||
CMP src, srcend
|
||||
BEQ end
|
||||
|
||||
// Read offset.
|
||||
ADD.S $2, src
|
||||
BCS shortSrc
|
||||
CMP srcend, src
|
||||
BHI shortSrc
|
||||
MOVBU -2(src), offset
|
||||
MOVBU -1(src), tmp1
|
||||
ORR.S tmp1 << 8, offset
|
||||
BEQ corrupt
|
||||
|
||||
// Read rest of match length.
|
||||
CMP $15, len
|
||||
BNE readMatchlenDone
|
||||
|
||||
readMatchlenLoop:
|
||||
CMP src, srcend
|
||||
BEQ shortSrc
|
||||
MOVBU.P 1(src), tmp1
|
||||
ADD.S tmp1, len
|
||||
BVS shortDst
|
||||
CMP $255, tmp1
|
||||
BEQ readMatchlenLoop
|
||||
|
||||
readMatchlenDone:
|
||||
// Bounds check dst+len+minMatch.
|
||||
ADD.S dst, len, tmp1
|
||||
ADD.CC.S $const_minMatch, tmp1
|
||||
BCS shortDst
|
||||
CMP dstend, tmp1
|
||||
BHI shortDst
|
||||
|
||||
RSB dst, offset, match
|
||||
CMP dstorig, match
|
||||
BGE copyMatch4
|
||||
|
||||
// match < dstorig means the match starts in the dictionary,
|
||||
// at len(dict) - offset + (dst - dstorig).
|
||||
MOVW dict_base+24(FP), match
|
||||
MOVW dict_len +28(FP), dictend
|
||||
|
||||
ADD $const_minMatch, len
|
||||
|
||||
RSB dst, dstorig, tmp1
|
||||
RSB dictend, offset, tmp2
|
||||
ADD.S tmp2, tmp1
|
||||
BMI shortDict
|
||||
ADD match, dictend
|
||||
ADD tmp1, match
|
||||
|
||||
copyDict:
|
||||
MOVBU.P 1(match), tmp1
|
||||
MOVB.P tmp1, 1(dst)
|
||||
SUB.S $1, len
|
||||
CMP.NE match, dictend
|
||||
BNE copyDict
|
||||
|
||||
// If the match extends beyond the dictionary, the rest is at dstorig.
|
||||
CMP $0, len
|
||||
BEQ copyMatchDone
|
||||
MOVW dstorig, match
|
||||
B copyMatch
|
||||
|
||||
// Copy a regular match.
|
||||
// Since len+minMatch is at least four, we can do a 4× unrolled
|
||||
// byte copy loop. Using MOVW instead of four byte loads is faster,
|
||||
// but to remain portable we'd have to align match first, which is
|
||||
// too expensive. By alternating loads and stores, we also handle
|
||||
// the case offset < 4.
|
||||
copyMatch4:
|
||||
SUB.S $4, len
|
||||
MOVBU.P 4(match), tmp1
|
||||
MOVB.P tmp1, 4(dst)
|
||||
MOVBU -3(match), tmp2
|
||||
MOVB tmp2, -3(dst)
|
||||
MOVBU -2(match), tmp3
|
||||
MOVB tmp3, -2(dst)
|
||||
MOVBU -1(match), tmp1
|
||||
MOVB tmp1, -1(dst)
|
||||
BPL copyMatch4
|
||||
|
||||
// Restore len, which is now negative.
|
||||
ADD.S $4, len
|
||||
BEQ copyMatchDone
|
||||
|
||||
copyMatch:
|
||||
// Finish with a byte-at-a-time copy.
|
||||
SUB.S $1, len
|
||||
MOVBU.P 1(match), tmp2
|
||||
MOVB.P tmp2, 1(dst)
|
||||
BNE copyMatch
|
||||
|
||||
copyMatchDone:
|
||||
CMP src, srcend
|
||||
BNE loop
|
||||
|
||||
end:
|
||||
CMP $0, len
|
||||
BNE corrupt
|
||||
SUB dstorig, dst, tmp1
|
||||
MOVW tmp1, ret+36(FP)
|
||||
RET
|
||||
|
||||
// The error cases have distinct labels so we can put different
|
||||
// return codes here when debugging, or if the error returns need to
|
||||
// be changed.
|
||||
shortDict:
|
||||
shortDst:
|
||||
shortSrc:
|
||||
corrupt:
|
||||
MOVW $-1, tmp1
|
||||
MOVW tmp1, ret+36(FP)
|
||||
RET
|
241
vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_arm64.s
generated
vendored
Normal file
|
@ -0,0 +1,241 @@
|
|||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
// This implementation assumes that strict alignment checking is turned off.
|
||||
// The Go compiler makes the same assumption.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Register allocation.
|
||||
#define dst R0
|
||||
#define dstorig R1
|
||||
#define src R2
|
||||
#define dstend R3
|
||||
#define dstend16 R4 // dstend - 16
|
||||
#define srcend R5
|
||||
#define srcend16 R6 // srcend - 16
|
||||
#define match R7 // Match address.
|
||||
#define dict R8
|
||||
#define dictlen R9
|
||||
#define dictend R10
|
||||
#define token R11
|
||||
#define len R12 // Literal and match lengths.
|
||||
#define lenRem R13
|
||||
#define offset R14 // Match offset.
|
||||
#define tmp1 R15
|
||||
#define tmp2 R16
|
||||
#define tmp3 R17
|
||||
#define tmp4 R19
|
||||
|
||||
// func decodeBlock(dst, src, dict []byte) int
|
||||
TEXT ·decodeBlock(SB), NOFRAME+NOSPLIT, $0-80
|
||||
LDP dst_base+0(FP), (dst, dstend)
|
||||
ADD dst, dstend
|
||||
MOVD dst, dstorig
|
||||
|
||||
LDP src_base+24(FP), (src, srcend)
|
||||
CBZ srcend, shortSrc
|
||||
ADD src, srcend
|
||||
|
||||
// dstend16 = max(dstend-16, 0) and similarly for srcend16.
|
||||
SUBS $16, dstend, dstend16
|
||||
CSEL LO, ZR, dstend16, dstend16
|
||||
SUBS $16, srcend, srcend16
|
||||
CSEL LO, ZR, srcend16, srcend16
|
||||
|
||||
LDP dict_base+48(FP), (dict, dictlen)
|
||||
ADD dict, dictlen, dictend
|
||||
|
||||
loop:
|
||||
// Read token. Extract literal length.
|
||||
MOVBU.P 1(src), token
|
||||
LSR $4, token, len
|
||||
CMP $15, len
|
||||
BNE readLitlenDone
|
||||
|
||||
readLitlenLoop:
|
||||
CMP src, srcend
|
||||
BEQ shortSrc
|
||||
MOVBU.P 1(src), tmp1
|
||||
ADDS tmp1, len
|
||||
BVS shortDst
|
||||
CMP $255, tmp1
|
||||
BEQ readLitlenLoop
|
||||
|
||||
readLitlenDone:
|
||||
CBZ len, copyLiteralDone
|
||||
|
||||
// Bounds check dst+len and src+len.
|
||||
ADDS dst, len, tmp1
|
||||
BCS shortSrc
|
||||
ADDS src, len, tmp2
|
||||
BCS shortSrc
|
||||
CMP dstend, tmp1
|
||||
BHI shortDst
|
||||
CMP srcend, tmp2
|
||||
BHI shortSrc
|
||||
|
||||
// Copy literal.
|
||||
SUBS $16, len
|
||||
BLO copyLiteralShort
|
||||
|
||||
copyLiteralLoop:
|
||||
LDP.P 16(src), (tmp1, tmp2)
|
||||
STP.P (tmp1, tmp2), 16(dst)
|
||||
SUBS $16, len
|
||||
BPL copyLiteralLoop
|
||||
|
||||
// Copy (final part of) literal of length 0-15.
|
||||
// If we have >=16 bytes left in src and dst, just copy 16 bytes.
|
||||
copyLiteralShort:
|
||||
CMP dstend16, dst
|
||||
CCMP LO, src, srcend16, $0b0010 // 0010 = preserve carry (LO).
|
||||
BHS copyLiteralShortEnd
|
||||
|
||||
AND $15, len
|
||||
|
||||
LDP (src), (tmp1, tmp2)
|
||||
ADD len, src
|
||||
STP (tmp1, tmp2), (dst)
|
||||
ADD len, dst
|
||||
|
||||
B copyLiteralDone
|
||||
|
||||
// Safe but slow copy near the end of src, dst.
|
||||
copyLiteralShortEnd:
|
||||
TBZ $3, len, 3(PC)
|
||||
MOVD.P 8(src), tmp1
|
||||
MOVD.P tmp1, 8(dst)
|
||||
TBZ $2, len, 3(PC)
|
||||
MOVW.P 4(src), tmp2
|
||||
MOVW.P tmp2, 4(dst)
|
||||
TBZ $1, len, 3(PC)
|
||||
MOVH.P 2(src), tmp3
|
||||
MOVH.P tmp3, 2(dst)
|
||||
TBZ $0, len, 3(PC)
|
||||
MOVBU.P 1(src), tmp4
|
||||
MOVB.P tmp4, 1(dst)
|
||||
|
||||
copyLiteralDone:
|
||||
// Initial part of match length.
|
||||
AND $15, token, len
|
||||
|
||||
CMP src, srcend
|
||||
BEQ end
|
||||
|
||||
// Read offset.
|
||||
ADDS $2, src
|
||||
BCS shortSrc
|
||||
CMP srcend, src
|
||||
BHI shortSrc
|
||||
MOVHU -2(src), offset
|
||||
CBZ offset, corrupt
|
||||
|
||||
// Read rest of match length.
|
||||
CMP $15, len
|
||||
BNE readMatchlenDone
|
||||
|
||||
readMatchlenLoop:
|
||||
CMP src, srcend
|
||||
BEQ shortSrc
|
||||
MOVBU.P 1(src), tmp1
|
||||
ADDS tmp1, len
|
||||
BVS shortDst
|
||||
CMP $255, tmp1
|
||||
BEQ readMatchlenLoop
|
||||
|
||||
readMatchlenDone:
|
||||
ADD $const_minMatch, len
|
||||
|
||||
// Bounds check dst+len.
|
||||
ADDS dst, len, tmp2
|
||||
BCS shortDst
|
||||
CMP dstend, tmp2
|
||||
BHI shortDst
|
||||
|
||||
SUB offset, dst, match
|
||||
CMP dstorig, match
|
||||
BHS copyMatchTry8
|
||||
|
||||
// match < dstorig means the match starts in the dictionary,
|
||||
// at len(dict) - offset + (dst - dstorig).
|
||||
SUB dstorig, dst, tmp1
|
||||
SUB offset, dictlen, tmp2
|
||||
ADDS tmp2, tmp1
|
||||
BMI shortDict
|
||||
ADD dict, tmp1, match
|
||||
|
||||
copyDict:
|
||||
MOVBU.P 1(match), tmp3
|
||||
MOVB.P tmp3, 1(dst)
|
||||
SUBS $1, len
|
||||
CCMP NE, dictend, match, $0b0100 // 0100 sets the Z (EQ) flag.
|
||||
BNE copyDict
|
||||
|
||||
CBZ len, copyMatchDone
|
||||
|
||||
// If the match extends beyond the dictionary, the rest is at dstorig.
|
||||
// Recompute the offset for the next check.
|
||||
MOVD dstorig, match
|
||||
SUB dstorig, dst, offset
|
||||
|
||||
copyMatchTry8:
|
||||
// Copy doublewords if both len and offset are at least eight.
|
||||
// A 16-at-a-time loop doesn't provide a further speedup.
|
||||
CMP $8, len
|
||||
CCMP HS, offset, $8, $0
|
||||
BLO copyMatchTry4
|
||||
|
||||
AND $7, len, lenRem
|
||||
SUB $8, len
|
||||
copyMatchLoop8:
|
||||
MOVD.P 8(match), tmp1
|
||||
MOVD.P tmp1, 8(dst)
|
||||
SUBS $8, len
|
||||
BPL copyMatchLoop8
|
||||
|
||||
MOVD (match)(len), tmp2 // match+len == match+lenRem-8.
|
||||
ADD lenRem, dst
|
||||
MOVD $0, len
|
||||
MOVD tmp2, -8(dst)
|
||||
B copyMatchDone
|
||||
|
||||
copyMatchTry4:
|
||||
// Copy words if both len and offset are at least four.
|
||||
CMP $4, len
|
||||
CCMP HS, offset, $4, $0
|
||||
BLO copyMatchLoop1
|
||||
|
||||
MOVWU.P 4(match), tmp2
|
||||
MOVWU.P tmp2, 4(dst)
|
||||
SUBS $4, len
|
||||
BEQ copyMatchDone
|
||||
|
||||
copyMatchLoop1:
|
||||
// Byte-at-a-time copy for small offsets <= 3.
|
||||
MOVBU.P 1(match), tmp2
|
||||
MOVB.P tmp2, 1(dst)
|
||||
SUBS $1, len
|
||||
BNE copyMatchLoop1
|
||||
|
||||
copyMatchDone:
|
||||
CMP src, srcend
|
||||
BNE loop
|
||||
|
||||
end:
|
||||
CBNZ len, corrupt
|
||||
SUB dstorig, dst, tmp1
|
||||
MOVD tmp1, ret+72(FP)
|
||||
RET
|
||||
|
||||
// The error cases have distinct labels so we can put different
|
||||
// return codes here when debugging, or if the error returns need to
|
||||
// be changed.
|
||||
shortDict:
|
||||
shortDst:
|
||||
shortSrc:
|
||||
corrupt:
|
||||
MOVD $-1, tmp1
|
||||
MOVD tmp1, ret+72(FP)
|
||||
RET
|
|
@@ -0,0 +1,10 @@
|
|||
//go:build (amd64 || arm || arm64) && !appengine && gc && !noasm
|
||||
// +build amd64 arm arm64
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
package lz4block
|
||||
|
||||
//go:noescape
|
||||
func decodeBlock(dst, src, dict []byte) int
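Both the assembly decoders and the portable Go fallback below signal failure through a negative return value (the arm64 code above stores -1, the Go version -2), while a non-negative result is the number of bytes written to dst. A minimal sketch of how that convention can be turned into an error inside this package; the wrapper name and error text are illustrative and not part of the vendored code:

package lz4block

import "errors"

// uncompressInto is a hypothetical helper: it maps decodeBlock's negative
// sentinel values (-1 from assembly, -2 from the Go fallback) to an error.
func uncompressInto(dst, src, dict []byte) (int, error) {
	n := decodeBlock(dst, src, dict)
	if n < 0 {
		return 0, errors.New("lz4block: corrupt input or destination too small")
	}
	return n, nil
}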
|
139
vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_other.go
generated
vendored
Normal file
|
@@ -0,0 +1,139 @@
|
|||
//go:build (!amd64 && !arm && !arm64) || appengine || !gc || noasm
|
||||
// +build !amd64,!arm,!arm64 appengine !gc noasm
|
||||
|
||||
package lz4block
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
func decodeBlock(dst, src, dict []byte) (ret int) {
|
||||
// Restrict capacities so we don't read or write out of bounds.
|
||||
dst = dst[:len(dst):len(dst)]
|
||||
src = src[:len(src):len(src)]
|
||||
|
||||
const hasError = -2
|
||||
|
||||
if len(src) == 0 {
|
||||
return hasError
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
ret = hasError
|
||||
}
|
||||
}()
|
||||
|
||||
var si, di uint
|
||||
for si < uint(len(src)) {
|
||||
// Literals and match lengths (token).
|
||||
b := uint(src[si])
|
||||
si++
|
||||
|
||||
// Literals.
|
||||
if lLen := b >> 4; lLen > 0 {
|
||||
switch {
|
||||
case lLen < 0xF && si+16 < uint(len(src)):
|
||||
// Shortcut 1
|
||||
// if we have enough room in src and dst, and the literals length
|
||||
// is small enough (0..14) then copy all 16 bytes, even if not all
|
||||
// are part of the literals.
|
||||
copy(dst[di:], src[si:si+16])
|
||||
si += lLen
|
||||
di += lLen
|
||||
if mLen := b & 0xF; mLen < 0xF {
|
||||
// Shortcut 2
|
||||
// if the match length (4..18) fits within the literals, then copy
|
||||
// all 18 bytes, even if not all are part of the literals.
|
||||
mLen += 4
|
||||
if offset := u16(src[si:]); mLen <= offset && offset < di {
|
||||
i := di - offset
|
||||
// The remaining buffer may not hold 18 bytes.
|
||||
// See https://github.com/pierrec/lz4/issues/51.
|
||||
if end := i + 18; end <= uint(len(dst)) {
|
||||
copy(dst[di:], dst[i:end])
|
||||
si += 2
|
||||
di += mLen
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
case lLen == 0xF:
|
||||
for {
|
||||
x := uint(src[si])
|
||||
if lLen += x; int(lLen) < 0 {
|
||||
return hasError
|
||||
}
|
||||
si++
|
||||
if x != 0xFF {
|
||||
break
|
||||
}
|
||||
}
|
||||
fallthrough
|
||||
default:
|
||||
copy(dst[di:di+lLen], src[si:si+lLen])
|
||||
si += lLen
|
||||
di += lLen
|
||||
}
|
||||
}
|
||||
|
||||
mLen := b & 0xF
|
||||
if si == uint(len(src)) && mLen == 0 {
|
||||
break
|
||||
} else if si >= uint(len(src)) {
|
||||
return hasError
|
||||
}
|
||||
|
||||
offset := u16(src[si:])
|
||||
if offset == 0 {
|
||||
return hasError
|
||||
}
|
||||
si += 2
|
||||
|
||||
// Match.
|
||||
mLen += minMatch
|
||||
if mLen == minMatch+0xF {
|
||||
for {
|
||||
x := uint(src[si])
|
||||
if mLen += x; int(mLen) < 0 {
|
||||
return hasError
|
||||
}
|
||||
si++
|
||||
if x != 0xFF {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the match.
|
||||
if di < offset {
|
||||
// The match is beyond our block, meaning the first part
|
||||
// is in the dictionary.
|
||||
fromDict := dict[uint(len(dict))+di-offset:]
|
||||
n := uint(copy(dst[di:di+mLen], fromDict))
|
||||
di += n
|
||||
if mLen -= n; mLen == 0 {
|
||||
continue
|
||||
}
|
||||
// We copied n = offset-di bytes from the dictionary,
|
||||
// then set di = di+n = offset, so the following code
|
||||
// copies from dst[di-offset:] = dst[0:].
|
||||
}
|
||||
|
||||
expanded := dst[di-offset:]
|
||||
if mLen > offset {
|
||||
// Efficiently copy the match dst[di-offset:di] into the dst slice.
|
||||
bytesToCopy := offset * (mLen / offset)
|
||||
for n := offset; n <= bytesToCopy+offset; n *= 2 {
|
||||
copy(expanded[n:], expanded[:n])
|
||||
}
|
||||
di += bytesToCopy
|
||||
mLen -= bytesToCopy
|
||||
}
|
||||
di += uint(copy(dst[di:di+mLen], expanded[:mLen]))
|
||||
}
|
||||
|
||||
return int(di)
|
||||
}
|
||||
|
||||
func u16(p []byte) uint { return uint(binary.LittleEndian.Uint16(p)) }
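The decoder above is driven by the LZ4 block token: the high nibble carries the literal length and the low nibble the match length, and a nibble of 15 is extended by extra bytes until one of them is not 0xFF. A standalone sketch of that length decoding, separate from the vendored code (readLen is an illustrative name):

package main

import (
	"errors"
	"fmt"
)

// readLen decodes an LZ4 length field: start from a 4-bit nibble and, if it
// saturates at 15, keep adding the following bytes until one is not 0xFF.
func readLen(nibble uint, src []byte, si uint) (uint, uint, error) {
	length := nibble
	if nibble == 0xF {
		for {
			if si >= uint(len(src)) {
				return 0, 0, errors.New("truncated length")
			}
			x := uint(src[si])
			si++
			length += x
			if x != 0xFF {
				break
			}
		}
	}
	return length, si, nil
}

func main() {
	// Token 0xF2: literal length nibble 15 (extended), match length nibble 2.
	src := []byte{0xF2, 0xFF, 0x03}
	token := uint(src[0])
	litLen, si, _ := readLen(token>>4, src, 1)
	fmt.Println(litLen, si) // 273 3 (15 + 255 + 3 literals, next read at offset 3)
}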
|
|
@@ -0,0 +1,19 @@
|
|||
package lz4errors
|
||||
|
||||
type Error string
|
||||
|
||||
func (e Error) Error() string { return string(e) }
|
||||
|
||||
const (
|
||||
ErrInvalidSourceShortBuffer Error = "lz4: invalid source or destination buffer too short"
|
||||
ErrInvalidFrame Error = "lz4: bad magic number"
|
||||
ErrInternalUnhandledState Error = "lz4: unhandled state"
|
||||
ErrInvalidHeaderChecksum Error = "lz4: invalid header checksum"
|
||||
ErrInvalidBlockChecksum Error = "lz4: invalid block checksum"
|
||||
ErrInvalidFrameChecksum Error = "lz4: invalid frame checksum"
|
||||
ErrOptionInvalidCompressionLevel Error = "lz4: invalid compression level"
|
||||
ErrOptionClosedOrError Error = "lz4: cannot apply options on closed or in error object"
|
||||
ErrOptionInvalidBlockSize Error = "lz4: invalid block size"
|
||||
ErrOptionNotApplicable Error = "lz4: option not applicable"
|
||||
ErrWriterNotClosed Error = "lz4: writer not closed"
|
||||
)
|
|
@@ -0,0 +1,350 @@
|
|||
package lz4stream
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"sync"
|
||||
|
||||
"github.com/pierrec/lz4/v4/internal/lz4block"
|
||||
"github.com/pierrec/lz4/v4/internal/lz4errors"
|
||||
"github.com/pierrec/lz4/v4/internal/xxh32"
|
||||
)
|
||||
|
||||
type Blocks struct {
|
||||
Block *FrameDataBlock
|
||||
Blocks chan chan *FrameDataBlock
|
||||
mu sync.Mutex
|
||||
err error
|
||||
}
|
||||
|
||||
func (b *Blocks) initW(f *Frame, dst io.Writer, num int) {
|
||||
if num == 1 {
|
||||
b.Blocks = nil
|
||||
b.Block = NewFrameDataBlock(f)
|
||||
return
|
||||
}
|
||||
b.Block = nil
|
||||
if cap(b.Blocks) != num {
|
||||
b.Blocks = make(chan chan *FrameDataBlock, num)
|
||||
}
|
||||
// goroutine managing concurrent block compression goroutines.
|
||||
go func() {
|
||||
// Process next block compression item.
|
||||
for c := range b.Blocks {
|
||||
// Read the next compressed block result.
|
||||
// Waiting here ensures that the blocks are output in the order they were sent.
|
||||
// The incoming channel is always closed as it indicates to the caller that
|
||||
// the block has been processed.
|
||||
block := <-c
|
||||
if block == nil {
|
||||
// Notify the block compression routine that we are done with its result.
|
||||
// This is used when a sentinel block is sent to terminate the compression.
|
||||
close(c)
|
||||
return
|
||||
}
|
||||
// Do not attempt to write the block upon any previous failure.
|
||||
if b.err == nil {
|
||||
// Write the block.
|
||||
if err := block.Write(f, dst); err != nil {
|
||||
// Keep the first error.
|
||||
b.err = err
|
||||
// All pending compression goroutines need to shut down, so we need to keep going.
|
||||
}
|
||||
}
|
||||
close(c)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func (b *Blocks) close(f *Frame, num int) error {
|
||||
if num == 1 {
|
||||
if b.Block != nil {
|
||||
b.Block.Close(f)
|
||||
}
|
||||
err := b.err
|
||||
b.err = nil
|
||||
return err
|
||||
}
|
||||
if b.Blocks == nil {
|
||||
err := b.err
|
||||
b.err = nil
|
||||
return err
|
||||
}
|
||||
c := make(chan *FrameDataBlock)
|
||||
b.Blocks <- c
|
||||
c <- nil
|
||||
<-c
|
||||
err := b.err
|
||||
b.err = nil
|
||||
return err
|
||||
}
|
||||
|
||||
// ErrorR returns any error set while uncompressing a stream.
|
||||
func (b *Blocks) ErrorR() error {
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
return b.err
|
||||
}
|
||||
|
||||
// initR returns a channel that streams the uncompressed blocks if in concurrent
|
||||
// mode and no error. When the channel is closed, check for any error with b.ErrorR.
|
||||
//
|
||||
// If not in concurrent mode, the uncompressed block is b.Block and the returned error
|
||||
// needs to be checked.
|
||||
func (b *Blocks) initR(f *Frame, num int, src io.Reader) (chan []byte, error) {
|
||||
size := f.Descriptor.Flags.BlockSizeIndex()
|
||||
if num == 1 {
|
||||
b.Blocks = nil
|
||||
b.Block = NewFrameDataBlock(f)
|
||||
return nil, nil
|
||||
}
|
||||
b.Block = nil
|
||||
blocks := make(chan chan []byte, num)
|
||||
// data receives the uncompressed blocks.
|
||||
data := make(chan []byte)
|
||||
// Read blocks from the source sequentially
|
||||
// and uncompress them concurrently.
|
||||
|
||||
// In legacy mode, accrue the uncompressed sizes in cum.
|
||||
var cum uint32
|
||||
go func() {
|
||||
var cumx uint32
|
||||
var err error
|
||||
for b.ErrorR() == nil {
|
||||
block := NewFrameDataBlock(f)
|
||||
cumx, err = block.Read(f, src, 0)
|
||||
if err != nil {
|
||||
block.Close(f)
|
||||
break
|
||||
}
|
||||
// Recheck for an error as reading may be slow and uncompressing is expensive.
|
||||
if b.ErrorR() != nil {
|
||||
block.Close(f)
|
||||
break
|
||||
}
|
||||
c := make(chan []byte)
|
||||
blocks <- c
|
||||
go func() {
|
||||
defer block.Close(f)
|
||||
data, err := block.Uncompress(f, size.Get(), nil, false)
|
||||
if err != nil {
|
||||
b.closeR(err)
|
||||
// Close the block channel to indicate an error.
|
||||
close(c)
|
||||
} else {
|
||||
c <- data
|
||||
}
|
||||
}()
|
||||
}
|
||||
// End the collection loop and the data channel.
|
||||
c := make(chan []byte)
|
||||
blocks <- c
|
||||
c <- nil // signal the collection loop that we are done
|
||||
<-c // wait for the collect loop to complete
|
||||
if f.isLegacy() && cum == cumx {
|
||||
err = io.EOF
|
||||
}
|
||||
b.closeR(err)
|
||||
close(data)
|
||||
}()
|
||||
// Collect the uncompressed blocks and make them available
|
||||
// on the returned channel.
|
||||
go func(leg bool) {
|
||||
defer close(blocks)
|
||||
skipBlocks := false
|
||||
for c := range blocks {
|
||||
buf, ok := <-c
|
||||
if !ok {
|
||||
// A closed channel indicates an error.
|
||||
// All remaining channels should be discarded.
|
||||
skipBlocks = true
|
||||
continue
|
||||
}
|
||||
if buf == nil {
|
||||
// Signal to end the loop.
|
||||
close(c)
|
||||
return
|
||||
}
|
||||
if skipBlocks {
|
||||
// A previous error has occurred, skipping remaining channels.
|
||||
continue
|
||||
}
|
||||
// Perform checksum now as the blocks are received in order.
|
||||
if f.Descriptor.Flags.ContentChecksum() {
|
||||
_, _ = f.checksum.Write(buf)
|
||||
}
|
||||
if leg {
|
||||
cum += uint32(len(buf))
|
||||
}
|
||||
data <- buf
|
||||
close(c)
|
||||
}
|
||||
}(f.isLegacy())
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// closeR safely sets the error on b if not already set.
|
||||
func (b *Blocks) closeR(err error) {
|
||||
b.mu.Lock()
|
||||
if b.err == nil {
|
||||
b.err = err
|
||||
}
|
||||
b.mu.Unlock()
|
||||
}
|
||||
|
||||
func NewFrameDataBlock(f *Frame) *FrameDataBlock {
|
||||
buf := f.Descriptor.Flags.BlockSizeIndex().Get()
|
||||
return &FrameDataBlock{Data: buf, data: buf}
|
||||
}
|
||||
|
||||
type FrameDataBlock struct {
|
||||
Size DataBlockSize
|
||||
Data []byte // compressed or uncompressed data (.data or .src)
|
||||
Checksum uint32
|
||||
data []byte // buffer for compressed data
|
||||
src []byte // uncompressed data
|
||||
err error // used in concurrent mode
|
||||
}
|
||||
|
||||
func (b *FrameDataBlock) Close(f *Frame) {
|
||||
b.Size = 0
|
||||
b.Checksum = 0
|
||||
b.err = nil
|
||||
if b.data != nil {
|
||||
// Block was not already closed.
|
||||
lz4block.Put(b.data)
|
||||
b.Data = nil
|
||||
b.data = nil
|
||||
b.src = nil
|
||||
}
|
||||
}
|
||||
|
||||
// Block compression errors are ignored since the buffer is sized appropriately.
|
||||
func (b *FrameDataBlock) Compress(f *Frame, src []byte, level lz4block.CompressionLevel) *FrameDataBlock {
|
||||
data := b.data
|
||||
if f.isLegacy() {
|
||||
// In legacy mode, the buffer is sized according to CompressBlockBound,
|
||||
// but only 8Mb is buffered for compression.
|
||||
src = src[:8<<20]
|
||||
} else {
|
||||
data = data[:len(src)] // trigger the incompressible flag in CompressBlock
|
||||
}
|
||||
var n int
|
||||
switch level {
|
||||
case lz4block.Fast:
|
||||
n, _ = lz4block.CompressBlock(src, data)
|
||||
default:
|
||||
n, _ = lz4block.CompressBlockHC(src, data, level)
|
||||
}
|
||||
if n == 0 {
|
||||
b.Size.UncompressedSet(true)
|
||||
b.Data = src
|
||||
} else {
|
||||
b.Size.UncompressedSet(false)
|
||||
b.Data = data[:n]
|
||||
}
|
||||
b.Size.sizeSet(len(b.Data))
|
||||
b.src = src // keep track of the source for content checksum
|
||||
|
||||
if f.Descriptor.Flags.BlockChecksum() {
|
||||
b.Checksum = xxh32.ChecksumZero(src)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func (b *FrameDataBlock) Write(f *Frame, dst io.Writer) error {
|
||||
// Write is called in the same order as blocks are compressed,
|
||||
// so content checksum must be done here.
|
||||
if f.Descriptor.Flags.ContentChecksum() {
|
||||
_, _ = f.checksum.Write(b.src)
|
||||
}
|
||||
buf := f.buf[:]
|
||||
binary.LittleEndian.PutUint32(buf, uint32(b.Size))
|
||||
if _, err := dst.Write(buf[:4]); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := dst.Write(b.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if b.Checksum == 0 {
|
||||
return nil
|
||||
}
|
||||
binary.LittleEndian.PutUint32(buf, b.Checksum)
|
||||
_, err := dst.Write(buf[:4])
|
||||
return err
|
||||
}
|
||||
|
||||
// Read updates b with the next block data, size and checksum if available.
|
||||
func (b *FrameDataBlock) Read(f *Frame, src io.Reader, cum uint32) (uint32, error) {
|
||||
x, err := f.readUint32(src)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if f.isLegacy() {
|
||||
switch x {
|
||||
case frameMagicLegacy:
|
||||
// Concatenated legacy frame.
|
||||
return b.Read(f, src, cum)
|
||||
case cum:
|
||||
// Only works in non-concurrent mode; for concurrent mode
|
||||
// it is handled separately.
|
||||
// Linux kernel format appends the total uncompressed size at the end.
|
||||
return 0, io.EOF
|
||||
}
|
||||
} else if x == 0 {
|
||||
// Marker for end of stream.
|
||||
return 0, io.EOF
|
||||
}
|
||||
b.Size = DataBlockSize(x)
|
||||
|
||||
size := b.Size.size()
|
||||
if size > cap(b.data) {
|
||||
return x, lz4errors.ErrOptionInvalidBlockSize
|
||||
}
|
||||
b.data = b.data[:size]
|
||||
if _, err := io.ReadFull(src, b.data); err != nil {
|
||||
return x, err
|
||||
}
|
||||
if f.Descriptor.Flags.BlockChecksum() {
|
||||
sum, err := f.readUint32(src)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
b.Checksum = sum
|
||||
}
|
||||
return x, nil
|
||||
}
|
||||
|
||||
func (b *FrameDataBlock) Uncompress(f *Frame, dst, dict []byte, sum bool) ([]byte, error) {
|
||||
if b.Size.Uncompressed() {
|
||||
n := copy(dst, b.data)
|
||||
dst = dst[:n]
|
||||
} else {
|
||||
n, err := lz4block.UncompressBlock(b.data, dst, dict)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dst = dst[:n]
|
||||
}
|
||||
if f.Descriptor.Flags.BlockChecksum() {
|
||||
if c := xxh32.ChecksumZero(dst); c != b.Checksum {
|
||||
err := fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidBlockChecksum, c, b.Checksum)
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if sum && f.Descriptor.Flags.ContentChecksum() {
|
||||
_, _ = f.checksum.Write(dst)
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
func (f *Frame) readUint32(r io.Reader) (x uint32, err error) {
|
||||
if _, err = io.ReadFull(r, f.buf[:4]); err != nil {
|
||||
return
|
||||
}
|
||||
x = binary.LittleEndian.Uint32(f.buf[:4])
|
||||
return
|
||||
}
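The write path above keeps compressed blocks in order despite concurrent compression by queuing one inner channel per block and letting a single collector drain them in submission order. A stripped-down, standalone sketch of that channel-of-channels pattern with placeholder work instead of block compression:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// One inner channel per work item, queued in submission order.
	results := make(chan chan string, 4)

	// Collector: blocks on each inner channel in turn, so output order
	// matches input order even if workers finish out of order.
	done := make(chan struct{})
	go func() {
		for c := range results {
			fmt.Println(<-c)
		}
		close(done)
	}()

	for _, word := range []string{"alpha", "beta", "gamma"} {
		c := make(chan string, 1)
		results <- c
		go func(w string) { c <- strings.ToUpper(w) }(word)
	}
	close(results)
	<-done
}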
|
|
@@ -0,0 +1,204 @@
|
|||
// Package lz4stream provides the types that support reading and writing LZ4 data streams.
|
||||
package lz4stream
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
|
||||
"github.com/pierrec/lz4/v4/internal/lz4block"
|
||||
"github.com/pierrec/lz4/v4/internal/lz4errors"
|
||||
"github.com/pierrec/lz4/v4/internal/xxh32"
|
||||
)
|
||||
|
||||
//go:generate go run gen.go
|
||||
|
||||
const (
|
||||
frameMagic uint32 = 0x184D2204
|
||||
frameSkipMagic uint32 = 0x184D2A50
|
||||
frameMagicLegacy uint32 = 0x184C2102
|
||||
)
|
||||
|
||||
func NewFrame() *Frame {
|
||||
return &Frame{}
|
||||
}
|
||||
|
||||
type Frame struct {
|
||||
buf [15]byte // frame descriptor needs at most 4(magic)+2(flags)+8(size)+1(checksum)=15 bytes
|
||||
Magic uint32
|
||||
Descriptor FrameDescriptor
|
||||
Blocks Blocks
|
||||
Checksum uint32
|
||||
checksum xxh32.XXHZero
|
||||
}
|
||||
|
||||
// Reset allows reusing the Frame.
|
||||
// The Descriptor configuration is not modified.
|
||||
func (f *Frame) Reset(num int) {
|
||||
f.Magic = 0
|
||||
f.Descriptor.Checksum = 0
|
||||
f.Descriptor.ContentSize = 0
|
||||
_ = f.Blocks.close(f, num)
|
||||
f.Checksum = 0
|
||||
}
|
||||
|
||||
func (f *Frame) InitW(dst io.Writer, num int, legacy bool) {
|
||||
if legacy {
|
||||
f.Magic = frameMagicLegacy
|
||||
idx := lz4block.Index(lz4block.Block8Mb)
|
||||
f.Descriptor.Flags.BlockSizeIndexSet(idx)
|
||||
} else {
|
||||
f.Magic = frameMagic
|
||||
f.Descriptor.initW()
|
||||
}
|
||||
f.Blocks.initW(f, dst, num)
|
||||
f.checksum.Reset()
|
||||
}
|
||||
|
||||
func (f *Frame) CloseW(dst io.Writer, num int) error {
|
||||
if err := f.Blocks.close(f, num); err != nil {
|
||||
return err
|
||||
}
|
||||
if f.isLegacy() {
|
||||
return nil
|
||||
}
|
||||
buf := f.buf[:0]
|
||||
// End mark (data block size of uint32(0)).
|
||||
buf = append(buf, 0, 0, 0, 0)
|
||||
if f.Descriptor.Flags.ContentChecksum() {
|
||||
buf = f.checksum.Sum(buf)
|
||||
}
|
||||
_, err := dst.Write(buf)
|
||||
return err
|
||||
}
|
||||
|
||||
func (f *Frame) isLegacy() bool {
|
||||
return f.Magic == frameMagicLegacy
|
||||
}
|
||||
|
||||
func (f *Frame) ParseHeaders(src io.Reader) error {
|
||||
if f.Magic > 0 {
|
||||
// Header already read.
|
||||
return nil
|
||||
}
|
||||
|
||||
newFrame:
|
||||
var err error
|
||||
if f.Magic, err = f.readUint32(src); err != nil {
|
||||
return err
|
||||
}
|
||||
switch m := f.Magic; {
|
||||
case m == frameMagic || m == frameMagicLegacy:
|
||||
// All 16 values of frameSkipMagic are valid.
|
||||
case m>>8 == frameSkipMagic>>8:
|
||||
skip, err := f.readUint32(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := io.CopyN(ioutil.Discard, src, int64(skip)); err != nil {
|
||||
return err
|
||||
}
|
||||
goto newFrame
|
||||
default:
|
||||
return lz4errors.ErrInvalidFrame
|
||||
}
|
||||
if err := f.Descriptor.initR(f, src); err != nil {
|
||||
return err
|
||||
}
|
||||
f.checksum.Reset()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *Frame) InitR(src io.Reader, num int) (chan []byte, error) {
|
||||
return f.Blocks.initR(f, num, src)
|
||||
}
|
||||
|
||||
func (f *Frame) CloseR(src io.Reader) (err error) {
|
||||
if f.isLegacy() {
|
||||
return nil
|
||||
}
|
||||
if !f.Descriptor.Flags.ContentChecksum() {
|
||||
return nil
|
||||
}
|
||||
if f.Checksum, err = f.readUint32(src); err != nil {
|
||||
return err
|
||||
}
|
||||
if c := f.checksum.Sum32(); c != f.Checksum {
|
||||
return fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidFrameChecksum, c, f.Checksum)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type FrameDescriptor struct {
|
||||
Flags DescriptorFlags
|
||||
ContentSize uint64
|
||||
Checksum uint8
|
||||
}
|
||||
|
||||
func (fd *FrameDescriptor) initW() {
|
||||
fd.Flags.VersionSet(1)
|
||||
fd.Flags.BlockIndependenceSet(true)
|
||||
}
|
||||
|
||||
func (fd *FrameDescriptor) Write(f *Frame, dst io.Writer) error {
|
||||
if fd.Checksum > 0 {
|
||||
// Header already written.
|
||||
return nil
|
||||
}
|
||||
|
||||
buf := f.buf[:4]
|
||||
// Write the magic number here even though it belongs to the Frame.
|
||||
binary.LittleEndian.PutUint32(buf, f.Magic)
|
||||
if !f.isLegacy() {
|
||||
buf = buf[:4+2]
|
||||
binary.LittleEndian.PutUint16(buf[4:], uint16(fd.Flags))
|
||||
|
||||
if fd.Flags.Size() {
|
||||
buf = buf[:4+2+8]
|
||||
binary.LittleEndian.PutUint64(buf[4+2:], fd.ContentSize)
|
||||
}
|
||||
fd.Checksum = descriptorChecksum(buf[4:])
|
||||
buf = append(buf, fd.Checksum)
|
||||
}
|
||||
|
||||
_, err := dst.Write(buf)
|
||||
return err
|
||||
}
|
||||
|
||||
func (fd *FrameDescriptor) initR(f *Frame, src io.Reader) error {
|
||||
if f.isLegacy() {
|
||||
idx := lz4block.Index(lz4block.Block8Mb)
|
||||
f.Descriptor.Flags.BlockSizeIndexSet(idx)
|
||||
return nil
|
||||
}
|
||||
// Read the flags and the checksum, hoping that there is no content size.
|
||||
buf := f.buf[:3]
|
||||
if _, err := io.ReadFull(src, buf); err != nil {
|
||||
return err
|
||||
}
|
||||
descr := binary.LittleEndian.Uint16(buf)
|
||||
fd.Flags = DescriptorFlags(descr)
|
||||
if fd.Flags.Size() {
|
||||
// Append the 8 missing bytes.
|
||||
buf = buf[:3+8]
|
||||
if _, err := io.ReadFull(src, buf[3:]); err != nil {
|
||||
return err
|
||||
}
|
||||
fd.ContentSize = binary.LittleEndian.Uint64(buf[2:])
|
||||
}
|
||||
fd.Checksum = buf[len(buf)-1] // the checksum is the last byte
|
||||
buf = buf[:len(buf)-1] // all descriptor fields except checksum
|
||||
if c := descriptorChecksum(buf); fd.Checksum != c {
|
||||
return fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidHeaderChecksum, c, fd.Checksum)
|
||||
}
|
||||
// Validate the elements that can be.
|
||||
if idx := fd.Flags.BlockSizeIndex(); !idx.IsValid() {
|
||||
return lz4errors.ErrOptionInvalidBlockSize
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func descriptorChecksum(buf []byte) byte {
|
||||
return byte(xxh32.ChecksumZero(buf) >> 8)
|
||||
}
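For reference, the magic numbers above are written little-endian, so a standard frame begins with the bytes 04 22 4D 18 and a legacy frame with 02 21 4C 18, and ParseHeaders treats any magic that matches frameSkipMagic in its upper 24 bits as a skippable frame (the format reserves the 16 values 0x184D2A50 through 0x184D2A5F), reading a further uint32 length and discarding that many bytes. A tiny standalone check of the byte order and the skip test:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	const frameMagic uint32 = 0x184D2204
	const frameSkipMagic uint32 = 0x184D2A50

	var buf [4]byte
	binary.LittleEndian.PutUint32(buf[:], frameMagic)
	fmt.Printf("% X\n", buf[:]) // 04 22 4D 18

	// Same upper-24-bit comparison as ParseHeaders uses for skippable frames.
	for _, m := range []uint32{0x184D2A50, 0x184D2A5F} {
		fmt.Println(m>>8 == frameSkipMagic>>8) // true, true
	}
}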
|
|
@@ -0,0 +1,103 @@
|
|||
// Code generated by `gen.exe`. DO NOT EDIT.
|
||||
|
||||
package lz4stream
|
||||
|
||||
import "github.com/pierrec/lz4/v4/internal/lz4block"
|
||||
|
||||
// DescriptorFlags is defined as follows:
|
||||
// field bits
|
||||
// ----- ----
|
||||
// _ 2
|
||||
// ContentChecksum 1
|
||||
// Size 1
|
||||
// BlockChecksum 1
|
||||
// BlockIndependence 1
|
||||
// Version 2
|
||||
// _ 4
|
||||
// BlockSizeIndex 3
|
||||
// _ 1
|
||||
type DescriptorFlags uint16
|
||||
|
||||
// Getters.
|
||||
func (x DescriptorFlags) ContentChecksum() bool { return x>>2&1 != 0 }
|
||||
func (x DescriptorFlags) Size() bool { return x>>3&1 != 0 }
|
||||
func (x DescriptorFlags) BlockChecksum() bool { return x>>4&1 != 0 }
|
||||
func (x DescriptorFlags) BlockIndependence() bool { return x>>5&1 != 0 }
|
||||
func (x DescriptorFlags) Version() uint16 { return uint16(x >> 6 & 0x3) }
|
||||
func (x DescriptorFlags) BlockSizeIndex() lz4block.BlockSizeIndex {
|
||||
return lz4block.BlockSizeIndex(x >> 12 & 0x7)
|
||||
}
|
||||
|
||||
// Setters.
|
||||
func (x *DescriptorFlags) ContentChecksumSet(v bool) *DescriptorFlags {
|
||||
const b = 1 << 2
|
||||
if v {
|
||||
*x = *x&^b | b
|
||||
} else {
|
||||
*x &^= b
|
||||
}
|
||||
return x
|
||||
}
|
||||
func (x *DescriptorFlags) SizeSet(v bool) *DescriptorFlags {
|
||||
const b = 1 << 3
|
||||
if v {
|
||||
*x = *x&^b | b
|
||||
} else {
|
||||
*x &^= b
|
||||
}
|
||||
return x
|
||||
}
|
||||
func (x *DescriptorFlags) BlockChecksumSet(v bool) *DescriptorFlags {
|
||||
const b = 1 << 4
|
||||
if v {
|
||||
*x = *x&^b | b
|
||||
} else {
|
||||
*x &^= b
|
||||
}
|
||||
return x
|
||||
}
|
||||
func (x *DescriptorFlags) BlockIndependenceSet(v bool) *DescriptorFlags {
|
||||
const b = 1 << 5
|
||||
if v {
|
||||
*x = *x&^b | b
|
||||
} else {
|
||||
*x &^= b
|
||||
}
|
||||
return x
|
||||
}
|
||||
func (x *DescriptorFlags) VersionSet(v uint16) *DescriptorFlags {
|
||||
*x = *x&^(0x3<<6) | (DescriptorFlags(v) & 0x3 << 6)
|
||||
return x
|
||||
}
|
||||
func (x *DescriptorFlags) BlockSizeIndexSet(v lz4block.BlockSizeIndex) *DescriptorFlags {
|
||||
*x = *x&^(0x7<<12) | (DescriptorFlags(v) & 0x7 << 12)
|
||||
return x
|
||||
}
|
||||
|
||||
// Code generated by `gen.exe`. DO NOT EDIT.
|
||||
|
||||
// DataBlockSize is defined as follows:
|
||||
// field bits
|
||||
// ----- ----
|
||||
// size 31
|
||||
// Uncompressed 1
|
||||
type DataBlockSize uint32
|
||||
|
||||
// Getters.
|
||||
func (x DataBlockSize) size() int { return int(x & 0x7FFFFFFF) }
|
||||
func (x DataBlockSize) Uncompressed() bool { return x>>31&1 != 0 }
|
||||
|
||||
// Setters.
|
||||
func (x *DataBlockSize) sizeSet(v int) *DataBlockSize {
|
||||
*x = *x&^0x7FFFFFFF | DataBlockSize(v)&0x7FFFFFFF
|
||||
return x
|
||||
}
|
||||
func (x *DataBlockSize) UncompressedSet(v bool) *DataBlockSize {
|
||||
const b = 1 << 31
|
||||
if v {
|
||||
*x = *x&^b | b
|
||||
} else {
|
||||
*x &^= b
|
||||
}
|
||||
return x
|
||||
}
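To make the generated bit layout concrete: the writer's default descriptor sets version 1 (bits 6-7) and block independence (bit 5), and the block size index lives in bits 12-14, where index 7 corresponds to 4 MB blocks. A short standalone sketch of the same packing using plain shifts instead of the generated setters:

package main

import "fmt"

func main() {
	var flags uint16

	flags |= 1 << 6  // Version = 1 (bits 6-7)
	flags |= 1 << 5  // BlockIndependence (bit 5)
	flags |= 7 << 12 // BlockSizeIndex = 7, i.e. 4 MB blocks (bits 12-14)

	fmt.Printf("flags = %#04x\n", flags)          // flags = 0x7060
	fmt.Println("version:", flags>>6&0x3)         // version: 1
	fmt.Println("block size index:", flags>>12&7) // block size index: 7
}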
|
|
@@ -0,0 +1,212 @@
|
|||
// Package xxh32 implements the very fast XXH hashing algorithm (32 bits version).
|
||||
// (ported from the reference implementation https://github.com/Cyan4973/xxHash/)
|
||||
package xxh32
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
const (
|
||||
prime1 uint32 = 2654435761
|
||||
prime2 uint32 = 2246822519
|
||||
prime3 uint32 = 3266489917
|
||||
prime4 uint32 = 668265263
|
||||
prime5 uint32 = 374761393
|
||||
|
||||
primeMask = 0xFFFFFFFF
|
||||
prime1plus2 = uint32((uint64(prime1) + uint64(prime2)) & primeMask) // 606290984
|
||||
prime1minus = uint32((-int64(prime1)) & primeMask) // 1640531535
|
||||
)
|
||||
|
||||
// XXHZero represents an xxhash32 object with seed 0.
|
||||
type XXHZero struct {
|
||||
v [4]uint32
|
||||
totalLen uint64
|
||||
buf [16]byte
|
||||
bufused int
|
||||
}
|
||||
|
||||
// Sum appends the current hash to b and returns the resulting slice.
|
||||
// It does not change the underlying hash state.
|
||||
func (xxh XXHZero) Sum(b []byte) []byte {
|
||||
h32 := xxh.Sum32()
|
||||
return append(b, byte(h32), byte(h32>>8), byte(h32>>16), byte(h32>>24))
|
||||
}
|
||||
|
||||
// Reset resets the Hash to its initial state.
|
||||
func (xxh *XXHZero) Reset() {
|
||||
xxh.v[0] = prime1plus2
|
||||
xxh.v[1] = prime2
|
||||
xxh.v[2] = 0
|
||||
xxh.v[3] = prime1minus
|
||||
xxh.totalLen = 0
|
||||
xxh.bufused = 0
|
||||
}
|
||||
|
||||
// Size returns the number of bytes returned by Sum().
|
||||
func (xxh *XXHZero) Size() int {
|
||||
return 4
|
||||
}
|
||||
|
||||
// BlockSize gives the minimum number of bytes accepted by Write().
|
||||
func (xxh *XXHZero) BlockSize() int {
|
||||
return 1
|
||||
}
|
||||
|
||||
// Write adds input bytes to the Hash.
|
||||
// It never returns an error.
|
||||
func (xxh *XXHZero) Write(input []byte) (int, error) {
|
||||
if xxh.totalLen == 0 {
|
||||
xxh.Reset()
|
||||
}
|
||||
n := len(input)
|
||||
m := xxh.bufused
|
||||
|
||||
xxh.totalLen += uint64(n)
|
||||
|
||||
r := len(xxh.buf) - m
|
||||
if n < r {
|
||||
copy(xxh.buf[m:], input)
|
||||
xxh.bufused += len(input)
|
||||
return n, nil
|
||||
}
|
||||
|
||||
var buf *[16]byte
|
||||
if m != 0 {
|
||||
// some data left from previous update
|
||||
buf = &xxh.buf
|
||||
c := copy(buf[m:], input)
|
||||
n -= c
|
||||
input = input[c:]
|
||||
}
|
||||
update(&xxh.v, buf, input)
|
||||
xxh.bufused = copy(xxh.buf[:], input[n-n%16:])
|
||||
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// Portable version of update. This updates v by processing all of buf
|
||||
// (if not nil) and all full 16-byte blocks of input.
|
||||
func updateGo(v *[4]uint32, buf *[16]byte, input []byte) {
|
||||
// Causes compiler to work directly from registers instead of stack:
|
||||
v1, v2, v3, v4 := v[0], v[1], v[2], v[3]
|
||||
|
||||
if buf != nil {
|
||||
v1 = rol13(v1+binary.LittleEndian.Uint32(buf[:])*prime2) * prime1
|
||||
v2 = rol13(v2+binary.LittleEndian.Uint32(buf[4:])*prime2) * prime1
|
||||
v3 = rol13(v3+binary.LittleEndian.Uint32(buf[8:])*prime2) * prime1
|
||||
v4 = rol13(v4+binary.LittleEndian.Uint32(buf[12:])*prime2) * prime1
|
||||
}
|
||||
|
||||
for ; len(input) >= 16; input = input[16:] {
|
||||
sub := input[:16] //BCE hint for compiler
|
||||
v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
|
||||
v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
|
||||
v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
|
||||
v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
|
||||
}
|
||||
v[0], v[1], v[2], v[3] = v1, v2, v3, v4
|
||||
}
|
||||
|
||||
// Sum32 returns the 32-bit hash value.
|
||||
func (xxh *XXHZero) Sum32() uint32 {
|
||||
h32 := uint32(xxh.totalLen)
|
||||
if h32 >= 16 {
|
||||
h32 += rol1(xxh.v[0]) + rol7(xxh.v[1]) + rol12(xxh.v[2]) + rol18(xxh.v[3])
|
||||
} else {
|
||||
h32 += prime5
|
||||
}
|
||||
|
||||
p := 0
|
||||
n := xxh.bufused
|
||||
buf := xxh.buf
|
||||
for n := n - 4; p <= n; p += 4 {
|
||||
h32 += binary.LittleEndian.Uint32(buf[p:p+4]) * prime3
|
||||
h32 = rol17(h32) * prime4
|
||||
}
|
||||
for ; p < n; p++ {
|
||||
h32 += uint32(buf[p]) * prime5
|
||||
h32 = rol11(h32) * prime1
|
||||
}
|
||||
|
||||
h32 ^= h32 >> 15
|
||||
h32 *= prime2
|
||||
h32 ^= h32 >> 13
|
||||
h32 *= prime3
|
||||
h32 ^= h32 >> 16
|
||||
|
||||
return h32
|
||||
}
|
||||
|
||||
// Portable version of ChecksumZero.
|
||||
func checksumZeroGo(input []byte) uint32 {
|
||||
n := len(input)
|
||||
h32 := uint32(n)
|
||||
|
||||
if n < 16 {
|
||||
h32 += prime5
|
||||
} else {
|
||||
v1 := prime1plus2
|
||||
v2 := prime2
|
||||
v3 := uint32(0)
|
||||
v4 := prime1minus
|
||||
p := 0
|
||||
for n := n - 16; p <= n; p += 16 {
|
||||
sub := input[p:][:16] //BCE hint for compiler
|
||||
v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
|
||||
v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
|
||||
v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
|
||||
v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
|
||||
}
|
||||
input = input[p:]
|
||||
n -= p
|
||||
h32 += rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
|
||||
}
|
||||
|
||||
p := 0
|
||||
for n := n - 4; p <= n; p += 4 {
|
||||
h32 += binary.LittleEndian.Uint32(input[p:p+4]) * prime3
|
||||
h32 = rol17(h32) * prime4
|
||||
}
|
||||
for p < n {
|
||||
h32 += uint32(input[p]) * prime5
|
||||
h32 = rol11(h32) * prime1
|
||||
p++
|
||||
}
|
||||
|
||||
h32 ^= h32 >> 15
|
||||
h32 *= prime2
|
||||
h32 ^= h32 >> 13
|
||||
h32 *= prime3
|
||||
h32 ^= h32 >> 16
|
||||
|
||||
return h32
|
||||
}
|
||||
|
||||
func rol1(u uint32) uint32 {
|
||||
return u<<1 | u>>31
|
||||
}
|
||||
|
||||
func rol7(u uint32) uint32 {
|
||||
return u<<7 | u>>25
|
||||
}
|
||||
|
||||
func rol11(u uint32) uint32 {
|
||||
return u<<11 | u>>21
|
||||
}
|
||||
|
||||
func rol12(u uint32) uint32 {
|
||||
return u<<12 | u>>20
|
||||
}
|
||||
|
||||
func rol13(u uint32) uint32 {
|
||||
return u<<13 | u>>19
|
||||
}
|
||||
|
||||
func rol17(u uint32) uint32 {
|
||||
return u<<17 | u>>15
|
||||
}
|
||||
|
||||
func rol18(u uint32) uint32 {
|
||||
return u<<18 | u>>14
|
||||
}
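XXHZero behaves like a hash.Hash32 with a fixed seed of zero: Reset, incremental Write calls, then Sum32, and the streaming result matches a one-shot ChecksumZero over the same bytes. A minimal usage sketch under that assumption; note the package is internal to the module, so the import only works from within it:

package main

import (
	"fmt"

	"github.com/pierrec/lz4/v4/internal/xxh32"
)

func main() {
	var h xxh32.XXHZero
	h.Reset()

	// Incremental writes hash the same bytes as a single one-shot call.
	h.Write([]byte("hello "))
	h.Write([]byte("world"))

	fmt.Println(h.Sum32() == xxh32.ChecksumZero([]byte("hello world"))) // true
}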
|
|
@@ -0,0 +1,11 @@
|
|||
// +build !noasm
|
||||
|
||||
package xxh32
|
||||
|
||||
// ChecksumZero returns the 32-bit hash of input.
|
||||
//
|
||||
//go:noescape
|
||||
func ChecksumZero(input []byte) uint32
|
||||
|
||||
//go:noescape
|
||||
func update(v *[4]uint32, buf *[16]byte, input []byte)
|
|
@@ -0,0 +1,251 @@
|
|||
// +build !noasm
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Register allocation.
|
||||
#define p R0
|
||||
#define n R1
|
||||
#define h R2
|
||||
#define v1 R2 // Alias for h.
|
||||
#define v2 R3
|
||||
#define v3 R4
|
||||
#define v4 R5
|
||||
#define x1 R6
|
||||
#define x2 R7
|
||||
#define x3 R8
|
||||
#define x4 R9
|
||||
|
||||
// We need the primes in registers. The 16-byte loop only uses prime{1,2}.
|
||||
#define prime1r R11
|
||||
#define prime2r R12
|
||||
#define prime3r R3 // The rest can alias v{2-4}.
|
||||
#define prime4r R4
|
||||
#define prime5r R5
|
||||
|
||||
// Update round macros. These read from and increment p.
|
||||
|
||||
#define round16aligned \
|
||||
MOVM.IA.W (p), [x1, x2, x3, x4] \
|
||||
\
|
||||
MULA x1, prime2r, v1, v1 \
|
||||
MULA x2, prime2r, v2, v2 \
|
||||
MULA x3, prime2r, v3, v3 \
|
||||
MULA x4, prime2r, v4, v4 \
|
||||
\
|
||||
MOVW v1 @> 19, v1 \
|
||||
MOVW v2 @> 19, v2 \
|
||||
MOVW v3 @> 19, v3 \
|
||||
MOVW v4 @> 19, v4 \
|
||||
\
|
||||
MUL prime1r, v1 \
|
||||
MUL prime1r, v2 \
|
||||
MUL prime1r, v3 \
|
||||
MUL prime1r, v4 \
|
||||
|
||||
#define round16unaligned \
|
||||
MOVBU.P 16(p), x1 \
|
||||
MOVBU -15(p), x2 \
|
||||
ORR x2 << 8, x1 \
|
||||
MOVBU -14(p), x3 \
|
||||
MOVBU -13(p), x4 \
|
||||
ORR x4 << 8, x3 \
|
||||
ORR x3 << 16, x1 \
|
||||
\
|
||||
MULA x1, prime2r, v1, v1 \
|
||||
MOVW v1 @> 19, v1 \
|
||||
MUL prime1r, v1 \
|
||||
\
|
||||
MOVBU -12(p), x1 \
|
||||
MOVBU -11(p), x2 \
|
||||
ORR x2 << 8, x1 \
|
||||
MOVBU -10(p), x3 \
|
||||
MOVBU -9(p), x4 \
|
||||
ORR x4 << 8, x3 \
|
||||
ORR x3 << 16, x1 \
|
||||
\
|
||||
MULA x1, prime2r, v2, v2 \
|
||||
MOVW v2 @> 19, v2 \
|
||||
MUL prime1r, v2 \
|
||||
\
|
||||
MOVBU -8(p), x1 \
|
||||
MOVBU -7(p), x2 \
|
||||
ORR x2 << 8, x1 \
|
||||
MOVBU -6(p), x3 \
|
||||
MOVBU -5(p), x4 \
|
||||
ORR x4 << 8, x3 \
|
||||
ORR x3 << 16, x1 \
|
||||
\
|
||||
MULA x1, prime2r, v3, v3 \
|
||||
MOVW v3 @> 19, v3 \
|
||||
MUL prime1r, v3 \
|
||||
\
|
||||
MOVBU -4(p), x1 \
|
||||
MOVBU -3(p), x2 \
|
||||
ORR x2 << 8, x1 \
|
||||
MOVBU -2(p), x3 \
|
||||
MOVBU -1(p), x4 \
|
||||
ORR x4 << 8, x3 \
|
||||
ORR x3 << 16, x1 \
|
||||
\
|
||||
MULA x1, prime2r, v4, v4 \
|
||||
MOVW v4 @> 19, v4 \
|
||||
MUL prime1r, v4 \
|
||||
|
||||
|
||||
// func ChecksumZero([]byte) uint32
|
||||
TEXT ·ChecksumZero(SB), NOFRAME|NOSPLIT, $-4-16
|
||||
MOVW input_base+0(FP), p
|
||||
MOVW input_len+4(FP), n
|
||||
|
||||
MOVW $const_prime1, prime1r
|
||||
MOVW $const_prime2, prime2r
|
||||
|
||||
// Set up h for n < 16. It's tempting to say {ADD prime5, n, h}
|
||||
// here, but that's a pseudo-op that generates a load through R11.
|
||||
MOVW $const_prime5, prime5r
|
||||
ADD prime5r, n, h
|
||||
CMP $0, n
|
||||
BEQ end
|
||||
|
||||
// We let n go negative so we can do comparisons with SUB.S
|
||||
// instead of separate CMP.
|
||||
SUB.S $16, n
|
||||
BMI loop16done
|
||||
|
||||
ADD prime1r, prime2r, v1
|
||||
MOVW prime2r, v2
|
||||
MOVW $0, v3
|
||||
RSB $0, prime1r, v4
|
||||
|
||||
TST $3, p
|
||||
BNE loop16unaligned
|
||||
|
||||
loop16aligned:
|
||||
SUB.S $16, n
|
||||
round16aligned
|
||||
BPL loop16aligned
|
||||
B loop16finish
|
||||
|
||||
loop16unaligned:
|
||||
SUB.S $16, n
|
||||
round16unaligned
|
||||
BPL loop16unaligned
|
||||
|
||||
loop16finish:
|
||||
MOVW v1 @> 31, h
|
||||
ADD v2 @> 25, h
|
||||
ADD v3 @> 20, h
|
||||
ADD v4 @> 14, h
|
||||
|
||||
// h += len(input) with v2 as temporary.
|
||||
MOVW input_len+4(FP), v2
|
||||
ADD v2, h
|
||||
|
||||
loop16done:
|
||||
ADD $16, n // Restore number of bytes left.
|
||||
|
||||
SUB.S $4, n
|
||||
MOVW $const_prime3, prime3r
|
||||
BMI loop4done
|
||||
MOVW $const_prime4, prime4r
|
||||
|
||||
TST $3, p
|
||||
BNE loop4unaligned
|
||||
|
||||
loop4aligned:
|
||||
SUB.S $4, n
|
||||
|
||||
MOVW.P 4(p), x1
|
||||
MULA prime3r, x1, h, h
|
||||
MOVW h @> 15, h
|
||||
MUL prime4r, h
|
||||
|
||||
BPL loop4aligned
|
||||
B loop4done
|
||||
|
||||
loop4unaligned:
|
||||
SUB.S $4, n
|
||||
|
||||
MOVBU.P 4(p), x1
|
||||
MOVBU -3(p), x2
|
||||
ORR x2 << 8, x1
|
||||
MOVBU -2(p), x3
|
||||
ORR x3 << 16, x1
|
||||
MOVBU -1(p), x4
|
||||
ORR x4 << 24, x1
|
||||
|
||||
MULA prime3r, x1, h, h
|
||||
MOVW h @> 15, h
|
||||
MUL prime4r, h
|
||||
|
||||
BPL loop4unaligned
|
||||
|
||||
loop4done:
|
||||
ADD.S $4, n // Restore number of bytes left.
|
||||
BEQ end
|
||||
|
||||
MOVW $const_prime5, prime5r
|
||||
|
||||
loop1:
|
||||
SUB.S $1, n
|
||||
|
||||
MOVBU.P 1(p), x1
|
||||
MULA prime5r, x1, h, h
|
||||
MOVW h @> 21, h
|
||||
MUL prime1r, h
|
||||
|
||||
BNE loop1
|
||||
|
||||
end:
|
||||
MOVW $const_prime3, prime3r
|
||||
EOR h >> 15, h
|
||||
MUL prime2r, h
|
||||
EOR h >> 13, h
|
||||
MUL prime3r, h
|
||||
EOR h >> 16, h
|
||||
|
||||
MOVW h, ret+12(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func update(v *[4]uint32, buf *[16]byte, p []byte)
|
||||
TEXT ·update(SB), NOFRAME|NOSPLIT, $-4-20
|
||||
MOVW v+0(FP), p
|
||||
MOVM.IA (p), [v1, v2, v3, v4]
|
||||
|
||||
MOVW $const_prime1, prime1r
|
||||
MOVW $const_prime2, prime2r
|
||||
|
||||
// Process buf, if not nil.
|
||||
MOVW buf+4(FP), p
|
||||
CMP $0, p
|
||||
BEQ noBuffered
|
||||
|
||||
round16aligned
|
||||
|
||||
noBuffered:
|
||||
MOVW input_base +8(FP), p
|
||||
MOVW input_len +12(FP), n
|
||||
|
||||
SUB.S $16, n
|
||||
BMI end
|
||||
|
||||
TST $3, p
|
||||
BNE loop16unaligned
|
||||
|
||||
loop16aligned:
|
||||
SUB.S $16, n
|
||||
round16aligned
|
||||
BPL loop16aligned
|
||||
B end
|
||||
|
||||
loop16unaligned:
|
||||
SUB.S $16, n
|
||||
round16unaligned
|
||||
BPL loop16unaligned
|
||||
|
||||
end:
|
||||
MOVW v+0(FP), p
|
||||
MOVM.IA [v1, v2, v3, v4], (p)
|
||||
RET
|
10
vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_other.go
generated
vendored
Normal file
|
@@ -0,0 +1,10 @@
|
|||
// +build !arm noasm
|
||||
|
||||
package xxh32
|
||||
|
||||
// ChecksumZero returns the 32-bit hash of input.
|
||||
func ChecksumZero(input []byte) uint32 { return checksumZeroGo(input) }
|
||||
|
||||
func update(v *[4]uint32, buf *[16]byte, input []byte) {
|
||||
updateGo(v, buf, input)
|
||||
}
|
|
@@ -0,0 +1,157 @@
|
|||
// Package lz4 implements reading and writing lz4 compressed data.
|
||||
//
|
||||
// The package supports both the LZ4 stream format,
|
||||
// as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html,
|
||||
// and the LZ4 block format, defined at
|
||||
// http://fastcompression.blogspot.fr/2011/05/lz4-explained.html.
|
||||
//
|
||||
// See https://github.com/lz4/lz4 for the reference C implementation.
|
||||
package lz4
|
||||
|
||||
import (
|
||||
"github.com/pierrec/lz4/v4/internal/lz4block"
|
||||
"github.com/pierrec/lz4/v4/internal/lz4errors"
|
||||
)
|
||||
|
||||
func _() {
|
||||
// Safety checks for duplicated elements.
|
||||
var x [1]struct{}
|
||||
_ = x[lz4block.CompressionLevel(Fast)-lz4block.Fast]
|
||||
_ = x[Block64Kb-BlockSize(lz4block.Block64Kb)]
|
||||
_ = x[Block256Kb-BlockSize(lz4block.Block256Kb)]
|
||||
_ = x[Block1Mb-BlockSize(lz4block.Block1Mb)]
|
||||
_ = x[Block4Mb-BlockSize(lz4block.Block4Mb)]
|
||||
}
|
||||
|
||||
// CompressBlockBound returns the maximum size of a given buffer of size n, when not compressible.
|
||||
func CompressBlockBound(n int) int {
|
||||
return lz4block.CompressBlockBound(n)
|
||||
}
|
||||
|
||||
// UncompressBlock uncompresses the source buffer into the destination one,
|
||||
// and returns the uncompressed size.
|
||||
//
|
||||
// The destination buffer must be sized appropriately.
|
||||
//
|
||||
// An error is returned if the source data is invalid or the destination buffer is too small.
|
||||
func UncompressBlock(src, dst []byte) (int, error) {
|
||||
return lz4block.UncompressBlock(src, dst, nil)
|
||||
}
|
||||
|
||||
// UncompressBlockWithDict uncompresses the source buffer into the destination one using a
|
||||
// dictionary, and returns the uncompressed size.
|
||||
//
|
||||
// The destination buffer must be sized appropriately.
|
||||
//
|
||||
// An error is returned if the source data is invalid or the destination buffer is too small.
|
||||
func UncompressBlockWithDict(src, dst, dict []byte) (int, error) {
|
||||
return lz4block.UncompressBlock(src, dst, dict)
|
||||
}
|
||||
|
||||
// A Compressor compresses data into the LZ4 block format.
|
||||
// It uses a fast compression algorithm.
|
||||
//
|
||||
// A Compressor is not safe for concurrent use by multiple goroutines.
|
||||
//
|
||||
// Use a Writer to compress into the LZ4 stream format.
|
||||
type Compressor struct{ c lz4block.Compressor }
|
||||
|
||||
// CompressBlock compresses the source buffer src into the destination dst.
|
||||
//
|
||||
// If compression is successful, the first return value is the size of the
|
||||
// compressed data, which is always >0.
|
||||
//
|
||||
// If dst has length at least CompressBlockBound(len(src)), compression always
|
||||
// succeeds. Otherwise, the first return value is zero. The error return is
|
||||
// non-nil if the compressed data does not fit in dst, but it might fit in a
|
||||
// larger buffer that is still smaller than CompressBlockBound(len(src)). The
|
||||
// return value (0, nil) means the data is likely incompressible and a buffer
|
||||
// of length CompressBlockBound(len(src)) should be passed in.
|
||||
func (c *Compressor) CompressBlock(src, dst []byte) (int, error) {
|
||||
return c.c.CompressBlock(src, dst)
|
||||
}
|
||||
|
||||
// CompressBlock compresses the source buffer into the destination one.
|
||||
// This is the fast version of LZ4 compression and also the default one.
|
||||
//
|
||||
// The argument hashTable is scratch space for a hash table used by the
|
||||
// compressor. If provided, it should have length at least 1<<16. If it is
|
||||
// shorter (or nil), CompressBlock allocates its own hash table.
|
||||
//
|
||||
// The size of the compressed data is returned.
|
||||
//
|
||||
// If the destination buffer size is lower than CompressBlockBound and
|
||||
// the compressed size is 0 and no error, then the data is incompressible.
|
||||
//
|
||||
// An error is returned if the destination buffer is too small.
|
||||
|
||||
// CompressBlock is equivalent to Compressor.CompressBlock.
|
||||
// The final argument is ignored and should be set to nil.
|
||||
//
|
||||
// This function is deprecated. Use a Compressor instead.
|
||||
func CompressBlock(src, dst []byte, _ []int) (int, error) {
|
||||
return lz4block.CompressBlock(src, dst)
|
||||
}
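Putting the block-level API above together, a compress and decompress round trip looks roughly like the following sketch (buffer sizes follow the documented contract: CompressBlockBound(len(src)) for compression, at least the original length for decompression):

package main

import (
	"fmt"

	"github.com/pierrec/lz4/v4"
)

func main() {
	src := []byte("hello hello hello hello hello hello hello hello")

	// With a destination of CompressBlockBound(len(src)) the call cannot fail
	// for lack of space; n == 0 would mean the data is incompressible.
	var c lz4.Compressor
	comp := make([]byte, lz4.CompressBlockBound(len(src)))
	n, err := c.CompressBlock(src, comp)
	if err != nil {
		panic(err)
	}

	// Decompress into a buffer at least as large as the original data.
	dec := make([]byte, len(src))
	m, err := lz4.UncompressBlock(comp[:n], dec)
	if err != nil {
		panic(err)
	}
	fmt.Println(m == len(src), string(dec[:m]) == string(src)) // true true
}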
|
||||
|
||||
// A CompressorHC compresses data into the LZ4 block format.
|
||||
// Its compression ratio is potentially better than that of a Compressor,
|
||||
// but it is also slower and requires more memory.
|
||||
//
|
||||
// A Compressor is not safe for concurrent use by multiple goroutines.
|
||||
//
|
||||
// Use a Writer to compress into the LZ4 stream format.
|
||||
type CompressorHC struct {
|
||||
// Level is the maximum search depth for compression.
|
||||
// Values <= 0 mean no maximum.
|
||||
Level CompressionLevel
|
||||
c lz4block.CompressorHC
|
||||
}
|
||||
|
||||
// CompressBlock compresses the source buffer src into the destination dst.
|
||||
//
|
||||
// If compression is successful, the first return value is the size of the
|
||||
// compressed data, which is always >0.
|
||||
//
|
||||
// If dst has length at least CompressBlockBound(len(src)), compression always
|
||||
// succeeds. Otherwise, the first return value is zero. The error return is
|
||||
// non-nil if the compressed data does not fit in dst, but it might fit in a
|
||||
// larger buffer that is still smaller than CompressBlockBound(len(src)). The
|
||||
// return value (0, nil) means the data is likely incompressible and a buffer
|
||||
// of length CompressBlockBound(len(src)) should be passed in.
|
||||
func (c *CompressorHC) CompressBlock(src, dst []byte) (int, error) {
|
||||
return c.c.CompressBlock(src, dst, lz4block.CompressionLevel(c.Level))
|
||||
}
|
||||
|
||||
// CompressBlockHC is equivalent to CompressorHC.CompressBlock.
|
||||
// The final two arguments are ignored and should be set to nil.
|
||||
//
|
||||
// This function is deprecated. Use a CompressorHC instead.
|
||||
func CompressBlockHC(src, dst []byte, depth CompressionLevel, _, _ []int) (int, error) {
|
||||
return lz4block.CompressBlockHC(src, dst, lz4block.CompressionLevel(depth))
|
||||
}
|
||||
|
||||
const (
|
||||
// ErrInvalidSourceShortBuffer is returned by UncompressBlock or CompressBlock when a compressed
|
||||
// block is corrupted or the destination buffer is not large enough for the uncompressed data.
|
||||
ErrInvalidSourceShortBuffer = lz4errors.ErrInvalidSourceShortBuffer
|
||||
// ErrInvalidFrame is returned when reading an invalid LZ4 archive.
|
||||
ErrInvalidFrame = lz4errors.ErrInvalidFrame
|
||||
// ErrInternalUnhandledState is an internal error.
|
||||
ErrInternalUnhandledState = lz4errors.ErrInternalUnhandledState
|
||||
// ErrInvalidHeaderChecksum is returned when reading a frame.
|
||||
ErrInvalidHeaderChecksum = lz4errors.ErrInvalidHeaderChecksum
|
||||
// ErrInvalidBlockChecksum is returned when reading a frame.
|
||||
ErrInvalidBlockChecksum = lz4errors.ErrInvalidBlockChecksum
|
||||
// ErrInvalidFrameChecksum is returned when reading a frame.
|
||||
ErrInvalidFrameChecksum = lz4errors.ErrInvalidFrameChecksum
|
||||
// ErrOptionInvalidCompressionLevel is returned when the supplied compression level is invalid.
|
||||
ErrOptionInvalidCompressionLevel = lz4errors.ErrOptionInvalidCompressionLevel
|
||||
// ErrOptionClosedOrError is returned when an option is applied to a closed or in error object.
|
||||
ErrOptionClosedOrError = lz4errors.ErrOptionClosedOrError
|
||||
// ErrOptionInvalidBlockSize is returned when an invalid block size is provided.
|
||||
ErrOptionInvalidBlockSize = lz4errors.ErrOptionInvalidBlockSize
|
||||
// ErrOptionNotApplicable is returned when trying to apply an option to an object not supporting it.
|
||||
ErrOptionNotApplicable = lz4errors.ErrOptionNotApplicable
|
||||
// ErrWriterNotClosed is returned when attempting to reset an unclosed writer.
|
||||
ErrWriterNotClosed = lz4errors.ErrWriterNotClosed
|
||||
)
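Since the frame code wraps these sentinel errors with %w (as in the checksum failures above), callers can match them with errors.Is even after extra context has been added. A small illustration, using a hand-wrapped error to stand in for one produced by the reader:

package main

import (
	"errors"
	"fmt"

	"github.com/pierrec/lz4/v4"
)

func main() {
	// Stand-in for an error returned while reading a frame.
	err := fmt.Errorf("%w: got 1234; expected abcd", lz4.ErrInvalidFrameChecksum)

	fmt.Println(errors.Is(err, lz4.ErrInvalidFrameChecksum)) // true
}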
|
|
@@ -0,0 +1,214 @@
|
|||
package lz4
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"runtime"
|
||||
|
||||
"github.com/pierrec/lz4/v4/internal/lz4block"
|
||||
"github.com/pierrec/lz4/v4/internal/lz4errors"
|
||||
)
|
||||
|
||||
//go:generate go run golang.org/x/tools/cmd/stringer -type=BlockSize,CompressionLevel -output options_gen.go
|
||||
|
||||
type (
|
||||
applier interface {
|
||||
Apply(...Option) error
|
||||
private()
|
||||
}
|
||||
// Option defines the parameters to setup an LZ4 Writer or Reader.
|
||||
Option func(applier) error
|
||||
)
|
||||
|
||||
// String returns a string representation of the option with its parameter(s).
|
||||
func (o Option) String() string {
|
||||
return o(nil).Error()
|
||||
}
|
||||
|
||||
// Default options.
|
||||
var (
|
||||
DefaultBlockSizeOption = BlockSizeOption(Block4Mb)
|
||||
DefaultChecksumOption = ChecksumOption(true)
|
||||
DefaultConcurrency = ConcurrencyOption(1)
|
||||
defaultOnBlockDone = OnBlockDoneOption(nil)
|
||||
)
|
||||
|
||||
const (
|
||||
Block64Kb BlockSize = 1 << (16 + iota*2)
|
||||
Block256Kb
|
||||
Block1Mb
|
||||
Block4Mb
|
||||
)
|
||||
|
||||
// BlockSize defines the size of the blocks to be compressed.
|
||||
type BlockSize uint32
|
||||
|
||||
// BlockSizeOption defines the maximum size of compressed blocks (default=Block4Mb).
|
||||
func BlockSizeOption(size BlockSize) Option {
|
||||
return func(a applier) error {
|
||||
switch w := a.(type) {
|
||||
case nil:
|
||||
s := fmt.Sprintf("BlockSizeOption(%s)", size)
|
||||
return lz4errors.Error(s)
|
||||
case *Writer:
|
||||
size := uint32(size)
|
||||
if !lz4block.IsValid(size) {
|
||||
return fmt.Errorf("%w: %d", lz4errors.ErrOptionInvalidBlockSize, size)
|
||||
}
|
||||
w.frame.Descriptor.Flags.BlockSizeIndexSet(lz4block.Index(size))
|
||||
return nil
|
||||
}
|
||||
return lz4errors.ErrOptionNotApplicable
|
||||
}
|
||||
}
|
||||
|
||||
// BlockChecksumOption enables or disables block checksum (default=false).
|
||||
func BlockChecksumOption(flag bool) Option {
|
||||
return func(a applier) error {
|
||||
switch w := a.(type) {
|
||||
case nil:
|
||||
s := fmt.Sprintf("BlockChecksumOption(%v)", flag)
|
||||
return lz4errors.Error(s)
|
||||
case *Writer:
|
||||
w.frame.Descriptor.Flags.BlockChecksumSet(flag)
|
||||
return nil
|
||||
}
|
||||
return lz4errors.ErrOptionNotApplicable
|
||||
}
|
||||
}
|
||||
|
||||
// ChecksumOption enables/disables all blocks or content checksum (default=true).
|
||||
func ChecksumOption(flag bool) Option {
|
||||
return func(a applier) error {
|
||||
switch w := a.(type) {
|
||||
case nil:
|
||||
s := fmt.Sprintf("ChecksumOption(%v)", flag)
|
||||
return lz4errors.Error(s)
|
||||
case *Writer:
|
||||
w.frame.Descriptor.Flags.ContentChecksumSet(flag)
|
||||
return nil
|
||||
}
|
||||
return lz4errors.ErrOptionNotApplicable
|
||||
}
|
||||
}
|
||||
|
||||
// SizeOption sets the size of the original uncompressed data (default=0). It is useful to know the size of the
|
||||
// whole uncompressed data stream.
|
||||
func SizeOption(size uint64) Option {
|
||||
return func(a applier) error {
|
||||
switch w := a.(type) {
|
||||
case nil:
|
||||
s := fmt.Sprintf("SizeOption(%d)", size)
|
||||
return lz4errors.Error(s)
|
||||
case *Writer:
|
||||
w.frame.Descriptor.Flags.SizeSet(size > 0)
|
||||
w.frame.Descriptor.ContentSize = size
|
||||
return nil
|
||||
}
|
||||
return lz4errors.ErrOptionNotApplicable
|
||||
}
|
||||
}
|
||||
|
||||
// ConcurrencyOption sets the number of goroutines used for compression.
|
||||
// If n <= 0, then the output of runtime.GOMAXPROCS(0) is used.
|
||||
func ConcurrencyOption(n int) Option {
|
||||
if n <= 0 {
|
||||
n = runtime.GOMAXPROCS(0)
|
||||
}
|
||||
return func(a applier) error {
|
||||
switch rw := a.(type) {
|
||||
case nil:
|
||||
s := fmt.Sprintf("ConcurrencyOption(%d)", n)
|
||||
return lz4errors.Error(s)
|
||||
case *Writer:
|
||||
rw.num = n
|
||||
return nil
|
||||
case *Reader:
|
||||
rw.num = n
|
||||
return nil
|
||||
}
|
||||
return lz4errors.ErrOptionNotApplicable
|
||||
}
|
||||
}
|
||||
|
||||
// CompressionLevel defines the level of compression to use. The higher the better, but slower, compression.
|
||||
type CompressionLevel uint32
|
||||
|
||||
const (
|
||||
Fast CompressionLevel = 0
|
||||
Level1 CompressionLevel = 1 << (8 + iota)
|
||||
Level2
|
||||
Level3
|
||||
Level4
|
||||
Level5
|
||||
Level6
|
||||
Level7
|
||||
Level8
|
||||
Level9
|
||||
)
|
||||
|
||||
// CompressionLevelOption defines the compression level (default=Fast).
|
||||
func CompressionLevelOption(level CompressionLevel) Option {
|
||||
return func(a applier) error {
|
||||
switch w := a.(type) {
|
||||
case nil:
|
||||
s := fmt.Sprintf("CompressionLevelOption(%s)", level)
|
||||
return lz4errors.Error(s)
|
||||
case *Writer:
|
||||
switch level {
|
||||
case Fast, Level1, Level2, Level3, Level4, Level5, Level6, Level7, Level8, Level9:
|
||||
default:
|
||||
return fmt.Errorf("%w: %d", lz4errors.ErrOptionInvalidCompressionLevel, level)
|
||||
}
|
||||
w.level = lz4block.CompressionLevel(level)
|
||||
return nil
|
||||
}
|
||||
return lz4errors.ErrOptionNotApplicable
|
||||
}
|
||||
}
|
||||
|
||||
func onBlockDone(int) {}
|
||||
|
||||
// OnBlockDoneOption is triggered when a block has been processed. For a Writer, it is when it has been compressed,
|
||||
// for a Reader, it is when it has been uncompressed.
|
||||
func OnBlockDoneOption(handler func(size int)) Option {
|
||||
if handler == nil {
|
||||
handler = onBlockDone
|
||||
}
|
||||
return func(a applier) error {
|
||||
switch rw := a.(type) {
|
||||
case nil:
|
||||
s := fmt.Sprintf("OnBlockDoneOption(%s)", reflect.TypeOf(handler).String())
|
||||
return lz4errors.Error(s)
|
||||
case *Writer:
|
||||
rw.handler = handler
|
||||
return nil
|
||||
case *Reader:
|
||||
rw.handler = handler
|
||||
return nil
|
||||
}
|
||||
return lz4errors.ErrOptionNotApplicable
|
||||
}
|
||||
}
|
||||
|
||||
// LegacyOption provides support for writing LZ4 frames in the legacy format.
|
||||
//
|
||||
// See https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md#legacy-frame.
|
||||
//
|
||||
// NB. compressed Linux kernel images use a tweaked LZ4 legacy format where
|
||||
// the compressed stream is followed by the original (uncompressed) size of
|
||||
// the kernel (https://events.static.linuxfound.org/sites/events/files/lcjpcojp13_klee.pdf).
|
||||
// This is also supported as a special case.
|
||||
func LegacyOption(legacy bool) Option {
|
||||
return func(a applier) error {
|
||||
switch rw := a.(type) {
|
||||
case nil:
|
||||
s := fmt.Sprintf("LegacyOption(%v)", legacy)
|
||||
return lz4errors.Error(s)
|
||||
case *Writer:
|
||||
rw.legacy = legacy
|
||||
return nil
|
||||
}
|
||||
return lz4errors.ErrOptionNotApplicable
|
||||
}
|
||||
}
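A rough sketch of applying these options in a stream round trip through a buffer. It assumes the package also exposes NewWriter with a Close method alongside the NewReader defined further down, and that the Writer implements the same Apply method these options dispatch on:

package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"

	"github.com/pierrec/lz4/v4"
)

func main() {
	var buf bytes.Buffer

	zw := lz4.NewWriter(&buf) // assumed constructor for the Writer type used above
	if err := zw.Apply(
		lz4.BlockSizeOption(lz4.Block256Kb),
		lz4.BlockChecksumOption(true),
		lz4.ConcurrencyOption(-1), // <= 0 resolves to runtime.GOMAXPROCS(0)
	); err != nil {
		panic(err)
	}

	if _, err := io.Copy(zw, strings.NewReader("some data to compress")); err != nil {
		panic(err)
	}
	if err := zw.Close(); err != nil { // assumed to flush the end mark and checksum
		panic(err)
	}

	out, err := io.ReadAll(lz4.NewReader(&buf))
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
}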
|
|
@@ -0,0 +1,92 @@
|
|||
// Code generated by "stringer -type=BlockSize,CompressionLevel -output options_gen.go"; DO NOT EDIT.
|
||||
|
||||
package lz4
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[Block64Kb-65536]
|
||||
_ = x[Block256Kb-262144]
|
||||
_ = x[Block1Mb-1048576]
|
||||
_ = x[Block4Mb-4194304]
|
||||
}
|
||||
|
||||
const (
|
||||
_BlockSize_name_0 = "Block64Kb"
|
||||
_BlockSize_name_1 = "Block256Kb"
|
||||
_BlockSize_name_2 = "Block1Mb"
|
||||
_BlockSize_name_3 = "Block4Mb"
|
||||
)
|
||||
|
||||
func (i BlockSize) String() string {
|
||||
switch {
|
||||
case i == 65536:
|
||||
return _BlockSize_name_0
|
||||
case i == 262144:
|
||||
return _BlockSize_name_1
|
||||
case i == 1048576:
|
||||
return _BlockSize_name_2
|
||||
case i == 4194304:
|
||||
return _BlockSize_name_3
|
||||
default:
|
||||
return "BlockSize(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
}
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[Fast-0]
|
||||
_ = x[Level1-512]
|
||||
_ = x[Level2-1024]
|
||||
_ = x[Level3-2048]
|
||||
_ = x[Level4-4096]
|
||||
_ = x[Level5-8192]
|
||||
_ = x[Level6-16384]
|
||||
_ = x[Level7-32768]
|
||||
_ = x[Level8-65536]
|
||||
_ = x[Level9-131072]
|
||||
}
|
||||
|
||||
const (
|
||||
_CompressionLevel_name_0 = "Fast"
|
||||
_CompressionLevel_name_1 = "Level1"
|
||||
_CompressionLevel_name_2 = "Level2"
|
||||
_CompressionLevel_name_3 = "Level3"
|
||||
_CompressionLevel_name_4 = "Level4"
|
||||
_CompressionLevel_name_5 = "Level5"
|
||||
_CompressionLevel_name_6 = "Level6"
|
||||
_CompressionLevel_name_7 = "Level7"
|
||||
_CompressionLevel_name_8 = "Level8"
|
||||
_CompressionLevel_name_9 = "Level9"
|
||||
)
|
||||
|
||||
func (i CompressionLevel) String() string {
|
||||
switch {
|
||||
case i == 0:
|
||||
return _CompressionLevel_name_0
|
||||
case i == 512:
|
||||
return _CompressionLevel_name_1
|
||||
case i == 1024:
|
||||
return _CompressionLevel_name_2
|
||||
case i == 2048:
|
||||
return _CompressionLevel_name_3
|
||||
case i == 4096:
|
||||
return _CompressionLevel_name_4
|
||||
case i == 8192:
|
||||
return _CompressionLevel_name_5
|
||||
case i == 16384:
|
||||
return _CompressionLevel_name_6
|
||||
case i == 32768:
|
||||
return _CompressionLevel_name_7
|
||||
case i == 65536:
|
||||
return _CompressionLevel_name_8
|
||||
case i == 131072:
|
||||
return _CompressionLevel_name_9
|
||||
default:
|
||||
return "CompressionLevel(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,275 @@
|
|||
package lz4
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
|
||||
"github.com/pierrec/lz4/v4/internal/lz4block"
|
||||
"github.com/pierrec/lz4/v4/internal/lz4errors"
|
||||
"github.com/pierrec/lz4/v4/internal/lz4stream"
|
||||
)
|
||||
|
||||
var readerStates = []aState{
|
||||
noState: newState,
|
||||
errorState: newState,
|
||||
newState: readState,
|
||||
readState: closedState,
|
||||
closedState: newState,
|
||||
}
|
||||
|
||||
// NewReader returns a new LZ4 frame decoder.
|
||||
func NewReader(r io.Reader) *Reader {
|
||||
return newReader(r, false)
|
||||
}
|
||||
|
||||
func newReader(r io.Reader, legacy bool) *Reader {
|
||||
zr := &Reader{frame: lz4stream.NewFrame()}
|
||||
zr.state.init(readerStates)
|
||||
_ = zr.Apply(DefaultConcurrency, defaultOnBlockDone)
|
||||
zr.Reset(r)
|
||||
return zr
|
||||
}
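A Reader starts in newState, so options must be applied before the first Read. A sketch of wiring a progress callback and querying the declared content size; the input file name is hypothetical:

package main

import (
	"fmt"
	"io"
	"os"

	"github.com/pierrec/lz4/v4"
)

func main() {
	f, err := os.Open("data.lz4") // hypothetical compressed input
	if err != nil {
		panic(err)
	}
	defer f.Close()

	zr := lz4.NewReader(f)
	var done int64
	// The handler receives the number of uncompressed bytes handed back by each Read.
	if err := zr.Apply(lz4.OnBlockDoneOption(func(n int) { done += int64(n) })); err != nil {
		panic(err)
	}

	if _, err := io.Copy(io.Discard, zr); err != nil {
		panic(err)
	}
	// Size is only non-zero when the frame header carried a content size.
	fmt.Println("decompressed:", done, "declared:", zr.Size())
}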
|
||||
|
||||
// Reader allows reading an LZ4 stream.
|
||||
type Reader struct {
|
||||
state _State
|
||||
src io.Reader // source reader
|
||||
num int // concurrency level
|
||||
frame *lz4stream.Frame // frame being read
|
||||
data []byte // block buffer allocated in non concurrent mode
|
||||
reads chan []byte // pending data
|
||||
idx int // size of pending data
|
||||
handler func(int)
|
||||
cum uint32
|
||||
dict []byte
|
||||
}
|
||||
|
||||
func (*Reader) private() {}
|
||||
|
||||
func (r *Reader) Apply(options ...Option) (err error) {
|
||||
defer r.state.check(&err)
|
||||
switch r.state.state {
|
||||
case newState:
|
||||
case errorState:
|
||||
return r.state.err
|
||||
default:
|
||||
return lz4errors.ErrOptionClosedOrError
|
||||
}
|
||||
for _, o := range options {
|
||||
if err = o(r); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Size returns the size of the underlying uncompressed data, if set in the stream.
|
||||
func (r *Reader) Size() int {
|
||||
switch r.state.state {
|
||||
case readState, closedState:
|
||||
if r.frame.Descriptor.Flags.Size() {
|
||||
return int(r.frame.Descriptor.ContentSize)
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (r *Reader) isNotConcurrent() bool {
|
||||
return r.num == 1
|
||||
}
|
||||
|
||||
func (r *Reader) init() error {
|
||||
err := r.frame.ParseHeaders(r.src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !r.frame.Descriptor.Flags.BlockIndependence() {
|
||||
// We can't decompress dependent blocks concurrently.
|
||||
// Instead of throwing an error to the user, silently drop concurrency
|
||||
r.num = 1
|
||||
}
|
||||
data, err := r.frame.InitR(r.src, r.num)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
r.reads = data
|
||||
r.idx = 0
|
||||
size := r.frame.Descriptor.Flags.BlockSizeIndex()
|
||||
r.data = size.Get()
|
||||
r.cum = 0
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *Reader) Read(buf []byte) (n int, err error) {
|
||||
defer r.state.check(&err)
|
||||
switch r.state.state {
|
||||
case readState:
|
||||
case closedState, errorState:
|
||||
return 0, r.state.err
|
||||
case newState:
|
||||
// First initialization.
|
||||
if err = r.init(); r.state.next(err) {
|
||||
return
|
||||
}
|
||||
default:
|
||||
return 0, r.state.fail()
|
||||
}
|
||||
for len(buf) > 0 {
|
||||
var bn int
|
||||
if r.idx == 0 {
|
||||
if r.isNotConcurrent() {
|
||||
bn, err = r.read(buf)
|
||||
} else {
|
||||
lz4block.Put(r.data)
|
||||
r.data = <-r.reads
|
||||
if len(r.data) == 0 {
|
||||
// No uncompressed data: something went wrong or we are done.
|
||||
err = r.frame.Blocks.ErrorR()
|
||||
}
|
||||
}
|
||||
switch err {
|
||||
case nil:
|
||||
case io.EOF:
|
||||
if er := r.frame.CloseR(r.src); er != nil {
|
||||
err = er
|
||||
}
|
||||
lz4block.Put(r.data)
|
||||
r.data = nil
|
||||
return
|
||||
default:
|
||||
return
|
||||
}
|
||||
}
|
||||
if bn == 0 {
|
||||
// Fill buf with buffered data.
|
||||
bn = copy(buf, r.data[r.idx:])
|
||||
r.idx += bn
|
||||
if r.idx == len(r.data) {
|
||||
// All data read, get ready for the next Read.
|
||||
r.idx = 0
|
||||
}
|
||||
}
|
||||
buf = buf[bn:]
|
||||
n += bn
|
||||
r.handler(bn)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// read uncompresses the next block as follow:
|
||||
// - if buf has enough room, the block is uncompressed into it directly
|
||||
// and the lenght of used space is returned
|
||||
// - else, the uncompress data is stored in r.data and 0 is returned
|
||||
func (r *Reader) read(buf []byte) (int, error) {
|
||||
block := r.frame.Blocks.Block
|
||||
_, err := block.Read(r.frame, r.src, r.cum)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
var direct bool
|
||||
dst := r.data[:cap(r.data)]
|
||||
if len(buf) >= len(dst) {
|
||||
// Uncompress directly into buf.
|
||||
direct = true
|
||||
dst = buf
|
||||
}
|
||||
dst, err = block.Uncompress(r.frame, dst, r.dict, true)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if !r.frame.Descriptor.Flags.BlockIndependence() {
|
||||
if len(r.dict)+len(dst) > 128*1024 {
|
||||
preserveSize := 64*1024 - len(dst)
|
||||
if preserveSize < 0 {
|
||||
preserveSize = 0
|
||||
}
|
||||
r.dict = r.dict[len(r.dict)-preserveSize:]
|
||||
}
|
||||
r.dict = append(r.dict, dst...)
|
||||
}
|
||||
r.cum += uint32(len(dst))
|
||||
if direct {
|
||||
return len(dst), nil
|
||||
}
|
||||
r.data = dst
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// Reset clears the state of the Reader r such that it is equivalent to its
|
||||
// initial state from NewReader, but instead reading from reader.
|
||||
// No access to reader is performed.
|
||||
func (r *Reader) Reset(reader io.Reader) {
|
||||
if r.data != nil {
|
||||
lz4block.Put(r.data)
|
||||
r.data = nil
|
||||
}
|
||||
r.frame.Reset(r.num)
|
||||
r.state.reset()
|
||||
r.src = reader
|
||||
r.reads = nil
|
||||
}
|
||||
|
||||
// WriteTo efficiently uncompresses the data from the Reader underlying source to w.
|
||||
func (r *Reader) WriteTo(w io.Writer) (n int64, err error) {
|
||||
switch r.state.state {
|
||||
case closedState, errorState:
|
||||
return 0, r.state.err
|
||||
case newState:
|
||||
if err = r.init(); r.state.next(err) {
|
||||
return
|
||||
}
|
||||
default:
|
||||
return 0, r.state.fail()
|
||||
}
|
||||
defer r.state.nextd(&err)
|
||||
|
||||
var data []byte
|
||||
if r.isNotConcurrent() {
|
||||
size := r.frame.Descriptor.Flags.BlockSizeIndex()
|
||||
data = size.Get()
|
||||
defer lz4block.Put(data)
|
||||
}
|
||||
for {
|
||||
var bn int
|
||||
var dst []byte
|
||||
if r.isNotConcurrent() {
|
||||
bn, err = r.read(data)
|
||||
dst = data[:bn]
|
||||
} else {
|
||||
lz4block.Put(dst)
|
||||
dst = <-r.reads
|
||||
bn = len(dst)
|
||||
if bn == 0 {
|
||||
// No uncompressed data: something went wrong or we are done.
|
||||
err = r.frame.Blocks.ErrorR()
|
||||
}
|
||||
}
|
||||
switch err {
|
||||
case nil:
|
||||
case io.EOF:
|
||||
err = r.frame.CloseR(r.src)
|
||||
return
|
||||
default:
|
||||
return
|
||||
}
|
||||
r.handler(bn)
|
||||
bn, err = w.Write(dst)
|
||||
n += int64(bn)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ValidFrameHeader returns a bool indicating if the given bytes slice matches a LZ4 header.
|
||||
func ValidFrameHeader(in []byte) (bool, error) {
|
||||
f := lz4stream.NewFrame()
|
||||
err := f.ParseHeaders(bytes.NewReader(in))
|
||||
if err == nil {
|
||||
return true, nil
|
||||
}
|
||||
if err == lz4errors.ErrInvalidFrame {
|
||||
return false, nil
|
||||
}
|
||||
return false, err
|
||||
}
|
|
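For context on how the vendored Reader is meant to be consumed, here is a minimal decompression sketch. It is not part of this commit; the helper and file name are illustrative, and it relies only on NewReader and WriteTo as defined above.

package main

import (
	"io"
	"os"

	lz4 "github.com/pierrec/lz4/v4"
)

// decompress drains one LZ4 frame from src into dst.
// NewReader wraps any io.Reader; WriteTo streams the uncompressed payload out.
func decompress(dst io.Writer, src io.Reader) (int64, error) {
	zr := lz4.NewReader(src)
	return zr.WriteTo(dst)
}

func main() {
	in, err := os.Open("index.lz4") // illustrative input file
	if err != nil {
		panic(err)
	}
	defer in.Close()

	if _, err := decompress(os.Stdout, in); err != nil {
		panic(err)
	}
}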
@ -0,0 +1,75 @@
package lz4

import (
	"errors"
	"fmt"
	"io"

	"github.com/pierrec/lz4/v4/internal/lz4errors"
)

//go:generate go run golang.org/x/tools/cmd/stringer -type=aState -output state_gen.go

const (
	noState     aState = iota // uninitialized reader
	errorState                // unrecoverable error encountered
	newState                  // instantiated object
	readState                 // reading data
	writeState                // writing data
	closedState               // all done
)

type (
	aState uint8
	_State struct {
		states []aState
		state  aState
		err    error
	}
)

func (s *_State) init(states []aState) {
	s.states = states
	s.state = states[0]
}

func (s *_State) reset() {
	s.state = s.states[0]
	s.err = nil
}

// next sets the state to the next one unless it is passed a non-nil error.
// It returns whether or not it is in error.
func (s *_State) next(err error) bool {
	if err != nil {
		s.err = fmt.Errorf("%s: %w", s.state, err)
		s.state = errorState
		return true
	}
	s.state = s.states[s.state]
	return false
}

// nextd is like next but for defers.
func (s *_State) nextd(errp *error) bool {
	return errp != nil && s.next(*errp)
}

// check sets s in error if not already in error and if the error is not nil or io.EOF.
func (s *_State) check(errp *error) {
	if s.state == errorState || errp == nil {
		return
	}
	if err := *errp; err != nil {
		s.err = fmt.Errorf("%w[%s]", err, s.state)
		if !errors.Is(err, io.EOF) {
			s.state = errorState
		}
	}
}

func (s *_State) fail() error {
	s.state = errorState
	s.err = fmt.Errorf("%w[%s]", lz4errors.ErrInternalUnhandledState, s.state)
	return s.err
}
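Both Reader and Writer are driven by this small table-driven state machine: each state indexes the readerStates/writerStates slice to find its successor, and any error short-circuits to errorState (the stringer output below makes those states readable in wrapped errors). The following self-contained toy illustrates the same pattern; the names are illustrative and do not exist in the package.

package main

import "fmt"

type toyState uint8

const (
	idle toyState = iota
	running
	finished
)

// transitions plays the role of readerStates/writerStates: the current state
// is the index, the value is the next state. _State.next does exactly this
// lookup unless it is handed an error, in which case it jumps to errorState.
var transitions = []toyState{
	idle:     running,
	running:  finished,
	finished: idle,
}

func main() {
	s := idle
	for i := 0; i < 4; i++ {
		fmt.Println(s) // prints 0 1 2 0: table-driven stepping
		s = transitions[s]
	}
}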
@ -0,0 +1,28 @@
// Code generated by "stringer -type=aState -output state_gen.go"; DO NOT EDIT.

package lz4

import "strconv"

func _() {
	// An "invalid array index" compiler error signifies that the constant values have changed.
	// Re-run the stringer command to generate them again.
	var x [1]struct{}
	_ = x[noState-0]
	_ = x[errorState-1]
	_ = x[newState-2]
	_ = x[readState-3]
	_ = x[writeState-4]
	_ = x[closedState-5]
}

const _aState_name = "noStateerrorStatenewStatereadStatewriteStateclosedState"

var _aState_index = [...]uint8{0, 7, 17, 25, 34, 44, 55}

func (i aState) String() string {
	if i >= aState(len(_aState_index)-1) {
		return "aState(" + strconv.FormatInt(int64(i), 10) + ")"
	}
	return _aState_name[_aState_index[i]:_aState_index[i+1]]
}
@ -0,0 +1,242 @@
package lz4

import (
	"io"

	"github.com/pierrec/lz4/v4/internal/lz4block"
	"github.com/pierrec/lz4/v4/internal/lz4errors"
	"github.com/pierrec/lz4/v4/internal/lz4stream"
)

var writerStates = []aState{
	noState:     newState,
	newState:    writeState,
	writeState:  closedState,
	closedState: newState,
	errorState:  newState,
}

// NewWriter returns a new LZ4 frame encoder.
func NewWriter(w io.Writer) *Writer {
	zw := &Writer{frame: lz4stream.NewFrame()}
	zw.state.init(writerStates)
	_ = zw.Apply(DefaultBlockSizeOption, DefaultChecksumOption, DefaultConcurrency, defaultOnBlockDone)
	zw.Reset(w)
	return zw
}

// Writer allows writing an LZ4 stream.
type Writer struct {
	state   _State
	src     io.Writer                 // destination writer
	level   lz4block.CompressionLevel // how hard to try
	num     int                       // concurrency level
	frame   *lz4stream.Frame          // frame being built
	data    []byte                    // pending data
	idx     int                       // size of pending data
	handler func(int)
	legacy  bool
}

func (*Writer) private() {}

func (w *Writer) Apply(options ...Option) (err error) {
	defer w.state.check(&err)
	switch w.state.state {
	case newState:
	case errorState:
		return w.state.err
	default:
		return lz4errors.ErrOptionClosedOrError
	}
	w.Reset(w.src)
	for _, o := range options {
		if err = o(w); err != nil {
			return
		}
	}
	return
}

func (w *Writer) isNotConcurrent() bool {
	return w.num == 1
}

// init sets up the Writer when in newState. It does not change the Writer state.
func (w *Writer) init() error {
	w.frame.InitW(w.src, w.num, w.legacy)
	size := w.frame.Descriptor.Flags.BlockSizeIndex()
	w.data = size.Get()
	w.idx = 0
	return w.frame.Descriptor.Write(w.frame, w.src)
}

func (w *Writer) Write(buf []byte) (n int, err error) {
	defer w.state.check(&err)
	switch w.state.state {
	case writeState:
	case closedState, errorState:
		return 0, w.state.err
	case newState:
		if err = w.init(); w.state.next(err) {
			return
		}
	default:
		return 0, w.state.fail()
	}

	zn := len(w.data)
	for len(buf) > 0 {
		if w.isNotConcurrent() && w.idx == 0 && len(buf) >= zn {
			// Avoid a copy as there is enough data for a block.
			if err = w.write(buf[:zn], false); err != nil {
				return
			}
			n += zn
			buf = buf[zn:]
			continue
		}
		// Accumulate the data to be compressed.
		m := copy(w.data[w.idx:], buf)
		n += m
		w.idx += m
		buf = buf[m:]

		if w.idx < len(w.data) {
			// Buffer not filled.
			return
		}

		// Buffer full.
		if err = w.write(w.data, true); err != nil {
			return
		}
		if !w.isNotConcurrent() {
			size := w.frame.Descriptor.Flags.BlockSizeIndex()
			w.data = size.Get()
		}
		w.idx = 0
	}
	return
}

func (w *Writer) write(data []byte, safe bool) error {
	if w.isNotConcurrent() {
		block := w.frame.Blocks.Block
		err := block.Compress(w.frame, data, w.level).Write(w.frame, w.src)
		w.handler(len(block.Data))
		return err
	}
	c := make(chan *lz4stream.FrameDataBlock)
	w.frame.Blocks.Blocks <- c
	go func(c chan *lz4stream.FrameDataBlock, data []byte, safe bool) {
		b := lz4stream.NewFrameDataBlock(w.frame)
		c <- b.Compress(w.frame, data, w.level)
		<-c
		w.handler(len(b.Data))
		b.Close(w.frame)
		if safe {
			// safe to put it back as the last usage of it was FrameDataBlock.Write() called before c is closed
			lz4block.Put(data)
		}
	}(c, data, safe)

	return nil
}

// Flush any buffered data to the underlying writer immediately.
func (w *Writer) Flush() (err error) {
	switch w.state.state {
	case writeState:
	case errorState:
		return w.state.err
	case newState:
		if err = w.init(); w.state.next(err) {
			return
		}
	default:
		return nil
	}

	if w.idx > 0 {
		// Flush pending data, disable w.data freeing as it is done later on.
		if err = w.write(w.data[:w.idx], false); err != nil {
			return err
		}
		w.idx = 0
	}
	return nil
}

// Close closes the Writer, flushing any unwritten data to the underlying writer
// without closing it.
func (w *Writer) Close() error {
	if err := w.Flush(); err != nil {
		return err
	}
	err := w.frame.CloseW(w.src, w.num)
	// It is now safe to free the buffer.
	if w.data != nil {
		lz4block.Put(w.data)
		w.data = nil
	}
	return err
}

// Reset clears the state of the Writer w such that it is equivalent to its
// initial state from NewWriter, but instead writing to writer.
// Reset keeps the previous options unless overwritten by the supplied ones.
// No access to writer is performed.
//
// w.Close must be called before Reset or pending data may be dropped.
func (w *Writer) Reset(writer io.Writer) {
	w.frame.Reset(w.num)
	w.state.reset()
	w.src = writer
}

// ReadFrom efficiently reads from r and compresses into the Writer destination.
func (w *Writer) ReadFrom(r io.Reader) (n int64, err error) {
	switch w.state.state {
	case closedState, errorState:
		return 0, w.state.err
	case newState:
		if err = w.init(); w.state.next(err) {
			return
		}
	default:
		return 0, w.state.fail()
	}
	defer w.state.check(&err)

	size := w.frame.Descriptor.Flags.BlockSizeIndex()
	var done bool
	var rn int
	data := size.Get()
	if w.isNotConcurrent() {
		// Keep the same buffer for the whole process.
		defer lz4block.Put(data)
	}
	for !done {
		rn, err = io.ReadFull(r, data)
		switch err {
		case nil:
		case io.EOF, io.ErrUnexpectedEOF: // read may be partial
			done = true
		default:
			return
		}
		n += int64(rn)
		err = w.write(data[:rn], true)
		if err != nil {
			return
		}
		w.handler(rn)
		if !done && !w.isNotConcurrent() {
			// The buffer will be returned automatically by goroutines (safe=true)
			// so get a new one for the next round.
			data = size.Get()
		}
	}
	return
}
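A rough usage sketch of the Writer side added above, relying only on NewWriter, ReadFrom, and Close as defined in this file; the helper and file names are illustrative and not part of this commit.

package main

import (
	"os"

	lz4 "github.com/pierrec/lz4/v4"
)

// compressFile writes srcPath into dstPath as an LZ4 frame.
// ReadFrom pulls block-sized chunks straight from the source;
// Close flushes the last partial block and the frame footer.
func compressFile(srcPath, dstPath string) error {
	src, err := os.Open(srcPath)
	if err != nil {
		return err
	}
	defer src.Close()

	dst, err := os.Create(dstPath)
	if err != nil {
		return err
	}
	defer dst.Close()

	zw := lz4.NewWriter(dst)
	if _, err := zw.ReadFrom(src); err != nil {
		return err
	}
	return zw.Close() // must happen before dst is closed
}

func main() {
	if err := compressFile("index.gob", "index.gob.lz4"); err != nil {
		panic(err)
	}
}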
@ -8,6 +8,9 @@ github.com/alecthomas/chroma/v2/styles
## explicit; go 1.13
github.com/dlclark/regexp2
github.com/dlclark/regexp2/syntax
# github.com/golang/snappy v0.0.4
## explicit
github.com/golang/snappy
# github.com/inconshreveable/mousetrap v1.1.0
## explicit; go 1.18
github.com/inconshreveable/mousetrap

@ -23,6 +26,13 @@ github.com/klauspost/compress/internal/cpuinfo
github.com/klauspost/compress/internal/snapref
github.com/klauspost/compress/zstd
github.com/klauspost/compress/zstd/internal/xxhash
# github.com/pierrec/lz4/v4 v4.1.19
## explicit; go 1.14
github.com/pierrec/lz4/v4
github.com/pierrec/lz4/v4/internal/lz4block
github.com/pierrec/lz4/v4/internal/lz4errors
github.com/pierrec/lz4/v4/internal/lz4stream
github.com/pierrec/lz4/v4/internal/xxh32
# github.com/spf13/cobra v1.8.0
## explicit; go 1.15
github.com/spf13/cobra