Remove snappy, lz4, lzw, deflate, and gzip compression options

Seednode 2024-01-09 13:39:08 -06:00
parent 656c74b883
commit 431de92bbb
49 changed files with 13 additions and 8051 deletions


@@ -71,7 +71,7 @@ The index file consists of (optionally compressed) [gobs](https://pkg.go.dev/enc
 The compression format can be specified via the `--compression` flag.
-Supported formats are `flate`, `gzip`, `lz4`, `lzw`, `none`, `snappy`, `zlib`, and `zstd`.
+Supported formats are `none`, `zlib`, and `zstd`.
 Optionally, `--compression-fast` can be used to use the fastest instead of the best compression mode.
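As a usage illustration of the surviving options, `roulette --compression zlib --compression-fast <path>` would build the index with fastest-level zlib, where `roulette` is the binary name assumed from the module path and `<path>` stands in for a media directory.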
@@ -153,7 +153,7 @@ Flags:
       --case-sensitive            use case-sensitive matching for filters
       --code                      enable support for source code files
       --code-theme string         theme for source code syntax highlighting (default "solarized-dark256")
-      --compression string        compression format to use for index (flate, gzip, lz4, lzw, none, snappy, zlib, zstd) (default "zstd")
+      --compression string        compression format to use for index (none, zlib, zstd) (default "zstd")
       --compression-fast          use fastest compression level (default is best)
       --concurrency int           maximum concurrency for scan threads (default 2147483647)
   -d, --debug                     display even more verbose logs


@@ -16,7 +16,7 @@ import (
 var (
 	ErrInvalidAdminPrefix    = errors.New("admin path must match the pattern " + AllowedCharacters)
-	ErrInvalidCompression    = errors.New("supported compression formats: flate, gzip, lz4, lzw, none, snappy, zlib, zstd")
+	ErrInvalidCompression    = errors.New("supported compression formats: none, zlib, zstd")
 	ErrInvalidConcurrency    = errors.New("concurrency limit must be a positive integer")
 	ErrInvalidFileCountRange = errors.New("maximum file count limit must be greater than or equal to minimum file count limit")
 	ErrInvalidFileCountValue = errors.New("file count limits must be non-negative integers no greater than 2147483647")


@@ -5,9 +5,6 @@ Copyright © 2023 Seednode <seednode@seedno.de>
 package cmd

 import (
-	"compress/flate"
-	"compress/gzip"
-	"compress/lzw"
 	"compress/zlib"
 	"encoding/gob"
 	"fmt"
@@ -17,20 +14,13 @@ import (
 	"sync"
 	"time"

-	"github.com/golang/snappy"
 	"github.com/julienschmidt/httprouter"
 	"github.com/klauspost/compress/zstd"
-	"github.com/pierrec/lz4/v4"

 	"seedno.de/seednode/roulette/types"
 )

 var CompressionFormats = []string{
-	"flate",
-	"gzip",
-	"lz4",
-	"lzw",
 	"none",
-	"snappy",
 	"zlib",
 	"zstd",
 }
@@ -104,24 +94,19 @@ func (index *fileIndex) isEmpty() bool {
 	return length == 0
 }

-func getReader(format string, file io.Reader) (io.Reader, error) {
+func getReader(format string, file io.Reader) (io.ReadCloser, error) {
 	switch format {
-	case "flate":
-		return flate.NewReader(file), nil
-	case "gzip":
-		return gzip.NewReader(file)
-	case "lz4":
-		return lz4.NewReader(file), nil
-	case "lzw":
-		return lzw.NewReader(file, lzw.MSB, 8), nil
 	case "none":
 		return io.NopCloser(file), nil
-	case "snappy":
-		return snappy.NewReader(file), nil
 	case "zlib":
 		return zlib.NewReader(file)
 	case "zstd":
-		return zstd.NewReader(file)
+		reader, err := zstd.NewReader(file)
+		if err != nil {
+			return io.NopCloser(file), err
+		}
+
+		return reader.IOReadCloser(), nil
 	}

 	return io.NopCloser(file), ErrInvalidCompression
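The reworked `zstd` arm is needed because `zstd.NewReader` returns a `*zstd.Decoder`, whose `Close` method returns nothing and therefore cannot satisfy the function's new `io.ReadCloser` return type on its own; `Decoder.IOReadCloser()` performs the adaptation. A minimal standalone sketch of that round trip (the sample payload and `package main` wrapper are illustrative only):

```go
package main

import (
	"bytes"
	"fmt"
	"io"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Compress a sample payload in memory.
	var buf bytes.Buffer
	enc, err := zstd.NewWriter(&buf)
	if err != nil {
		panic(err)
	}
	if _, err := enc.Write([]byte("hello, index")); err != nil {
		panic(err)
	}
	if err := enc.Close(); err != nil {
		panic(err)
	}

	// zstd.NewReader returns a *zstd.Decoder; its Close() has no error
	// return value, so it is not an io.ReadCloser by itself.
	dec, err := zstd.NewReader(bytes.NewReader(buf.Bytes()))
	if err != nil {
		panic(err)
	}

	// IOReadCloser adapts the decoder to io.ReadCloser, matching the
	// signature that getReader now promises.
	var rc io.ReadCloser = dec.IOReadCloser()
	defer rc.Close()

	out, err := io.ReadAll(rc)
	if err != nil {
		panic(err)
	}

	fmt.Println(string(out)) // hello, index
}
```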
@@ -129,38 +114,8 @@ func getReader(format string, file io.Reader) (io.Reader, error) {

 func getWriter(format string, file io.WriteCloser) (io.WriteCloser, error) {
 	switch {
-	case format == "flate" && CompressionFast:
-		return flate.NewWriter(file, flate.BestCompression)
-	case format == "flate":
-		return flate.NewWriter(file, flate.BestSpeed)
-	case format == "gzip" && CompressionFast:
-		return gzip.NewWriterLevel(file, gzip.BestSpeed)
-	case format == "gzip":
-		return gzip.NewWriterLevel(file, gzip.BestCompression)
-	case format == "lz4" && CompressionFast:
-		encoder := lz4.NewWriter(file)
-
-		err := encoder.Apply(lz4.CompressionLevelOption(lz4.Fast))
-		if err != nil {
-			return file, err
-		}
-
-		return encoder, nil
-	case format == "lz4":
-		encoder := lz4.NewWriter(file)
-
-		err := encoder.Apply(lz4.CompressionLevelOption(lz4.Level9))
-		if err != nil {
-			return file, err
-		}
-
-		return encoder, nil
-	case format == "lzw":
-		return lzw.NewWriter(file, lzw.MSB, 8), nil
 	case format == "none":
 		return file, nil
-	case format == "snappy":
-		return snappy.NewBufferedWriter(file), nil
 	case format == "zlib" && CompressionFast:
 		return zlib.NewWriterLevel(file, zlib.BestSpeed)
 	case format == "zlib":
@@ -245,6 +200,7 @@ func (index *fileIndex) Import(path string) error {
 	if err != nil {
 		return err
 	}
+	defer reader.Close()

 	dec := gob.NewDecoder(reader)
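The added `defer reader.Close()` ensures the decompressor is released once the gob decode finishes; for zstd in particular, closing frees the decoder's internal resources. A self-contained round trip of the export/import flow, with a hypothetical `indexData` struct standing in for the project's real index types:

```go
package main

import (
	"bytes"
	"encoding/gob"
	"fmt"

	"github.com/klauspost/compress/zstd"
)

// indexData is a stand-in for the gob-encoded index payload.
type indexData struct {
	Paths []string
}

func main() {
	// Export: gob-encode through a zstd writer.
	var buf bytes.Buffer
	enc, err := zstd.NewWriter(&buf)
	if err != nil {
		panic(err)
	}
	if err := gob.NewEncoder(enc).Encode(indexData{Paths: []string{"a.jpg", "b.png"}}); err != nil {
		panic(err)
	}
	if err := enc.Close(); err != nil { // flush the zstd frame
		panic(err)
	}

	// Import: gob-decode through the io.ReadCloser that getReader returns.
	dec, err := zstd.NewReader(&buf)
	if err != nil {
		panic(err)
	}
	reader := dec.IOReadCloser()
	defer reader.Close() // the line this commit adds to Import

	var imported indexData
	if err := gob.NewDecoder(reader).Decode(&imported); err != nil {
		panic(err)
	}

	fmt.Println(imported.Paths) // [a.jpg b.png]
}
```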


@@ -17,7 +17,7 @@ import (
 const (
 	AllowedCharacters string = `^[A-z0-9.\-_]+$`
-	ReleaseVersion    string = "4.1.0"
+	ReleaseVersion    string = "5.0.0"
 )

 var (
@@ -130,7 +130,7 @@ func init() {
 	rootCmd.Flags().BoolVar(&CaseSensitive, "case-sensitive", false, "use case-sensitive matching for filters")
 	rootCmd.Flags().BoolVar(&Code, "code", false, "enable support for source code files")
 	rootCmd.Flags().StringVar(&CodeTheme, "code-theme", "solarized-dark256", "theme for source code syntax highlighting")
-	rootCmd.Flags().StringVar(&Compression, "compression", "zstd", "compression format to use for index (flate, gzip, lz4, lzw, none, snappy, zlib, zstd)")
+	rootCmd.Flags().StringVar(&Compression, "compression", "zstd", "compression format to use for index (none, zlib, zstd)")
 	rootCmd.Flags().BoolVar(&CompressionFast, "compression-fast", false, "use fastest compression level (default is best)")
 	rootCmd.Flags().IntVar(&Concurrency, "concurrency", math.MaxInt32, "maximum concurrency for scan threads")
 	rootCmd.Flags().BoolVarP(&Debug, "debug", "d", false, "display even more verbose logs")
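The diff doesn't show where the `--compression` value is validated against `CompressionFormats`; a plausible sketch of such a check, assuming Go 1.21's `slices` package (go.mod below targets 1.21) and the identifiers from the hunks above:

```go
package cmd

import "slices"

// validateCompression is a hypothetical helper: it rejects any value of
// the --compression flag that is missing from the trimmed-down
// CompressionFormats slice, returning the ErrInvalidCompression error
// defined above.
func validateCompression(format string) error {
	if !slices.Contains(CompressionFormats, format) {
		return ErrInvalidCompression
	}

	return nil
}
```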

go.mod

@@ -4,10 +4,8 @@ go 1.21

 require (
 	github.com/alecthomas/chroma/v2 v2.12.0
-	github.com/golang/snappy v0.0.4
 	github.com/julienschmidt/httprouter v1.3.0
 	github.com/klauspost/compress v1.17.4
-	github.com/pierrec/lz4/v4 v4.1.21
 	github.com/spf13/cobra v1.8.0
 	github.com/yosssi/gohtml v0.0.0-20201013000340-ee4748c638f4
 	golang.org/x/image v0.15.0

go.sum

@@ -7,8 +7,6 @@ github.com/alecthomas/repr v0.2.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW5
 github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
 github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
-github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
-github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
 github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
 github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
@@ -17,8 +15,6 @@ github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4d
 github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
 github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
 github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
-github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
-github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0=
 github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho=


@@ -1,16 +0,0 @@
cmd/snappytool/snappytool
testdata/bench
# These explicitly listed benchmark data files are for an obsolete version of
# snappy_test.go.
testdata/alice29.txt
testdata/asyoulik.txt
testdata/fireworks.jpeg
testdata/geo.protodata
testdata/html
testdata/html_x_4
testdata/kppkn.gtb
testdata/lcet10.txt
testdata/paper-100k.pdf
testdata/plrabn12.txt
testdata/urls.10K


@@ -1,18 +0,0 @@
# This is the official list of Snappy-Go authors for copyright purposes.
# This file is distinct from the CONTRIBUTORS files.
# See the latter for an explanation.
# Names should be added to this file as
# Name or Organization <email address>
# The email address is not required for organizations.
# Please keep the list sorted.
Amazon.com, Inc
Damian Gryski <dgryski@gmail.com>
Eric Buth <eric@topos.com>
Google Inc.
Jan Mercl <0xjnml@gmail.com>
Klaus Post <klauspost@gmail.com>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Sebastien Binet <seb.binet@gmail.com>


@@ -1,41 +0,0 @@
# This is the official list of people who can contribute
# (and typically have contributed) code to the Snappy-Go repository.
# The AUTHORS file lists the copyright holders; this file
# lists people. For example, Google employees are listed here
# but not in AUTHORS, because Google holds the copyright.
#
# The submission process automatically checks to make sure
# that people submitting code are listed in this file (by email address).
#
# Names should be added to this file only after verifying that
# the individual or the individual's organization has agreed to
# the appropriate Contributor License Agreement, found here:
#
# http://code.google.com/legal/individual-cla-v1.0.html
# http://code.google.com/legal/corporate-cla-v1.0.html
#
# The agreement for individuals can be filled out on the web.
#
# When adding J Random Contributor's name to this file,
# either J's name or J's organization's name should be
# added to the AUTHORS file, depending on whether the
# individual or corporate CLA was used.
# Names should be added to this file like so:
# Name <email address>
# Please keep the list sorted.
Alex Legg <alexlegg@google.com>
Damian Gryski <dgryski@gmail.com>
Eric Buth <eric@topos.com>
Jan Mercl <0xjnml@gmail.com>
Jonathan Swinney <jswinney@amazon.com>
Kai Backman <kaib@golang.org>
Klaus Post <klauspost@gmail.com>
Marc-Antoine Ruel <maruel@chromium.org>
Nigel Tao <nigeltao@golang.org>
Rob Pike <r@golang.org>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Russ Cox <rsc@golang.org>
Sebastien Binet <seb.binet@gmail.com>


@@ -1,27 +0,0 @@
Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@@ -1,107 +0,0 @@
The Snappy compression format in the Go programming language.
To download and install from source:
$ go get github.com/golang/snappy
Unless otherwise noted, the Snappy-Go source files are distributed
under the BSD-style license found in the LICENSE file.
Benchmarks.
The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten
or so files, the same set used by the C++ Snappy code (github.com/google/snappy
and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @
3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29:
"go test -test.bench=."
_UFlat0-8 2.19GB/s ± 0% html
_UFlat1-8 1.41GB/s ± 0% urls
_UFlat2-8 23.5GB/s ± 2% jpg
_UFlat3-8 1.91GB/s ± 0% jpg_200
_UFlat4-8 14.0GB/s ± 1% pdf
_UFlat5-8 1.97GB/s ± 0% html4
_UFlat6-8 814MB/s ± 0% txt1
_UFlat7-8 785MB/s ± 0% txt2
_UFlat8-8 857MB/s ± 0% txt3
_UFlat9-8 719MB/s ± 1% txt4
_UFlat10-8 2.84GB/s ± 0% pb
_UFlat11-8 1.05GB/s ± 0% gaviota
_ZFlat0-8 1.04GB/s ± 0% html
_ZFlat1-8 534MB/s ± 0% urls
_ZFlat2-8 15.7GB/s ± 1% jpg
_ZFlat3-8 740MB/s ± 3% jpg_200
_ZFlat4-8 9.20GB/s ± 1% pdf
_ZFlat5-8 991MB/s ± 0% html4
_ZFlat6-8 379MB/s ± 0% txt1
_ZFlat7-8 352MB/s ± 0% txt2
_ZFlat8-8 396MB/s ± 1% txt3
_ZFlat9-8 327MB/s ± 1% txt4
_ZFlat10-8 1.33GB/s ± 1% pb
_ZFlat11-8 605MB/s ± 1% gaviota
"go test -test.bench=. -tags=noasm"
_UFlat0-8 621MB/s ± 2% html
_UFlat1-8 494MB/s ± 1% urls
_UFlat2-8 23.2GB/s ± 1% jpg
_UFlat3-8 1.12GB/s ± 1% jpg_200
_UFlat4-8 4.35GB/s ± 1% pdf
_UFlat5-8 609MB/s ± 0% html4
_UFlat6-8 296MB/s ± 0% txt1
_UFlat7-8 288MB/s ± 0% txt2
_UFlat8-8 309MB/s ± 1% txt3
_UFlat9-8 280MB/s ± 1% txt4
_UFlat10-8 753MB/s ± 0% pb
_UFlat11-8 400MB/s ± 0% gaviota
_ZFlat0-8 409MB/s ± 1% html
_ZFlat1-8 250MB/s ± 1% urls
_ZFlat2-8 12.3GB/s ± 1% jpg
_ZFlat3-8 132MB/s ± 0% jpg_200
_ZFlat4-8 2.92GB/s ± 0% pdf
_ZFlat5-8 405MB/s ± 1% html4
_ZFlat6-8 179MB/s ± 1% txt1
_ZFlat7-8 170MB/s ± 1% txt2
_ZFlat8-8 189MB/s ± 1% txt3
_ZFlat9-8 164MB/s ± 1% txt4
_ZFlat10-8 479MB/s ± 1% pb
_ZFlat11-8 270MB/s ± 1% gaviota
For comparison (Go's encoded output is byte-for-byte identical to C++'s), here
are the numbers from C++ Snappy's
make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log
BM_UFlat/0 2.4GB/s html
BM_UFlat/1 1.4GB/s urls
BM_UFlat/2 21.8GB/s jpg
BM_UFlat/3 1.5GB/s jpg_200
BM_UFlat/4 13.3GB/s pdf
BM_UFlat/5 2.1GB/s html4
BM_UFlat/6 1.0GB/s txt1
BM_UFlat/7 959.4MB/s txt2
BM_UFlat/8 1.0GB/s txt3
BM_UFlat/9 864.5MB/s txt4
BM_UFlat/10 2.9GB/s pb
BM_UFlat/11 1.2GB/s gaviota
BM_ZFlat/0 944.3MB/s html (22.31 %)
BM_ZFlat/1 501.6MB/s urls (47.78 %)
BM_ZFlat/2 14.3GB/s jpg (99.95 %)
BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %)
BM_ZFlat/4 8.3GB/s pdf (83.30 %)
BM_ZFlat/5 903.5MB/s html4 (22.52 %)
BM_ZFlat/6 336.0MB/s txt1 (57.88 %)
BM_ZFlat/7 312.3MB/s txt2 (61.91 %)
BM_ZFlat/8 353.1MB/s txt3 (54.99 %)
BM_ZFlat/9 289.9MB/s txt4 (66.26 %)
BM_ZFlat/10 1.2GB/s pb (19.68 %)
BM_ZFlat/11 527.4MB/s gaviota (37.72 %)


@@ -1,264 +0,0 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
import (
"encoding/binary"
"errors"
"io"
)
var (
// ErrCorrupt reports that the input is invalid.
ErrCorrupt = errors.New("snappy: corrupt input")
// ErrTooLarge reports that the uncompressed length is too large.
ErrTooLarge = errors.New("snappy: decoded block is too large")
// ErrUnsupported reports that the input isn't supported.
ErrUnsupported = errors.New("snappy: unsupported input")
errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
)
// DecodedLen returns the length of the decoded block.
func DecodedLen(src []byte) (int, error) {
v, _, err := decodedLen(src)
return v, err
}
// decodedLen returns the length of the decoded block and the number of bytes
// that the length header occupied.
func decodedLen(src []byte) (blockLen, headerLen int, err error) {
v, n := binary.Uvarint(src)
if n <= 0 || v > 0xffffffff {
return 0, 0, ErrCorrupt
}
const wordSize = 32 << (^uint(0) >> 32 & 1)
if wordSize == 32 && v > 0x7fffffff {
return 0, 0, ErrTooLarge
}
return int(v), n, nil
}
const (
decodeErrCodeCorrupt = 1
decodeErrCodeUnsupportedLiteralLength = 2
)
// Decode returns the decoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire decoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// Decode handles the Snappy block format, not the Snappy stream format.
func Decode(dst, src []byte) ([]byte, error) {
dLen, s, err := decodedLen(src)
if err != nil {
return nil, err
}
if dLen <= len(dst) {
dst = dst[:dLen]
} else {
dst = make([]byte, dLen)
}
switch decode(dst, src[s:]) {
case 0:
return dst, nil
case decodeErrCodeUnsupportedLiteralLength:
return nil, errUnsupportedLiteralLength
}
return nil, ErrCorrupt
}
// NewReader returns a new Reader that decompresses from r, using the framing
// format described at
// https://github.com/google/snappy/blob/master/framing_format.txt
func NewReader(r io.Reader) *Reader {
return &Reader{
r: r,
decoded: make([]byte, maxBlockSize),
buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize),
}
}
// Reader is an io.Reader that can read Snappy-compressed bytes.
//
// Reader handles the Snappy stream format, not the Snappy block format.
type Reader struct {
r io.Reader
err error
decoded []byte
buf []byte
// decoded[i:j] contains decoded bytes that have not yet been passed on.
i, j int
readHeader bool
}
// Reset discards any buffered data, resets all state, and switches the Snappy
// reader to read from r. This permits reusing a Reader rather than allocating
// a new one.
func (r *Reader) Reset(reader io.Reader) {
r.r = reader
r.err = nil
r.i = 0
r.j = 0
r.readHeader = false
}
func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) {
if _, r.err = io.ReadFull(r.r, p); r.err != nil {
if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
r.err = ErrCorrupt
}
return false
}
return true
}
func (r *Reader) fill() error {
for r.i >= r.j {
if !r.readFull(r.buf[:4], true) {
return r.err
}
chunkType := r.buf[0]
if !r.readHeader {
if chunkType != chunkTypeStreamIdentifier {
r.err = ErrCorrupt
return r.err
}
r.readHeader = true
}
chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
if chunkLen > len(r.buf) {
r.err = ErrUnsupported
return r.err
}
// The chunk types are specified at
// https://github.com/google/snappy/blob/master/framing_format.txt
switch chunkType {
case chunkTypeCompressedData:
// Section 4.2. Compressed data (chunk type 0x00).
if chunkLen < checksumSize {
r.err = ErrCorrupt
return r.err
}
buf := r.buf[:chunkLen]
if !r.readFull(buf, false) {
return r.err
}
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
buf = buf[checksumSize:]
n, err := DecodedLen(buf)
if err != nil {
r.err = err
return r.err
}
if n > len(r.decoded) {
r.err = ErrCorrupt
return r.err
}
if _, err := Decode(r.decoded, buf); err != nil {
r.err = err
return r.err
}
if crc(r.decoded[:n]) != checksum {
r.err = ErrCorrupt
return r.err
}
r.i, r.j = 0, n
continue
case chunkTypeUncompressedData:
// Section 4.3. Uncompressed data (chunk type 0x01).
if chunkLen < checksumSize {
r.err = ErrCorrupt
return r.err
}
buf := r.buf[:checksumSize]
if !r.readFull(buf, false) {
return r.err
}
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
// Read directly into r.decoded instead of via r.buf.
n := chunkLen - checksumSize
if n > len(r.decoded) {
r.err = ErrCorrupt
return r.err
}
if !r.readFull(r.decoded[:n], false) {
return r.err
}
if crc(r.decoded[:n]) != checksum {
r.err = ErrCorrupt
return r.err
}
r.i, r.j = 0, n
continue
case chunkTypeStreamIdentifier:
// Section 4.1. Stream identifier (chunk type 0xff).
if chunkLen != len(magicBody) {
r.err = ErrCorrupt
return r.err
}
if !r.readFull(r.buf[:len(magicBody)], false) {
return r.err
}
for i := 0; i < len(magicBody); i++ {
if r.buf[i] != magicBody[i] {
r.err = ErrCorrupt
return r.err
}
}
continue
}
if chunkType <= 0x7f {
// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
r.err = ErrUnsupported
return r.err
}
// Section 4.4 Padding (chunk type 0xfe).
// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
if !r.readFull(r.buf[:chunkLen], false) {
return r.err
}
}
return nil
}
// Read satisfies the io.Reader interface.
func (r *Reader) Read(p []byte) (int, error) {
if r.err != nil {
return 0, r.err
}
if err := r.fill(); err != nil {
return 0, err
}
n := copy(p, r.decoded[r.i:r.j])
r.i += n
return n, nil
}
// ReadByte satisfies the io.ByteReader interface.
func (r *Reader) ReadByte() (byte, error) {
if r.err != nil {
return 0, r.err
}
if err := r.fill(); err != nil {
return 0, err
}
c := r.decoded[r.i]
r.i++
return c, nil
}


@@ -1,490 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// The asm code generally follows the pure Go code in decode_other.go, except
// where marked with a "!!!".
// func decode(dst, src []byte) int
//
// All local variables fit into registers. The non-zero stack size is only to
// spill registers and push args when issuing a CALL. The register allocation:
// - AX scratch
// - BX scratch
// - CX length or x
// - DX offset
// - SI &src[s]
// - DI &dst[d]
// + R8 dst_base
// + R9 dst_len
// + R10 dst_base + dst_len
// + R11 src_base
// + R12 src_len
// + R13 src_base + src_len
// - R14 used by doCopy
// - R15 used by doCopy
//
// The registers R8-R13 (marked with a "+") are set at the start of the
// function, and after a CALL returns, and are not otherwise modified.
//
// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI.
// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI.
TEXT ·decode(SB), NOSPLIT, $48-56
// Initialize SI, DI and R8-R13.
MOVQ dst_base+0(FP), R8
MOVQ dst_len+8(FP), R9
MOVQ R8, DI
MOVQ R8, R10
ADDQ R9, R10
MOVQ src_base+24(FP), R11
MOVQ src_len+32(FP), R12
MOVQ R11, SI
MOVQ R11, R13
ADDQ R12, R13
loop:
// for s < len(src)
CMPQ SI, R13
JEQ end
// CX = uint32(src[s])
//
// switch src[s] & 0x03
MOVBLZX (SI), CX
MOVL CX, BX
ANDL $3, BX
CMPL BX, $1
JAE tagCopy
// ----------------------------------------
// The code below handles literal tags.
// case tagLiteral:
// x := uint32(src[s] >> 2)
// switch
SHRL $2, CX
CMPL CX, $60
JAE tagLit60Plus
// case x < 60:
// s++
INCQ SI
doLit:
// This is the end of the inner "switch", when we have a literal tag.
//
// We assume that CX == x and x fits in a uint32, where x is the variable
// used in the pure Go decode_other.go code.
// length = int(x) + 1
//
// Unlike the pure Go code, we don't need to check if length <= 0 because
// CX can hold 64 bits, so the increment cannot overflow.
INCQ CX
// Prepare to check if copying length bytes will run past the end of dst or
// src.
//
// AX = len(dst) - d
// BX = len(src) - s
MOVQ R10, AX
SUBQ DI, AX
MOVQ R13, BX
SUBQ SI, BX
// !!! Try a faster technique for short (16 or fewer bytes) copies.
//
// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
// goto callMemmove // Fall back on calling runtime·memmove.
// }
//
// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
// against 21 instead of 16, because it cannot assume that all of its input
// is contiguous in memory and so it needs to leave enough source bytes to
// read the next tag without refilling buffers, but Go's Decode assumes
// contiguousness (the src argument is a []byte).
CMPQ CX, $16
JGT callMemmove
CMPQ AX, $16
JLT callMemmove
CMPQ BX, $16
JLT callMemmove
// !!! Implement the copy from src to dst as a 16-byte load and store.
// (Decode's documentation says that dst and src must not overlap.)
//
// This always copies 16 bytes, instead of only length bytes, but that's
// OK. If the input is a valid Snappy encoding then subsequent iterations
// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
// non-nil error), so the overrun will be ignored.
//
// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
// 16-byte loads and stores. This technique probably wouldn't be as
// effective on architectures that are fussier about alignment.
MOVOU 0(SI), X0
MOVOU X0, 0(DI)
// d += length
// s += length
ADDQ CX, DI
ADDQ CX, SI
JMP loop
callMemmove:
// if length > len(dst)-d || length > len(src)-s { etc }
CMPQ CX, AX
JGT errCorrupt
CMPQ CX, BX
JGT errCorrupt
// copy(dst[d:], src[s:s+length])
//
// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
// DI, SI and CX as arguments. Coincidentally, we also need to spill those
// three registers to the stack, to save local variables across the CALL.
MOVQ DI, 0(SP)
MOVQ SI, 8(SP)
MOVQ CX, 16(SP)
MOVQ DI, 24(SP)
MOVQ SI, 32(SP)
MOVQ CX, 40(SP)
CALL runtime·memmove(SB)
// Restore local variables: unspill registers from the stack and
// re-calculate R8-R13.
MOVQ 24(SP), DI
MOVQ 32(SP), SI
MOVQ 40(SP), CX
MOVQ dst_base+0(FP), R8
MOVQ dst_len+8(FP), R9
MOVQ R8, R10
ADDQ R9, R10
MOVQ src_base+24(FP), R11
MOVQ src_len+32(FP), R12
MOVQ R11, R13
ADDQ R12, R13
// d += length
// s += length
ADDQ CX, DI
ADDQ CX, SI
JMP loop
tagLit60Plus:
// !!! This fragment does the
//
// s += x - 58; if uint(s) > uint(len(src)) { etc }
//
// checks. In the asm version, we code it once instead of once per switch case.
ADDQ CX, SI
SUBQ $58, SI
MOVQ SI, BX
SUBQ R11, BX
CMPQ BX, R12
JA errCorrupt
// case x == 60:
CMPL CX, $61
JEQ tagLit61
JA tagLit62Plus
// x = uint32(src[s-1])
MOVBLZX -1(SI), CX
JMP doLit
tagLit61:
// case x == 61:
// x = uint32(src[s-2]) | uint32(src[s-1])<<8
MOVWLZX -2(SI), CX
JMP doLit
tagLit62Plus:
CMPL CX, $62
JA tagLit63
// case x == 62:
// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
MOVWLZX -3(SI), CX
MOVBLZX -1(SI), BX
SHLL $16, BX
ORL BX, CX
JMP doLit
tagLit63:
// case x == 63:
// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
MOVL -4(SI), CX
JMP doLit
// The code above handles literal tags.
// ----------------------------------------
// The code below handles copy tags.
tagCopy4:
// case tagCopy4:
// s += 5
ADDQ $5, SI
// if uint(s) > uint(len(src)) { etc }
MOVQ SI, BX
SUBQ R11, BX
CMPQ BX, R12
JA errCorrupt
// length = 1 + int(src[s-5])>>2
SHRQ $2, CX
INCQ CX
// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
MOVLQZX -4(SI), DX
JMP doCopy
tagCopy2:
// case tagCopy2:
// s += 3
ADDQ $3, SI
// if uint(s) > uint(len(src)) { etc }
MOVQ SI, BX
SUBQ R11, BX
CMPQ BX, R12
JA errCorrupt
// length = 1 + int(src[s-3])>>2
SHRQ $2, CX
INCQ CX
// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
MOVWQZX -2(SI), DX
JMP doCopy
tagCopy:
// We have a copy tag. We assume that:
// - BX == src[s] & 0x03
// - CX == src[s]
CMPQ BX, $2
JEQ tagCopy2
JA tagCopy4
// case tagCopy1:
// s += 2
ADDQ $2, SI
// if uint(s) > uint(len(src)) { etc }
MOVQ SI, BX
SUBQ R11, BX
CMPQ BX, R12
JA errCorrupt
// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
MOVQ CX, DX
ANDQ $0xe0, DX
SHLQ $3, DX
MOVBQZX -1(SI), BX
ORQ BX, DX
// length = 4 + int(src[s-2])>>2&0x7
SHRQ $2, CX
ANDQ $7, CX
ADDQ $4, CX
doCopy:
// This is the end of the outer "switch", when we have a copy tag.
//
// We assume that:
// - CX == length && CX > 0
// - DX == offset
// if offset <= 0 { etc }
CMPQ DX, $0
JLE errCorrupt
// if d < offset { etc }
MOVQ DI, BX
SUBQ R8, BX
CMPQ BX, DX
JLT errCorrupt
// if length > len(dst)-d { etc }
MOVQ R10, BX
SUBQ DI, BX
CMPQ CX, BX
JGT errCorrupt
// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
//
// Set:
// - R14 = len(dst)-d
// - R15 = &dst[d-offset]
MOVQ R10, R14
SUBQ DI, R14
MOVQ DI, R15
SUBQ DX, R15
// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
//
// First, try using two 8-byte load/stores, similar to the doLit technique
// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
// and not one 16-byte load/store, and the first store has to be before the
// second load, due to the overlap if offset is in the range [8, 16).
//
// if length > 16 || offset < 8 || len(dst)-d < 16 {
// goto slowForwardCopy
// }
// copy 16 bytes
// d += length
CMPQ CX, $16
JGT slowForwardCopy
CMPQ DX, $8
JLT slowForwardCopy
CMPQ R14, $16
JLT slowForwardCopy
MOVQ 0(R15), AX
MOVQ AX, 0(DI)
MOVQ 8(R15), BX
MOVQ BX, 8(DI)
ADDQ CX, DI
JMP loop
slowForwardCopy:
// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
// can still try 8-byte load stores, provided we can overrun up to 10 extra
// bytes. As above, the overrun will be fixed up by subsequent iterations
// of the outermost loop.
//
// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
// commentary says:
//
// ----
//
// The main part of this loop is a simple copy of eight bytes at a time
// until we've copied (at least) the requested amount of bytes. However,
// if d and d-offset are less than eight bytes apart (indicating a
// repeating pattern of length < 8), we first need to expand the pattern in
// order to get the correct results. For instance, if the buffer looks like
// this, with the eight-byte <d-offset> and <d> patterns marked as
// intervals:
//
// abxxxxxxxxxxxx
// [------] d-offset
// [------] d
//
// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
// once, after which we can move <d> two bytes without moving <d-offset>:
//
// ababxxxxxxxxxx
// [------] d-offset
// [------] d
//
// and repeat the exercise until the two no longer overlap.
//
// This allows us to do very well in the special case of one single byte
// repeated many times, without taking a big hit for more general cases.
//
// The worst case of extra writing past the end of the match occurs when
// offset == 1 and length == 1; the last copy will read from byte positions
// [0..7] and write to [4..11], whereas it was only supposed to write to
// position 1. Thus, ten excess bytes.
//
// ----
//
// That "10 byte overrun" worst case is confirmed by Go's
// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
// and finishSlowForwardCopy algorithm.
//
// if length > len(dst)-d-10 {
// goto verySlowForwardCopy
// }
SUBQ $10, R14
CMPQ CX, R14
JGT verySlowForwardCopy
makeOffsetAtLeast8:
// !!! As above, expand the pattern so that offset >= 8 and we can use
// 8-byte load/stores.
//
// for offset < 8 {
// copy 8 bytes from dst[d-offset:] to dst[d:]
// length -= offset
// d += offset
// offset += offset
// // The two previous lines together means that d-offset, and therefore
// // R15, is unchanged.
// }
CMPQ DX, $8
JGE fixUpSlowForwardCopy
MOVQ (R15), BX
MOVQ BX, (DI)
SUBQ DX, CX
ADDQ DX, DI
ADDQ DX, DX
JMP makeOffsetAtLeast8
fixUpSlowForwardCopy:
// !!! Add length (which might be negative now) to d (implied by DI being
// &dst[d]) so that d ends up at the right place when we jump back to the
// top of the loop. Before we do that, though, we save DI to AX so that, if
// length is positive, copying the remaining length bytes will write to the
// right place.
MOVQ DI, AX
ADDQ CX, DI
finishSlowForwardCopy:
// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
// length means that we overrun, but as above, that will be fixed up by
// subsequent iterations of the outermost loop.
CMPQ CX, $0
JLE loop
MOVQ (R15), BX
MOVQ BX, (AX)
ADDQ $8, R15
ADDQ $8, AX
SUBQ $8, CX
JMP finishSlowForwardCopy
verySlowForwardCopy:
// verySlowForwardCopy is a simple implementation of forward copy. In C
// parlance, this is a do/while loop instead of a while loop, since we know
// that length > 0. In Go syntax:
//
// for {
// dst[d] = dst[d - offset]
// d++
// length--
// if length == 0 {
// break
// }
// }
MOVB (R15), BX
MOVB BX, (DI)
INCQ R15
INCQ DI
DECQ CX
JNZ verySlowForwardCopy
JMP loop
// The code above handles copy tags.
// ----------------------------------------
end:
// This is the end of the "for s < len(src)".
//
// if d != len(dst) { etc }
CMPQ DI, R10
JNE errCorrupt
// return 0
MOVQ $0, ret+48(FP)
RET
errCorrupt:
// return decodeErrCodeCorrupt
MOVQ $1, ret+48(FP)
RET


@@ -1,494 +0,0 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// The asm code generally follows the pure Go code in decode_other.go, except
// where marked with a "!!!".
// func decode(dst, src []byte) int
//
// All local variables fit into registers. The non-zero stack size is only to
// spill registers and push args when issuing a CALL. The register allocation:
// - R2 scratch
// - R3 scratch
// - R4 length or x
// - R5 offset
// - R6 &src[s]
// - R7 &dst[d]
// + R8 dst_base
// + R9 dst_len
// + R10 dst_base + dst_len
// + R11 src_base
// + R12 src_len
// + R13 src_base + src_len
// - R14 used by doCopy
// - R15 used by doCopy
//
// The registers R8-R13 (marked with a "+") are set at the start of the
// function, and after a CALL returns, and are not otherwise modified.
//
// The d variable is implicitly R7 - R8, and len(dst)-d is R10 - R7.
// The s variable is implicitly R6 - R11, and len(src)-s is R13 - R6.
TEXT ·decode(SB), NOSPLIT, $56-56
// Initialize R6, R7 and R8-R13.
MOVD dst_base+0(FP), R8
MOVD dst_len+8(FP), R9
MOVD R8, R7
MOVD R8, R10
ADD R9, R10, R10
MOVD src_base+24(FP), R11
MOVD src_len+32(FP), R12
MOVD R11, R6
MOVD R11, R13
ADD R12, R13, R13
loop:
// for s < len(src)
CMP R13, R6
BEQ end
// R4 = uint32(src[s])
//
// switch src[s] & 0x03
MOVBU (R6), R4
MOVW R4, R3
ANDW $3, R3
MOVW $1, R1
CMPW R1, R3
BGE tagCopy
// ----------------------------------------
// The code below handles literal tags.
// case tagLiteral:
// x := uint32(src[s] >> 2)
// switch
MOVW $60, R1
LSRW $2, R4, R4
CMPW R4, R1
BLS tagLit60Plus
// case x < 60:
// s++
ADD $1, R6, R6
doLit:
// This is the end of the inner "switch", when we have a literal tag.
//
// We assume that R4 == x and x fits in a uint32, where x is the variable
// used in the pure Go decode_other.go code.
// length = int(x) + 1
//
// Unlike the pure Go code, we don't need to check if length <= 0 because
// R4 can hold 64 bits, so the increment cannot overflow.
ADD $1, R4, R4
// Prepare to check if copying length bytes will run past the end of dst or
// src.
//
// R2 = len(dst) - d
// R3 = len(src) - s
MOVD R10, R2
SUB R7, R2, R2
MOVD R13, R3
SUB R6, R3, R3
// !!! Try a faster technique for short (16 or fewer bytes) copies.
//
// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
// goto callMemmove // Fall back on calling runtime·memmove.
// }
//
// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
// against 21 instead of 16, because it cannot assume that all of its input
// is contiguous in memory and so it needs to leave enough source bytes to
// read the next tag without refilling buffers, but Go's Decode assumes
// contiguousness (the src argument is a []byte).
CMP $16, R4
BGT callMemmove
CMP $16, R2
BLT callMemmove
CMP $16, R3
BLT callMemmove
// !!! Implement the copy from src to dst as a 16-byte load and store.
// (Decode's documentation says that dst and src must not overlap.)
//
// This always copies 16 bytes, instead of only length bytes, but that's
// OK. If the input is a valid Snappy encoding then subsequent iterations
// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
// non-nil error), so the overrun will be ignored.
//
// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
// 16-byte loads and stores. This technique probably wouldn't be as
// effective on architectures that are fussier about alignment.
LDP 0(R6), (R14, R15)
STP (R14, R15), 0(R7)
// d += length
// s += length
ADD R4, R7, R7
ADD R4, R6, R6
B loop
callMemmove:
// if length > len(dst)-d || length > len(src)-s { etc }
CMP R2, R4
BGT errCorrupt
CMP R3, R4
BGT errCorrupt
// copy(dst[d:], src[s:s+length])
//
// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
// R7, R6 and R4 as arguments. Coincidentally, we also need to spill those
// three registers to the stack, to save local variables across the CALL.
MOVD R7, 8(RSP)
MOVD R6, 16(RSP)
MOVD R4, 24(RSP)
MOVD R7, 32(RSP)
MOVD R6, 40(RSP)
MOVD R4, 48(RSP)
CALL runtime·memmove(SB)
// Restore local variables: unspill registers from the stack and
// re-calculate R8-R13.
MOVD 32(RSP), R7
MOVD 40(RSP), R6
MOVD 48(RSP), R4
MOVD dst_base+0(FP), R8
MOVD dst_len+8(FP), R9
MOVD R8, R10
ADD R9, R10, R10
MOVD src_base+24(FP), R11
MOVD src_len+32(FP), R12
MOVD R11, R13
ADD R12, R13, R13
// d += length
// s += length
ADD R4, R7, R7
ADD R4, R6, R6
B loop
tagLit60Plus:
// !!! This fragment does the
//
// s += x - 58; if uint(s) > uint(len(src)) { etc }
//
// checks. In the asm version, we code it once instead of once per switch case.
ADD R4, R6, R6
SUB $58, R6, R6
MOVD R6, R3
SUB R11, R3, R3
CMP R12, R3
BGT errCorrupt
// case x == 60:
MOVW $61, R1
CMPW R1, R4
BEQ tagLit61
BGT tagLit62Plus
// x = uint32(src[s-1])
MOVBU -1(R6), R4
B doLit
tagLit61:
// case x == 61:
// x = uint32(src[s-2]) | uint32(src[s-1])<<8
MOVHU -2(R6), R4
B doLit
tagLit62Plus:
CMPW $62, R4
BHI tagLit63
// case x == 62:
// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
MOVHU -3(R6), R4
MOVBU -1(R6), R3
ORR R3<<16, R4
B doLit
tagLit63:
// case x == 63:
// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
MOVWU -4(R6), R4
B doLit
// The code above handles literal tags.
// ----------------------------------------
// The code below handles copy tags.
tagCopy4:
// case tagCopy4:
// s += 5
ADD $5, R6, R6
// if uint(s) > uint(len(src)) { etc }
MOVD R6, R3
SUB R11, R3, R3
CMP R12, R3
BGT errCorrupt
// length = 1 + int(src[s-5])>>2
MOVD $1, R1
ADD R4>>2, R1, R4
// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
MOVWU -4(R6), R5
B doCopy
tagCopy2:
// case tagCopy2:
// s += 3
ADD $3, R6, R6
// if uint(s) > uint(len(src)) { etc }
MOVD R6, R3
SUB R11, R3, R3
CMP R12, R3
BGT errCorrupt
// length = 1 + int(src[s-3])>>2
MOVD $1, R1
ADD R4>>2, R1, R4
// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
MOVHU -2(R6), R5
B doCopy
tagCopy:
// We have a copy tag. We assume that:
// - R3 == src[s] & 0x03
// - R4 == src[s]
CMP $2, R3
BEQ tagCopy2
BGT tagCopy4
// case tagCopy1:
// s += 2
ADD $2, R6, R6
// if uint(s) > uint(len(src)) { etc }
MOVD R6, R3
SUB R11, R3, R3
CMP R12, R3
BGT errCorrupt
// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
MOVD R4, R5
AND $0xe0, R5
MOVBU -1(R6), R3
ORR R5<<3, R3, R5
// length = 4 + int(src[s-2])>>2&0x7
MOVD $7, R1
AND R4>>2, R1, R4
ADD $4, R4, R4
doCopy:
// This is the end of the outer "switch", when we have a copy tag.
//
// We assume that:
// - R4 == length && R4 > 0
// - R5 == offset
// if offset <= 0 { etc }
MOVD $0, R1
CMP R1, R5
BLE errCorrupt
// if d < offset { etc }
MOVD R7, R3
SUB R8, R3, R3
CMP R5, R3
BLT errCorrupt
// if length > len(dst)-d { etc }
MOVD R10, R3
SUB R7, R3, R3
CMP R3, R4
BGT errCorrupt
// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
//
// Set:
// - R14 = len(dst)-d
// - R15 = &dst[d-offset]
MOVD R10, R14
SUB R7, R14, R14
MOVD R7, R15
SUB R5, R15, R15
// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
//
// First, try using two 8-byte load/stores, similar to the doLit technique
// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
// and not one 16-byte load/store, and the first store has to be before the
// second load, due to the overlap if offset is in the range [8, 16).
//
// if length > 16 || offset < 8 || len(dst)-d < 16 {
// goto slowForwardCopy
// }
// copy 16 bytes
// d += length
CMP $16, R4
BGT slowForwardCopy
CMP $8, R5
BLT slowForwardCopy
CMP $16, R14
BLT slowForwardCopy
MOVD 0(R15), R2
MOVD R2, 0(R7)
MOVD 8(R15), R3
MOVD R3, 8(R7)
ADD R4, R7, R7
B loop
slowForwardCopy:
// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
// can still try 8-byte load stores, provided we can overrun up to 10 extra
// bytes. As above, the overrun will be fixed up by subsequent iterations
// of the outermost loop.
//
// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
// commentary says:
//
// ----
//
// The main part of this loop is a simple copy of eight bytes at a time
// until we've copied (at least) the requested amount of bytes. However,
// if d and d-offset are less than eight bytes apart (indicating a
// repeating pattern of length < 8), we first need to expand the pattern in
// order to get the correct results. For instance, if the buffer looks like
// this, with the eight-byte <d-offset> and <d> patterns marked as
// intervals:
//
// abxxxxxxxxxxxx
// [------] d-offset
// [------] d
//
// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
// once, after which we can move <d> two bytes without moving <d-offset>:
//
// ababxxxxxxxxxx
// [------] d-offset
// [------] d
//
// and repeat the exercise until the two no longer overlap.
//
// This allows us to do very well in the special case of one single byte
// repeated many times, without taking a big hit for more general cases.
//
// The worst case of extra writing past the end of the match occurs when
// offset == 1 and length == 1; the last copy will read from byte positions
// [0..7] and write to [4..11], whereas it was only supposed to write to
// position 1. Thus, ten excess bytes.
//
// ----
//
// That "10 byte overrun" worst case is confirmed by Go's
// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
// and finishSlowForwardCopy algorithm.
//
// if length > len(dst)-d-10 {
// goto verySlowForwardCopy
// }
SUB $10, R14, R14
CMP R14, R4
BGT verySlowForwardCopy
makeOffsetAtLeast8:
// !!! As above, expand the pattern so that offset >= 8 and we can use
// 8-byte load/stores.
//
// for offset < 8 {
// copy 8 bytes from dst[d-offset:] to dst[d:]
// length -= offset
// d += offset
// offset += offset
// // The two previous lines together means that d-offset, and therefore
// // R15, is unchanged.
// }
CMP $8, R5
BGE fixUpSlowForwardCopy
MOVD (R15), R3
MOVD R3, (R7)
SUB R5, R4, R4
ADD R5, R7, R7
ADD R5, R5, R5
B makeOffsetAtLeast8
fixUpSlowForwardCopy:
// !!! Add length (which might be negative now) to d (implied by R7 being
// &dst[d]) so that d ends up at the right place when we jump back to the
// top of the loop. Before we do that, though, we save R7 to R2 so that, if
// length is positive, copying the remaining length bytes will write to the
// right place.
MOVD R7, R2
ADD R4, R7, R7
finishSlowForwardCopy:
// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
// length means that we overrun, but as above, that will be fixed up by
// subsequent iterations of the outermost loop.
MOVD $0, R1
CMP R1, R4
BLE loop
MOVD (R15), R3
MOVD R3, (R2)
ADD $8, R15, R15
ADD $8, R2, R2
SUB $8, R4, R4
B finishSlowForwardCopy
verySlowForwardCopy:
// verySlowForwardCopy is a simple implementation of forward copy. In C
// parlance, this is a do/while loop instead of a while loop, since we know
// that length > 0. In Go syntax:
//
// for {
// dst[d] = dst[d - offset]
// d++
// length--
// if length == 0 {
// break
// }
// }
MOVB (R15), R3
MOVB R3, (R7)
ADD $1, R15, R15
ADD $1, R7, R7
SUB $1, R4, R4
CBNZ R4, verySlowForwardCopy
B loop
// The code above handles copy tags.
// ----------------------------------------
end:
// This is the end of the "for s < len(src)".
//
// if d != len(dst) { etc }
CMP R10, R7
BNE errCorrupt
// return 0
MOVD $0, ret+48(FP)
RET
errCorrupt:
// return decodeErrCodeCorrupt
MOVD $1, R2
MOVD R2, ret+48(FP)
RET


@@ -1,15 +0,0 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
// +build amd64 arm64
package snappy
// decode has the same semantics as in decode_other.go.
//
//go:noescape
func decode(dst, src []byte) int


@@ -1,115 +0,0 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64,!arm64 appengine !gc noasm
package snappy
// decode writes the decoding of src to dst. It assumes that the varint-encoded
// length of the decompressed bytes has already been read, and that len(dst)
// equals that length.
//
// It returns 0 on success or a decodeErrCodeXxx error code on failure.
func decode(dst, src []byte) int {
var d, s, offset, length int
for s < len(src) {
switch src[s] & 0x03 {
case tagLiteral:
x := uint32(src[s] >> 2)
switch {
case x < 60:
s++
case x == 60:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
x = uint32(src[s-1])
case x == 61:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
x = uint32(src[s-2]) | uint32(src[s-1])<<8
case x == 62:
s += 4
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
case x == 63:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
}
length = int(x) + 1
if length <= 0 {
return decodeErrCodeUnsupportedLiteralLength
}
if length > len(dst)-d || length > len(src)-s {
return decodeErrCodeCorrupt
}
copy(dst[d:], src[s:s+length])
d += length
s += length
continue
case tagCopy1:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
length = 4 + int(src[s-2])>>2&0x7
offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
case tagCopy2:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-3])>>2
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
case tagCopy4:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-5])>>2
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
}
if offset <= 0 || d < offset || length > len(dst)-d {
return decodeErrCodeCorrupt
}
// Copy from an earlier sub-slice of dst to a later sub-slice.
// If no overlap, use the built-in copy:
if offset >= length {
copy(dst[d:d+length], dst[d-offset:])
d += length
continue
}
// Unlike the built-in copy function, this byte-by-byte copy always runs
// forwards, even if the slices overlap. Conceptually, this is:
//
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
//
// We align the slices into a and b and show the compiler they are the same size.
// This allows the loop to run without bounds checks.
a := dst[d : d+length]
b := dst[d-offset:]
b = b[:len(a)]
for i := range a {
a[i] = b[i]
}
d += length
}
if d != len(dst) {
return decodeErrCodeCorrupt
}
return 0
}


@@ -1,289 +0,0 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
import (
"encoding/binary"
"errors"
"io"
)
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// Encode handles the Snappy block format, not the Snappy stream format.
func Encode(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if len(dst) < n {
dst = make([]byte, n)
}
// The block starts with the varint-encoded length of the decompressed bytes.
d := binary.PutUvarint(dst, uint64(len(src)))
for len(src) > 0 {
p := src
src = nil
if len(p) > maxBlockSize {
p, src = p[:maxBlockSize], p[maxBlockSize:]
}
if len(p) < minNonLiteralBlockSize {
d += emitLiteral(dst[d:], p)
} else {
d += encodeBlock(dst[d:], p)
}
}
return dst[:d]
}
// inputMargin is the minimum number of extra input bytes to keep, inside
// encodeBlock's inner loop. On some architectures, this margin lets us
// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
// literals can be implemented as a single load to and store from a 16-byte
// register. That literal's actual length can be as short as 1 byte, so this
// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
// the encoding loop will fix up the copy overrun, and this inputMargin ensures
// that we don't overrun the dst and src buffers.
const inputMargin = 16 - 1
// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
// could be encoded with a copy tag. This is the minimum with respect to the
// algorithm used by encodeBlock, not a minimum enforced by the file format.
//
// The encoded output must start with at least a 1 byte literal, as there are
// no previous bytes to copy. A minimal (1 byte) copy after that, generated
// from an emitCopy call in encodeBlock's main loop, would require at least
// another inputMargin bytes, for the reason above: we want any emitLiteral
// calls inside encodeBlock's main loop to use the fast path if possible, which
// requires being able to overrun by inputMargin bytes. Thus,
// minNonLiteralBlockSize equals 1 + 1 + inputMargin.
//
// The C++ code doesn't use this exact threshold, but it could, as discussed at
// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion
// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an
// optimization. It should not affect the encoded form. This is tested by
// TestSameEncodingAsCppShortCopies.
const minNonLiteralBlockSize = 1 + 1 + inputMargin
// MaxEncodedLen returns the maximum length of a snappy block, given its
// uncompressed length.
//
// It will return a negative value if srcLen is too large to encode.
func MaxEncodedLen(srcLen int) int {
n := uint64(srcLen)
if n > 0xffffffff {
return -1
}
// Compressed data can be defined as:
// compressed := item* literal*
// item := literal* copy
//
// The trailing literal sequence has a space blowup of at most 62/60
// since a literal of length 60 needs one tag byte + one extra byte
// for length information.
//
// Item blowup is trickier to measure. Suppose the "copy" op copies
// 4 bytes of data. Because of a special check in the encoding code,
// we produce a 4-byte copy only if the offset is < 65536. Therefore
// the copy op takes 3 bytes to encode, and this type of item leads
// to at most the 62/60 blowup for representing literals.
//
// Suppose the "copy" op copies 5 bytes of data. If the offset is big
// enough, it will take 5 bytes to encode the copy op. Therefore the
// worst case here is a one-byte literal followed by a five-byte copy.
// That is, 6 bytes of input turn into 7 bytes of "compressed" data.
//
// This last factor dominates the blowup, so the final estimate is:
n = 32 + n + n/6
if n > 0xffffffff {
return -1
}
return int(n)
}
var errClosed = errors.New("snappy: Writer is closed")
// NewWriter returns a new Writer that compresses to w.
//
// The Writer returned does not buffer writes. There is no need to Flush or
// Close such a Writer.
//
// Deprecated: the Writer returned is not suitable for many small writes, only
// for few large writes. Use NewBufferedWriter instead, which is efficient
// regardless of the frequency and shape of the writes, and remember to Close
// that Writer when done.
func NewWriter(w io.Writer) *Writer {
return &Writer{
w: w,
obuf: make([]byte, obufLen),
}
}
// NewBufferedWriter returns a new Writer that compresses to w, using the
// framing format described at
// https://github.com/google/snappy/blob/master/framing_format.txt
//
// The Writer returned buffers writes. Users must call Close to guarantee all
// data has been forwarded to the underlying io.Writer. They may also call
// Flush zero or more times before calling Close.
func NewBufferedWriter(w io.Writer) *Writer {
return &Writer{
w: w,
ibuf: make([]byte, 0, maxBlockSize),
obuf: make([]byte, obufLen),
}
}
// Writer is an io.Writer that can write Snappy-compressed bytes.
//
// Writer handles the Snappy stream format, not the Snappy block format.
type Writer struct {
w io.Writer
err error
// ibuf is a buffer for the incoming (uncompressed) bytes.
//
// Its use is optional. For backwards compatibility, Writers created by the
// NewWriter function have ibuf == nil, do not buffer incoming bytes, and
// therefore do not need to be Flush'ed or Close'd.
ibuf []byte
// obuf is a buffer for the outgoing (compressed) bytes.
obuf []byte
// wroteStreamHeader is whether we have written the stream header.
wroteStreamHeader bool
}
// Reset discards the writer's state and switches the Snappy writer to write to
// w. This permits reusing a Writer rather than allocating a new one.
func (w *Writer) Reset(writer io.Writer) {
w.w = writer
w.err = nil
if w.ibuf != nil {
w.ibuf = w.ibuf[:0]
}
w.wroteStreamHeader = false
}
// Write satisfies the io.Writer interface.
func (w *Writer) Write(p []byte) (nRet int, errRet error) {
if w.ibuf == nil {
// Do not buffer incoming bytes. This does not perform or compress well
// if the caller of Writer.Write writes many small slices. This
// behavior is therefore deprecated, but still supported for backwards
// compatibility with code that doesn't explicitly Flush or Close.
return w.write(p)
}
// The remainder of this method is based on bufio.Writer.Write from the
// standard library.
for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil {
var n int
if len(w.ibuf) == 0 {
// Large write, empty buffer.
// Write directly from p to avoid copy.
n, _ = w.write(p)
} else {
n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
w.ibuf = w.ibuf[:len(w.ibuf)+n]
w.Flush()
}
nRet += n
p = p[n:]
}
if w.err != nil {
return nRet, w.err
}
n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
w.ibuf = w.ibuf[:len(w.ibuf)+n]
nRet += n
return nRet, nil
}
func (w *Writer) write(p []byte) (nRet int, errRet error) {
if w.err != nil {
return 0, w.err
}
for len(p) > 0 {
obufStart := len(magicChunk)
if !w.wroteStreamHeader {
w.wroteStreamHeader = true
copy(w.obuf, magicChunk)
obufStart = 0
}
var uncompressed []byte
if len(p) > maxBlockSize {
uncompressed, p = p[:maxBlockSize], p[maxBlockSize:]
} else {
uncompressed, p = p, nil
}
checksum := crc(uncompressed)
// Compress the buffer, discarding the result if the improvement
// isn't at least 12.5%.
compressed := Encode(w.obuf[obufHeaderLen:], uncompressed)
chunkType := uint8(chunkTypeCompressedData)
chunkLen := 4 + len(compressed)
obufEnd := obufHeaderLen + len(compressed)
if len(compressed) >= len(uncompressed)-len(uncompressed)/8 {
chunkType = chunkTypeUncompressedData
chunkLen = 4 + len(uncompressed)
obufEnd = obufHeaderLen
}
// Fill in the per-chunk header that comes before the body.
w.obuf[len(magicChunk)+0] = chunkType
w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0)
w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8)
w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16)
w.obuf[len(magicChunk)+4] = uint8(checksum >> 0)
w.obuf[len(magicChunk)+5] = uint8(checksum >> 8)
w.obuf[len(magicChunk)+6] = uint8(checksum >> 16)
w.obuf[len(magicChunk)+7] = uint8(checksum >> 24)
if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil {
w.err = err
return nRet, err
}
if chunkType == chunkTypeUncompressedData {
if _, err := w.w.Write(uncompressed); err != nil {
w.err = err
return nRet, err
}
}
nRet += len(uncompressed)
}
return nRet, nil
}
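// Editorial note (not part of the original source): each chunk emitted above
// is therefore framed as one byte of chunk type, a 3-byte little-endian
// length that counts the 4 checksum bytes plus the payload, and the 4-byte
// little-endian masked checksum, with the 10-byte magicChunk stream header
// written once before the first chunk.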
// Flush flushes the Writer to its underlying io.Writer.
func (w *Writer) Flush() error {
if w.err != nil {
return w.err
}
if len(w.ibuf) == 0 {
return nil
}
w.write(w.ibuf)
w.ibuf = w.ibuf[:0]
return w.err
}
// Close calls Flush and then closes the Writer.
func (w *Writer) Close() error {
w.Flush()
ret := w.err
if w.err == nil {
w.err = errClosed
}
return ret
}

View File

@@ -1,730 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a
// Go toolchain regression. See https://github.com/golang/go/issues/15426 and
// https://github.com/golang/snappy/issues/29
//
// As a workaround, the package was built with a known good assembler, and
// those instructions were disassembled by "objdump -d" to yield the
// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
// style comments, in AT&T asm syntax. Note that rsp here is a physical
// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm).
// The instructions were then encoded as "BYTE $0x.." sequences, which assemble
// fine on Go 1.6.
// The asm code generally follows the pure Go code in encode_other.go, except
// where marked with a "!!!".
// ----------------------------------------------------------------------------
// func emitLiteral(dst, lit []byte) int
//
// All local variables fit into registers. The register allocation:
// - AX len(lit)
// - BX n
// - DX return value
// - DI &dst[i]
// - R10 &lit[0]
//
// The 24 bytes of stack space is to call runtime·memmove.
//
// The unusual register allocation of local variables, such as R10 for the
// source pointer, matches the allocation used at the call site in encodeBlock,
// which makes it easier to manually inline this function.
TEXT ·emitLiteral(SB), NOSPLIT, $24-56
MOVQ dst_base+0(FP), DI
MOVQ lit_base+24(FP), R10
MOVQ lit_len+32(FP), AX
MOVQ AX, DX
MOVL AX, BX
SUBL $1, BX
CMPL BX, $60
JLT oneByte
CMPL BX, $256
JLT twoBytes
threeBytes:
MOVB $0xf4, 0(DI)
MOVW BX, 1(DI)
ADDQ $3, DI
ADDQ $3, DX
JMP memmove
twoBytes:
MOVB $0xf0, 0(DI)
MOVB BX, 1(DI)
ADDQ $2, DI
ADDQ $2, DX
JMP memmove
oneByte:
SHLB $2, BX
MOVB BX, 0(DI)
ADDQ $1, DI
ADDQ $1, DX
memmove:
MOVQ DX, ret+48(FP)
// copy(dst[i:], lit)
//
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
// DI, R10 and AX as arguments.
MOVQ DI, 0(SP)
MOVQ R10, 8(SP)
MOVQ AX, 16(SP)
CALL runtime·memmove(SB)
RET
// ----------------------------------------------------------------------------
// func emitCopy(dst []byte, offset, length int) int
//
// All local variables fit into registers. The register allocation:
// - AX length
// - SI &dst[0]
// - DI &dst[i]
// - R11 offset
//
// The unusual register allocation of local variables, such as R11 for the
// offset, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
MOVQ dst_base+0(FP), DI
MOVQ DI, SI
MOVQ offset+24(FP), R11
MOVQ length+32(FP), AX
loop0:
// for length >= 68 { etc }
CMPL AX, $68
JLT step1
// Emit a length 64 copy, encoded as 3 bytes.
MOVB $0xfe, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $64, AX
JMP loop0
step1:
// if length > 64 { etc }
CMPL AX, $64
JLE step2
// Emit a length 60 copy, encoded as 3 bytes.
MOVB $0xee, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $60, AX
step2:
// if length >= 12 || offset >= 2048 { goto step3 }
CMPL AX, $12
JGE step3
CMPL R11, $2048
JGE step3
// Emit the remaining copy, encoded as 2 bytes.
MOVB R11, 1(DI)
SHRL $8, R11
SHLB $5, R11
SUBB $4, AX
SHLB $2, AX
ORB AX, R11
ORB $1, R11
MOVB R11, 0(DI)
ADDQ $2, DI
// Return the number of bytes written.
SUBQ SI, DI
MOVQ DI, ret+40(FP)
RET
step3:
// Emit the remaining copy, encoded as 3 bytes.
SUBL $1, AX
SHLB $2, AX
ORB $2, AX
MOVB AX, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
// Return the number of bytes written.
SUBQ SI, DI
MOVQ DI, ret+40(FP)
RET
// ----------------------------------------------------------------------------
// func extendMatch(src []byte, i, j int) int
//
// All local variables fit into registers. The register allocation:
// - DX &src[0]
// - SI &src[j]
// - R13 &src[len(src) - 8]
// - R14 &src[len(src)]
// - R15 &src[i]
//
// The unusual register allocation of local variables, such as R15 for a source
// pointer, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·extendMatch(SB), NOSPLIT, $0-48
MOVQ src_base+0(FP), DX
MOVQ src_len+8(FP), R14
MOVQ i+24(FP), R15
MOVQ j+32(FP), SI
ADDQ DX, R14
ADDQ DX, R15
ADDQ DX, SI
MOVQ R14, R13
SUBQ $8, R13
cmp8:
// As long as we are 8 or more bytes before the end of src, we can load and
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
CMPQ SI, R13
JA cmp1
MOVQ (R15), AX
MOVQ (SI), BX
CMPQ AX, BX
JNE bsf
ADDQ $8, R15
ADDQ $8, SI
JMP cmp8
bsf:
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
// the index of the first byte that differs. The BSF instruction finds the
// least significant 1 bit, the amd64 architecture is little-endian, and
// the shift by 3 converts a bit index to a byte index.
XORQ AX, BX
BSFQ BX, BX
SHRQ $3, BX
ADDQ BX, SI
// Convert from &src[ret] to ret.
SUBQ DX, SI
MOVQ SI, ret+40(FP)
RET
cmp1:
// In src's tail, compare 1 byte at a time.
CMPQ SI, R14
JAE extendMatchEnd
MOVB (R15), AX
MOVB (SI), BX
CMPB AX, BX
JNE extendMatchEnd
ADDQ $1, R15
ADDQ $1, SI
JMP cmp1
extendMatchEnd:
// Convert from &src[ret] to ret.
SUBQ DX, SI
MOVQ SI, ret+40(FP)
RET
// ----------------------------------------------------------------------------
// func encodeBlock(dst, src []byte) (d int)
//
// All local variables fit into registers, other than "var table". The register
// allocation:
// - AX . .
// - BX . .
// - CX 56 shift (note that amd64 shifts by non-immediates must use CX).
// - DX 64 &src[0], tableSize
// - SI 72 &src[s]
// - DI 80 &dst[d]
// - R9 88 sLimit
// - R10 . &src[nextEmit]
// - R11 96 prevHash, currHash, nextHash, offset
// - R12 104 &src[base], skip
// - R13 . &src[nextS], &src[len(src) - 8]
// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x
// - R15 112 candidate
//
// The second column (56, 64, etc) is the stack offset to spill the registers
// when calling other functions. We could pack this slightly tighter, but it's
// simpler to have a dedicated spill map independent of the function called.
//
// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill
// local variables (registers) during calls gives 32768 + 56 + 64 = 32888.
TEXT ·encodeBlock(SB), 0, $32888-56
MOVQ dst_base+0(FP), DI
MOVQ src_base+24(FP), SI
MOVQ src_len+32(FP), R14
// shift, tableSize := uint32(32-8), 1<<8
MOVQ $24, CX
MOVQ $256, DX
calcShift:
// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
// shift--
// }
CMPQ DX, $16384
JGE varTable
CMPQ DX, R14
JGE varTable
SUBQ $1, CX
SHLQ $1, DX
JMP calcShift
varTable:
// var table [maxTableSize]uint16
//
// In the asm code, unlike the Go code, we can zero-initialize only the
// first tableSize elements. Each uint16 element is 2 bytes and each MOVOU
// writes 16 bytes, so we can do only tableSize/8 writes instead of the
// 2048 writes that would zero-initialize all of table's 32768 bytes.
SHRQ $3, DX
LEAQ table-32768(SP), BX
PXOR X0, X0
memclr:
MOVOU X0, 0(BX)
ADDQ $16, BX
SUBQ $1, DX
JNZ memclr
// !!! DX = &src[0]
MOVQ SI, DX
// sLimit := len(src) - inputMargin
MOVQ R14, R9
SUBQ $15, R9
// !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't
// change for the rest of the function.
MOVQ CX, 56(SP)
MOVQ DX, 64(SP)
MOVQ R9, 88(SP)
// nextEmit := 0
MOVQ DX, R10
// s := 1
ADDQ $1, SI
// nextHash := hash(load32(src, s), shift)
MOVL 0(SI), R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
outer:
// for { etc }
// skip := 32
MOVQ $32, R12
// nextS := s
MOVQ SI, R13
// candidate := 0
MOVQ $0, R15
inner0:
// for { etc }
// s := nextS
MOVQ R13, SI
// bytesBetweenHashLookups := skip >> 5
MOVQ R12, R14
SHRQ $5, R14
// nextS = s + bytesBetweenHashLookups
ADDQ R14, R13
// skip += bytesBetweenHashLookups
ADDQ R14, R12
// if nextS > sLimit { goto emitRemainder }
MOVQ R13, AX
SUBQ DX, AX
CMPQ AX, R9
JA emitRemainder
// candidate = int(table[nextHash])
// XXX: MOVWQZX table-32768(SP)(R11*2), R15
// XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
BYTE $0x4e
BYTE $0x0f
BYTE $0xb7
BYTE $0x7c
BYTE $0x5c
BYTE $0x78
// table[nextHash] = uint16(s)
MOVQ SI, AX
SUBQ DX, AX
// XXX: MOVW AX, table-32768(SP)(R11*2)
// XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
BYTE $0x66
BYTE $0x42
BYTE $0x89
BYTE $0x44
BYTE $0x5c
BYTE $0x78
// nextHash = hash(load32(src, nextS), shift)
MOVL 0(R13), R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// if load32(src, s) != load32(src, candidate) { continue } break
MOVL 0(SI), AX
MOVL (DX)(R15*1), BX
CMPL AX, BX
JNE inner0
fourByteMatch:
// As per the encode_other.go code:
//
// A 4-byte match has been found. We'll later see etc.
// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
// on inputMargin in encode.go.
MOVQ SI, AX
SUBQ R10, AX
CMPQ AX, $16
JLE emitLiteralFastPath
// ----------------------------------------
// Begin inline of the emitLiteral call.
//
// d += emitLiteral(dst[d:], src[nextEmit:s])
MOVL AX, BX
SUBL $1, BX
CMPL BX, $60
JLT inlineEmitLiteralOneByte
CMPL BX, $256
JLT inlineEmitLiteralTwoBytes
inlineEmitLiteralThreeBytes:
MOVB $0xf4, 0(DI)
MOVW BX, 1(DI)
ADDQ $3, DI
JMP inlineEmitLiteralMemmove
inlineEmitLiteralTwoBytes:
MOVB $0xf0, 0(DI)
MOVB BX, 1(DI)
ADDQ $2, DI
JMP inlineEmitLiteralMemmove
inlineEmitLiteralOneByte:
SHLB $2, BX
MOVB BX, 0(DI)
ADDQ $1, DI
inlineEmitLiteralMemmove:
// Spill local variables (registers) onto the stack; call; unspill.
//
// copy(dst[i:], lit)
//
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
// DI, R10 and AX as arguments.
MOVQ DI, 0(SP)
MOVQ R10, 8(SP)
MOVQ AX, 16(SP)
ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)".
MOVQ SI, 72(SP)
MOVQ DI, 80(SP)
MOVQ R15, 112(SP)
CALL runtime·memmove(SB)
MOVQ 56(SP), CX
MOVQ 64(SP), DX
MOVQ 72(SP), SI
MOVQ 80(SP), DI
MOVQ 88(SP), R9
MOVQ 112(SP), R15
JMP inner1
inlineEmitLiteralEnd:
// End inline of the emitLiteral call.
// ----------------------------------------
emitLiteralFastPath:
// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
MOVB AX, BX
SUBB $1, BX
SHLB $2, BX
MOVB BX, (DI)
ADDQ $1, DI
// !!! Implement the copy from lit to dst as a 16-byte load and store.
// (Encode's documentation says that dst and src must not overlap.)
//
// This always copies 16 bytes, instead of only len(lit) bytes, but that's
// OK. Subsequent iterations will fix up the overrun.
//
// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
// 16-byte loads and stores. This technique probably wouldn't be as
// effective on architectures that are fussier about alignment.
MOVOU 0(R10), X0
MOVOU X0, 0(DI)
ADDQ AX, DI
inner1:
// for { etc }
// base := s
MOVQ SI, R12
// !!! offset := base - candidate
MOVQ R12, R11
SUBQ R15, R11
SUBQ DX, R11
// ----------------------------------------
// Begin inline of the extendMatch call.
//
// s = extendMatch(src, candidate+4, s+4)
// !!! R14 = &src[len(src)]
MOVQ src_len+32(FP), R14
ADDQ DX, R14
// !!! R13 = &src[len(src) - 8]
MOVQ R14, R13
SUBQ $8, R13
// !!! R15 = &src[candidate + 4]
ADDQ $4, R15
ADDQ DX, R15
// !!! s += 4
ADDQ $4, SI
inlineExtendMatchCmp8:
// As long as we are 8 or more bytes before the end of src, we can load and
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
CMPQ SI, R13
JA inlineExtendMatchCmp1
MOVQ (R15), AX
MOVQ (SI), BX
CMPQ AX, BX
JNE inlineExtendMatchBSF
ADDQ $8, R15
ADDQ $8, SI
JMP inlineExtendMatchCmp8
inlineExtendMatchBSF:
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
// the index of the first byte that differs. The BSF instruction finds the
// least significant 1 bit, the amd64 architecture is little-endian, and
// the shift by 3 converts a bit index to a byte index.
XORQ AX, BX
BSFQ BX, BX
SHRQ $3, BX
ADDQ BX, SI
JMP inlineExtendMatchEnd
inlineExtendMatchCmp1:
// In src's tail, compare 1 byte at a time.
CMPQ SI, R14
JAE inlineExtendMatchEnd
MOVB (R15), AX
MOVB (SI), BX
CMPB AX, BX
JNE inlineExtendMatchEnd
ADDQ $1, R15
ADDQ $1, SI
JMP inlineExtendMatchCmp1
inlineExtendMatchEnd:
// End inline of the extendMatch call.
// ----------------------------------------
// ----------------------------------------
// Begin inline of the emitCopy call.
//
// d += emitCopy(dst[d:], base-candidate, s-base)
// !!! length := s - base
MOVQ SI, AX
SUBQ R12, AX
inlineEmitCopyLoop0:
// for length >= 68 { etc }
CMPL AX, $68
JLT inlineEmitCopyStep1
// Emit a length 64 copy, encoded as 3 bytes.
MOVB $0xfe, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $64, AX
JMP inlineEmitCopyLoop0
inlineEmitCopyStep1:
// if length > 64 { etc }
CMPL AX, $64
JLE inlineEmitCopyStep2
// Emit a length 60 copy, encoded as 3 bytes.
MOVB $0xee, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $60, AX
inlineEmitCopyStep2:
// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
CMPL AX, $12
JGE inlineEmitCopyStep3
CMPL R11, $2048
JGE inlineEmitCopyStep3
// Emit the remaining copy, encoded as 2 bytes.
MOVB R11, 1(DI)
SHRL $8, R11
SHLB $5, R11
SUBB $4, AX
SHLB $2, AX
ORB AX, R11
ORB $1, R11
MOVB R11, 0(DI)
ADDQ $2, DI
JMP inlineEmitCopyEnd
inlineEmitCopyStep3:
// Emit the remaining copy, encoded as 3 bytes.
SUBL $1, AX
SHLB $2, AX
ORB $2, AX
MOVB AX, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
inlineEmitCopyEnd:
// End inline of the emitCopy call.
// ----------------------------------------
// nextEmit = s
MOVQ SI, R10
// if s >= sLimit { goto emitRemainder }
MOVQ SI, AX
SUBQ DX, AX
CMPQ AX, R9
JAE emitRemainder
// As per the encode_other.go code:
//
// We could immediately etc.
// x := load64(src, s-1)
MOVQ -1(SI), R14
// prevHash := hash(uint32(x>>0), shift)
MOVL R14, R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// table[prevHash] = uint16(s-1)
MOVQ SI, AX
SUBQ DX, AX
SUBQ $1, AX
// XXX: MOVW AX, table-32768(SP)(R11*2)
// XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
BYTE $0x66
BYTE $0x42
BYTE $0x89
BYTE $0x44
BYTE $0x5c
BYTE $0x78
// currHash := hash(uint32(x>>8), shift)
SHRQ $8, R14
MOVL R14, R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// candidate = int(table[currHash])
// XXX: MOVWQZX table-32768(SP)(R11*2), R15
// XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
BYTE $0x4e
BYTE $0x0f
BYTE $0xb7
BYTE $0x7c
BYTE $0x5c
BYTE $0x78
// table[currHash] = uint16(s)
ADDQ $1, AX
// XXX: MOVW AX, table-32768(SP)(R11*2)
// XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
BYTE $0x66
BYTE $0x42
BYTE $0x89
BYTE $0x44
BYTE $0x5c
BYTE $0x78
// if uint32(x>>8) == load32(src, candidate) { continue }
MOVL (DX)(R15*1), BX
CMPL R14, BX
JEQ inner1
// nextHash = hash(uint32(x>>16), shift)
SHRQ $8, R14
MOVL R14, R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// s++
ADDQ $1, SI
// break out of the inner1 for loop, i.e. continue the outer loop.
JMP outer
emitRemainder:
// if nextEmit < len(src) { etc }
MOVQ src_len+32(FP), AX
ADDQ DX, AX
CMPQ R10, AX
JEQ encodeBlockEnd
// d += emitLiteral(dst[d:], src[nextEmit:])
//
// Push args.
MOVQ DI, 0(SP)
MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative.
MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative.
MOVQ R10, 24(SP)
SUBQ R10, AX
MOVQ AX, 32(SP)
MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative.
// Spill local variables (registers) onto the stack; call; unspill.
MOVQ DI, 80(SP)
CALL ·emitLiteral(SB)
MOVQ 80(SP), DI
// Finish the "d +=" part of "d += emitLiteral(etc)".
ADDQ 48(SP), DI
encodeBlockEnd:
MOVQ dst_base+0(FP), AX
SUBQ AX, DI
MOVQ DI, d+48(FP)
RET

View File

@@ -1,722 +0,0 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// The asm code generally follows the pure Go code in encode_other.go, except
// where marked with a "!!!".
// ----------------------------------------------------------------------------
// func emitLiteral(dst, lit []byte) int
//
// All local variables fit into registers. The register allocation:
// - R3 len(lit)
// - R4 n
// - R6 return value
// - R8 &dst[i]
// - R10 &lit[0]
//
// The 32 bytes of stack space is to call runtime·memmove.
//
// The unusual register allocation of local variables, such as R10 for the
// source pointer, matches the allocation used at the call site in encodeBlock,
// which makes it easier to manually inline this function.
TEXT ·emitLiteral(SB), NOSPLIT, $32-56
MOVD dst_base+0(FP), R8
MOVD lit_base+24(FP), R10
MOVD lit_len+32(FP), R3
MOVD R3, R6
MOVW R3, R4
SUBW $1, R4, R4
CMPW $60, R4
BLT oneByte
CMPW $256, R4
BLT twoBytes
threeBytes:
MOVD $0xf4, R2
MOVB R2, 0(R8)
MOVW R4, 1(R8)
ADD $3, R8, R8
ADD $3, R6, R6
B memmove
twoBytes:
MOVD $0xf0, R2
MOVB R2, 0(R8)
MOVB R4, 1(R8)
ADD $2, R8, R8
ADD $2, R6, R6
B memmove
oneByte:
LSLW $2, R4, R4
MOVB R4, 0(R8)
ADD $1, R8, R8
ADD $1, R6, R6
memmove:
MOVD R6, ret+48(FP)
// copy(dst[i:], lit)
//
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
// R8, R10 and R3 as arguments.
MOVD R8, 8(RSP)
MOVD R10, 16(RSP)
MOVD R3, 24(RSP)
CALL runtime·memmove(SB)
RET
// ----------------------------------------------------------------------------
// func emitCopy(dst []byte, offset, length int) int
//
// All local variables fit into registers. The register allocation:
// - R3 length
// - R7 &dst[0]
// - R8 &dst[i]
// - R11 offset
//
// The unusual register allocation of local variables, such as R11 for the
// offset, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
MOVD dst_base+0(FP), R8
MOVD R8, R7
MOVD offset+24(FP), R11
MOVD length+32(FP), R3
loop0:
// for length >= 68 { etc }
CMPW $68, R3
BLT step1
// Emit a length 64 copy, encoded as 3 bytes.
MOVD $0xfe, R2
MOVB R2, 0(R8)
MOVW R11, 1(R8)
ADD $3, R8, R8
SUB $64, R3, R3
B loop0
step1:
// if length > 64 { etc }
CMP $64, R3
BLE step2
// Emit a length 60 copy, encoded as 3 bytes.
MOVD $0xee, R2
MOVB R2, 0(R8)
MOVW R11, 1(R8)
ADD $3, R8, R8
SUB $60, R3, R3
step2:
// if length >= 12 || offset >= 2048 { goto step3 }
CMP $12, R3
BGE step3
CMPW $2048, R11
BGE step3
// Emit the remaining copy, encoded as 2 bytes.
MOVB R11, 1(R8)
LSRW $3, R11, R11
AND $0xe0, R11, R11
SUB $4, R3, R3
LSLW $2, R3
AND $0xff, R3, R3
ORRW R3, R11, R11
ORRW $1, R11, R11
MOVB R11, 0(R8)
ADD $2, R8, R8
// Return the number of bytes written.
SUB R7, R8, R8
MOVD R8, ret+40(FP)
RET
step3:
// Emit the remaining copy, encoded as 3 bytes.
SUB $1, R3, R3
AND $0xff, R3, R3
LSLW $2, R3, R3
ORRW $2, R3, R3
MOVB R3, 0(R8)
MOVW R11, 1(R8)
ADD $3, R8, R8
// Return the number of bytes written.
SUB R7, R8, R8
MOVD R8, ret+40(FP)
RET
// ----------------------------------------------------------------------------
// func extendMatch(src []byte, i, j int) int
//
// All local variables fit into registers. The register allocation:
// - R6 &src[0]
// - R7 &src[j]
// - R13 &src[len(src) - 8]
// - R14 &src[len(src)]
// - R15 &src[i]
//
// The unusual register allocation of local variables, such as R15 for a source
// pointer, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·extendMatch(SB), NOSPLIT, $0-48
MOVD src_base+0(FP), R6
MOVD src_len+8(FP), R14
MOVD i+24(FP), R15
MOVD j+32(FP), R7
ADD R6, R14, R14
ADD R6, R15, R15
ADD R6, R7, R7
MOVD R14, R13
SUB $8, R13, R13
cmp8:
// As long as we are 8 or more bytes before the end of src, we can load and
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
CMP R13, R7
BHI cmp1
MOVD (R15), R3
MOVD (R7), R4
CMP R4, R3
BNE bsf
ADD $8, R15, R15
ADD $8, R7, R7
B cmp8
bsf:
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
// the index of the first byte that differs.
// RBIT reverses the bit order, then CLZ counts the leading zeros, the
// combination of which finds the least significant bit which is set.
// The arm64 architecture is little-endian, and the shift by 3 converts
// a bit index to a byte index.
EOR R3, R4, R4
RBIT R4, R4
CLZ R4, R4
ADD R4>>3, R7, R7
// Convert from &src[ret] to ret.
SUB R6, R7, R7
MOVD R7, ret+40(FP)
RET
cmp1:
// In src's tail, compare 1 byte at a time.
CMP R7, R14
BLS extendMatchEnd
MOVB (R15), R3
MOVB (R7), R4
CMP R4, R3
BNE extendMatchEnd
ADD $1, R15, R15
ADD $1, R7, R7
B cmp1
extendMatchEnd:
// Convert from &src[ret] to ret.
SUB R6, R7, R7
MOVD R7, ret+40(FP)
RET
// ----------------------------------------------------------------------------
// func encodeBlock(dst, src []byte) (d int)
//
// All local variables fit into registers, other than "var table". The register
// allocation:
// - R3 . .
// - R4 . .
// - R5 64 shift
// - R6 72 &src[0], tableSize
// - R7 80 &src[s]
// - R8 88 &dst[d]
// - R9 96 sLimit
// - R10 . &src[nextEmit]
// - R11 104 prevHash, currHash, nextHash, offset
// - R12 112 &src[base], skip
// - R13 . &src[nextS], &src[len(src) - 8]
// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x
// - R15 120 candidate
// - R16 . hash constant, 0x1e35a7bd
// - R17 . &table
// - . 128 table
//
// The second column (64, 72, etc) is the stack offset to spill the registers
// when calling other functions. We could pack this slightly tighter, but it's
// simpler to have a dedicated spill map independent of the function called.
//
// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
// extra 64 bytes, to call other functions, and an extra 64 bytes, to spill
// local variables (registers) during calls gives 32768 + 64 + 64 = 32896.
TEXT ·encodeBlock(SB), 0, $32896-56
MOVD dst_base+0(FP), R8
MOVD src_base+24(FP), R7
MOVD src_len+32(FP), R14
// shift, tableSize := uint32(32-8), 1<<8
MOVD $24, R5
MOVD $256, R6
MOVW $0xa7bd, R16
MOVKW $(0x1e35<<16), R16
calcShift:
// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
// shift--
// }
MOVD $16384, R2
CMP R2, R6
BGE varTable
CMP R14, R6
BGE varTable
SUB $1, R5, R5
LSL $1, R6, R6
B calcShift
varTable:
// var table [maxTableSize]uint16
//
// In the asm code, unlike the Go code, we can zero-initialize only the
// first tableSize elements. Each uint16 element is 2 bytes and each
// iteration writes 64 bytes, so we can do only tableSize/32 writes
// instead of the 2048 writes that would zero-initialize all of table's
// 32768 bytes. This clear could overrun the first tableSize elements, but
// it won't overrun the allocated stack size.
ADD $128, RSP, R17
MOVD R17, R4
// !!! R6 = &src[tableSize]
ADD R6<<1, R17, R6
memclr:
STP.P (ZR, ZR), 64(R4)
STP (ZR, ZR), -48(R4)
STP (ZR, ZR), -32(R4)
STP (ZR, ZR), -16(R4)
CMP R4, R6
BHI memclr
// !!! R6 = &src[0]
MOVD R7, R6
// sLimit := len(src) - inputMargin
MOVD R14, R9
SUB $15, R9, R9
// !!! Pre-emptively spill R5, R6 and R9 to the stack. Their values don't
// change for the rest of the function.
MOVD R5, 64(RSP)
MOVD R6, 72(RSP)
MOVD R9, 96(RSP)
// nextEmit := 0
MOVD R6, R10
// s := 1
ADD $1, R7, R7
// nextHash := hash(load32(src, s), shift)
MOVW 0(R7), R11
MULW R16, R11, R11
LSRW R5, R11, R11
outer:
// for { etc }
// skip := 32
MOVD $32, R12
// nextS := s
MOVD R7, R13
// candidate := 0
MOVD $0, R15
inner0:
// for { etc }
// s := nextS
MOVD R13, R7
// bytesBetweenHashLookups := skip >> 5
MOVD R12, R14
LSR $5, R14, R14
// nextS = s + bytesBetweenHashLookups
ADD R14, R13, R13
// skip += bytesBetweenHashLookups
ADD R14, R12, R12
// if nextS > sLimit { goto emitRemainder }
MOVD R13, R3
SUB R6, R3, R3
CMP R9, R3
BHI emitRemainder
// candidate = int(table[nextHash])
MOVHU 0(R17)(R11<<1), R15
// table[nextHash] = uint16(s)
MOVD R7, R3
SUB R6, R3, R3
MOVH R3, 0(R17)(R11<<1)
// nextHash = hash(load32(src, nextS), shift)
MOVW 0(R13), R11
MULW R16, R11
LSRW R5, R11, R11
// if load32(src, s) != load32(src, candidate) { continue } break
MOVW 0(R7), R3
MOVW (R6)(R15), R4
CMPW R4, R3
BNE inner0
fourByteMatch:
// As per the encode_other.go code:
//
// A 4-byte match has been found. We'll later see etc.
// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
// on inputMargin in encode.go.
MOVD R7, R3
SUB R10, R3, R3
CMP $16, R3
BLE emitLiteralFastPath
// ----------------------------------------
// Begin inline of the emitLiteral call.
//
// d += emitLiteral(dst[d:], src[nextEmit:s])
MOVW R3, R4
SUBW $1, R4, R4
MOVW $60, R2
CMPW R2, R4
BLT inlineEmitLiteralOneByte
MOVW $256, R2
CMPW R2, R4
BLT inlineEmitLiteralTwoBytes
inlineEmitLiteralThreeBytes:
MOVD $0xf4, R1
MOVB R1, 0(R8)
MOVW R4, 1(R8)
ADD $3, R8, R8
B inlineEmitLiteralMemmove
inlineEmitLiteralTwoBytes:
MOVD $0xf0, R1
MOVB R1, 0(R8)
MOVB R4, 1(R8)
ADD $2, R8, R8
B inlineEmitLiteralMemmove
inlineEmitLiteralOneByte:
LSLW $2, R4, R4
MOVB R4, 0(R8)
ADD $1, R8, R8
inlineEmitLiteralMemmove:
// Spill local variables (registers) onto the stack; call; unspill.
//
// copy(dst[i:], lit)
//
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
// R8, R10 and R3 as arguments.
MOVD R8, 8(RSP)
MOVD R10, 16(RSP)
MOVD R3, 24(RSP)
// Finish the "d +=" part of "d += emitLiteral(etc)".
ADD R3, R8, R8
MOVD R7, 80(RSP)
MOVD R8, 88(RSP)
MOVD R15, 120(RSP)
CALL runtime·memmove(SB)
MOVD 64(RSP), R5
MOVD 72(RSP), R6
MOVD 80(RSP), R7
MOVD 88(RSP), R8
MOVD 96(RSP), R9
MOVD 120(RSP), R15
ADD $128, RSP, R17
MOVW $0xa7bd, R16
MOVKW $(0x1e35<<16), R16
B inner1
inlineEmitLiteralEnd:
// End inline of the emitLiteral call.
// ----------------------------------------
emitLiteralFastPath:
// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
MOVB R3, R4
SUBW $1, R4, R4
AND $0xff, R4, R4
LSLW $2, R4, R4
MOVB R4, (R8)
ADD $1, R8, R8
// !!! Implement the copy from lit to dst as a 16-byte load and store.
// (Encode's documentation says that dst and src must not overlap.)
//
// This always copies 16 bytes, instead of only len(lit) bytes, but that's
// OK. Subsequent iterations will fix up the overrun.
//
// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
// 16-byte loads and stores. This technique probably wouldn't be as
// effective on architectures that are fussier about alignment.
LDP 0(R10), (R0, R1)
STP (R0, R1), 0(R8)
ADD R3, R8, R8
inner1:
// for { etc }
// base := s
MOVD R7, R12
// !!! offset := base - candidate
MOVD R12, R11
SUB R15, R11, R11
SUB R6, R11, R11
// ----------------------------------------
// Begin inline of the extendMatch call.
//
// s = extendMatch(src, candidate+4, s+4)
// !!! R14 = &src[len(src)]
MOVD src_len+32(FP), R14
ADD R6, R14, R14
// !!! R13 = &src[len(src) - 8]
MOVD R14, R13
SUB $8, R13, R13
// !!! R15 = &src[candidate + 4]
ADD $4, R15, R15
ADD R6, R15, R15
// !!! s += 4
ADD $4, R7, R7
inlineExtendMatchCmp8:
// As long as we are 8 or more bytes before the end of src, we can load and
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
CMP R13, R7
BHI inlineExtendMatchCmp1
MOVD (R15), R3
MOVD (R7), R4
CMP R4, R3
BNE inlineExtendMatchBSF
ADD $8, R15, R15
ADD $8, R7, R7
B inlineExtendMatchCmp8
inlineExtendMatchBSF:
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
// the index of the first byte that differs.
// RBIT reverses the bit order, then CLZ counts the leading zeros, the
// combination of which finds the least significant bit which is set.
// The arm64 architecture is little-endian, and the shift by 3 converts
// a bit index to a byte index.
EOR R3, R4, R4
RBIT R4, R4
CLZ R4, R4
ADD R4>>3, R7, R7
B inlineExtendMatchEnd
inlineExtendMatchCmp1:
// In src's tail, compare 1 byte at a time.
CMP R7, R14
BLS inlineExtendMatchEnd
MOVB (R15), R3
MOVB (R7), R4
CMP R4, R3
BNE inlineExtendMatchEnd
ADD $1, R15, R15
ADD $1, R7, R7
B inlineExtendMatchCmp1
inlineExtendMatchEnd:
// End inline of the extendMatch call.
// ----------------------------------------
// ----------------------------------------
// Begin inline of the emitCopy call.
//
// d += emitCopy(dst[d:], base-candidate, s-base)
// !!! length := s - base
MOVD R7, R3
SUB R12, R3, R3
inlineEmitCopyLoop0:
// for length >= 68 { etc }
MOVW $68, R2
CMPW R2, R3
BLT inlineEmitCopyStep1
// Emit a length 64 copy, encoded as 3 bytes.
MOVD $0xfe, R1
MOVB R1, 0(R8)
MOVW R11, 1(R8)
ADD $3, R8, R8
SUBW $64, R3, R3
B inlineEmitCopyLoop0
inlineEmitCopyStep1:
// if length > 64 { etc }
MOVW $64, R2
CMPW R2, R3
BLE inlineEmitCopyStep2
// Emit a length 60 copy, encoded as 3 bytes.
MOVD $0xee, R1
MOVB R1, 0(R8)
MOVW R11, 1(R8)
ADD $3, R8, R8
SUBW $60, R3, R3
inlineEmitCopyStep2:
// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
MOVW $12, R2
CMPW R2, R3
BGE inlineEmitCopyStep3
MOVW $2048, R2
CMPW R2, R11
BGE inlineEmitCopyStep3
// Emit the remaining copy, encoded as 2 bytes.
MOVB R11, 1(R8)
LSRW $8, R11, R11
LSLW $5, R11, R11
SUBW $4, R3, R3
AND $0xff, R3, R3
LSLW $2, R3, R3
ORRW R3, R11, R11
ORRW $1, R11, R11
MOVB R11, 0(R8)
ADD $2, R8, R8
B inlineEmitCopyEnd
inlineEmitCopyStep3:
// Emit the remaining copy, encoded as 3 bytes.
SUBW $1, R3, R3
LSLW $2, R3, R3
ORRW $2, R3, R3
MOVB R3, 0(R8)
MOVW R11, 1(R8)
ADD $3, R8, R8
inlineEmitCopyEnd:
// End inline of the emitCopy call.
// ----------------------------------------
// nextEmit = s
MOVD R7, R10
// if s >= sLimit { goto emitRemainder }
MOVD R7, R3
SUB R6, R3, R3
CMP R3, R9
BLS emitRemainder
// As per the encode_other.go code:
//
// We could immediately etc.
// x := load64(src, s-1)
MOVD -1(R7), R14
// prevHash := hash(uint32(x>>0), shift)
MOVW R14, R11
MULW R16, R11, R11
LSRW R5, R11, R11
// table[prevHash] = uint16(s-1)
MOVD R7, R3
SUB R6, R3, R3
SUB $1, R3, R3
MOVHU R3, 0(R17)(R11<<1)
// currHash := hash(uint32(x>>8), shift)
LSR $8, R14, R14
MOVW R14, R11
MULW R16, R11, R11
LSRW R5, R11, R11
// candidate = int(table[currHash])
MOVHU 0(R17)(R11<<1), R15
// table[currHash] = uint16(s)
ADD $1, R3, R3
MOVHU R3, 0(R17)(R11<<1)
// if uint32(x>>8) == load32(src, candidate) { continue }
MOVW (R6)(R15), R4
CMPW R4, R14
BEQ inner1
// nextHash = hash(uint32(x>>16), shift)
LSR $8, R14, R14
MOVW R14, R11
MULW R16, R11, R11
LSRW R5, R11, R11
// s++
ADD $1, R7, R7
// break out of the inner1 for loop, i.e. continue the outer loop.
B outer
emitRemainder:
// if nextEmit < len(src) { etc }
MOVD src_len+32(FP), R3
ADD R6, R3, R3
CMP R3, R10
BEQ encodeBlockEnd
// d += emitLiteral(dst[d:], src[nextEmit:])
//
// Push args.
MOVD R8, 8(RSP)
MOVD $0, 16(RSP) // Unnecessary, as the callee ignores it, but conservative.
MOVD $0, 24(RSP) // Unnecessary, as the callee ignores it, but conservative.
MOVD R10, 32(RSP)
SUB R10, R3, R3
MOVD R3, 40(RSP)
MOVD R3, 48(RSP) // Unnecessary, as the callee ignores it, but conservative.
// Spill local variables (registers) onto the stack; call; unspill.
MOVD R8, 88(RSP)
CALL ·emitLiteral(SB)
MOVD 88(RSP), R8
// Finish the "d +=" part of "d += emitLiteral(etc)".
MOVD 56(RSP), R1
ADD R1, R8, R8
encodeBlockEnd:
MOVD dst_base+0(FP), R3
SUB R3, R8, R8
MOVD R8, d+48(FP)
RET

View File

@@ -1,30 +0,0 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
// +build amd64 arm64
package snappy
// emitLiteral has the same semantics as in encode_other.go.
//
//go:noescape
func emitLiteral(dst, lit []byte) int
// emitCopy has the same semantics as in encode_other.go.
//
//go:noescape
func emitCopy(dst []byte, offset, length int) int
// extendMatch has the same semantics as in encode_other.go.
//
//go:noescape
func extendMatch(src []byte, i, j int) int
// encodeBlock has the same semantics as in encode_other.go.
//
//go:noescape
func encodeBlock(dst, src []byte) (d int)

View File

@@ -1,238 +0,0 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64,!arm64 appengine !gc noasm
package snappy
func load32(b []byte, i int) uint32 {
b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
func load64(b []byte, i int) uint64 {
b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
// emitLiteral writes a literal chunk and returns the number of bytes written.
//
// It assumes that:
// dst is long enough to hold the encoded bytes
// 1 <= len(lit) && len(lit) <= 65536
func emitLiteral(dst, lit []byte) int {
i, n := 0, uint(len(lit)-1)
switch {
case n < 60:
dst[0] = uint8(n)<<2 | tagLiteral
i = 1
case n < 1<<8:
dst[0] = 60<<2 | tagLiteral
dst[1] = uint8(n)
i = 2
default:
dst[0] = 61<<2 | tagLiteral
dst[1] = uint8(n)
dst[2] = uint8(n >> 8)
i = 3
}
return i + copy(dst[i:], lit)
}
// emitCopy writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
// dst is long enough to hold the encoded bytes
// 1 <= offset && offset <= 65535
// 4 <= length && length <= 65535
func emitCopy(dst []byte, offset, length int) int {
i := 0
// The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The
// threshold for this loop is a little higher (at 68 = 64 + 4), and the
// length emitted down below is a little lower (at 60 = 64 - 4), because
// it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed
// by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as
// a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as
// 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a
// tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an
// encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1.
for length >= 68 {
// Emit a length 64 copy, encoded as 3 bytes.
dst[i+0] = 63<<2 | tagCopy2
dst[i+1] = uint8(offset)
dst[i+2] = uint8(offset >> 8)
i += 3
length -= 64
}
if length > 64 {
// Emit a length 60 copy, encoded as 3 bytes.
dst[i+0] = 59<<2 | tagCopy2
dst[i+1] = uint8(offset)
dst[i+2] = uint8(offset >> 8)
i += 3
length -= 60
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.
dst[i+0] = uint8(length-1)<<2 | tagCopy2
dst[i+1] = uint8(offset)
dst[i+2] = uint8(offset >> 8)
return i + 3
}
// Emit the remaining copy, encoded as 2 bytes.
dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
dst[i+1] = uint8(offset)
return i + 2
}
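// Editorial worked example (not part of the original source): following the
// comment in emitCopy, a length-67 copy with a small offset is emitted as a
// length-60 tagCopy2 (3 bytes) plus a length-7 tagCopy1 (2 bytes), 5 bytes in
// total, whereas splitting it as 64+3 would need two tagCopy2 ops (a length-3
// copy is below tagCopy1's 4-byte minimum), costing 6 bytes.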
// extendMatch returns the largest k such that k <= len(src) and that
// src[i:i+k-j] and src[j:k] have the same contents.
//
// It assumes that:
// 0 <= i && i < j && j <= len(src)
func extendMatch(src []byte, i, j int) int {
for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
}
return j
}
func hash(u, shift uint32) uint32 {
return (u * 0x1e35a7bd) >> shift
}
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlock(dst, src []byte) (d int) {
// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
// The table element type is uint16, as s < sLimit and sLimit < len(src)
// and len(src) <= maxBlockSize and maxBlockSize == 65536.
const (
maxTableSize = 1 << 14
// tableMask is redundant, but helps the compiler eliminate bounds
// checks.
tableMask = maxTableSize - 1
)
shift := uint32(32 - 8)
for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
shift--
}
// In Go, all array elements are zero-initialized, so there is no advantage
// to a smaller tableSize per se. However, it matches the C++ algorithm,
// and in the asm versions of this code, we can get away with zeroing only
// the first tableSize elements.
var table [maxTableSize]uint16
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// The encoded form must start with a literal, as there are no previous
// bytes to copy, so we start looking for hash matches at s == 1.
s := 1
nextHash := hash(load32(src, s), shift)
for {
// Copied from the C++ snappy implementation:
//
// Heuristic match skipping: If 32 bytes are scanned with no matches
// found, start looking only at every other byte. If 32 more bytes are
// scanned (or skipped), look at every third byte, etc.. When a match
// is found, immediately go back to looking at every byte. This is a
// small loss (~5% performance, ~0.1% density) for compressible data
// due to more bookkeeping, but for non-compressible data (such as
// JPEG) it's a huge win since the compressor quickly "realizes" the
// data is incompressible and doesn't bother looking for matches
// everywhere.
//
// The "skip" variable keeps track of how many bytes there are since
// the last match; dividing it by 32 (ie. right-shifting by five) gives
// the number of bytes to move ahead for each iteration.
skip := 32
nextS := s
candidate := 0
for {
s = nextS
bytesBetweenHashLookups := skip >> 5
nextS = s + bytesBetweenHashLookups
skip += bytesBetweenHashLookups
if nextS > sLimit {
goto emitRemainder
}
candidate = int(table[nextHash&tableMask])
table[nextHash&tableMask] = uint16(s)
nextHash = hash(load32(src, nextS), shift)
if load32(src, s) == load32(src, candidate) {
break
}
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteral(dst[d:], src[nextEmit:s])
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
// Extend the 4-byte match as long as possible.
//
// This is an inlined version of:
// s = extendMatch(src, candidate+4, s+4)
s += 4
for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 {
}
d += emitCopy(dst[d:], base-candidate, s-base)
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load64(src, s-1)
prevHash := hash(uint32(x>>0), shift)
table[prevHash&tableMask] = uint16(s - 1)
currHash := hash(uint32(x>>8), shift)
candidate = int(table[currHash&tableMask])
table[currHash&tableMask] = uint16(s)
if uint32(x>>8) != load32(src, candidate) {
nextHash = hash(uint32(x>>16), shift)
s++
break
}
}
}
emitRemainder:
if nextEmit < len(src) {
d += emitLiteral(dst[d:], src[nextEmit:])
}
return d
}
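// Editorial note (not part of the original source): concretely, skip starts
// at 32, so the probe stride skip>>5 stays at 1 for the first 32 bytes
// scanned, then grows by one roughly every additional 32 bytes — the
// "every other byte, every third byte" cadence described by the heuristic
// comment in encodeBlock above.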

View File

@@ -1,98 +0,0 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package snappy implements the Snappy compression format. It aims for very
// high speeds and reasonable compression.
//
// There are actually two Snappy formats: block and stream. They are related,
// but different: trying to decompress block-compressed data as a Snappy stream
// will fail, and vice versa. The block format is the Decode and Encode
// functions and the stream format is the Reader and Writer types.
//
// The block format, the more common case, is used when the complete size (the
// number of bytes) of the original data is known upfront, at the time
// compression starts. The stream format, also known as the framing format, is
// for when that isn't always true.
//
// The canonical, C++ implementation is at https://github.com/google/snappy and
// it only implements the block format.
package snappy // import "github.com/golang/snappy"
import (
"hash/crc32"
)
/*
Each encoded block begins with the varint-encoded length of the decoded data,
followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
first byte of each chunk is broken into its 2 least and 6 most significant bits
called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
Zero means a literal tag. All other values mean a copy tag.
For literal tags:
- If m < 60, the next 1 + m bytes are literal bytes.
- Otherwise, let n be the little-endian unsigned integer denoted by the next
m - 59 bytes. The next 1 + n bytes after that are literal bytes.
For copy tags, length bytes are copied from offset bytes ago, in the style of
Lempel-Ziv compression algorithms. In particular:
- For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
of the offset. The next byte is bits 0-7 of the offset.
- For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
The length is 1 + m. The offset is the little-endian unsigned integer
denoted by the next 2 bytes.
- For l == 3, this tag is a legacy format that is no longer issued by most
encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in
[1, 65). The length is 1 + m. The offset is the little-endian unsigned
integer denoted by the next 4 bytes.
*/
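// Editorial worked example (derived from the format description above; not
// part of the original source): a block holding the 3-byte literal "abc"
// encodes as the varint decoded length 0x03, then the literal tag byte
// uint8(3-1)<<2 | tagLiteral = 0x08, then 'a', 'b', 'c' — five bytes in all.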
const (
tagLiteral = 0x00
tagCopy1 = 0x01
tagCopy2 = 0x02
tagCopy4 = 0x03
)
const (
checksumSize = 4
chunkHeaderSize = 4
magicChunk = "\xff\x06\x00\x00" + magicBody
magicBody = "sNaPpY"
// maxBlockSize is the maximum size of the input to encodeBlock. It is not
// part of the wire format per se, but some parts of the encoder assume
// that an offset fits into a uint16.
//
// Also, for the framing format (Writer type instead of Encode function),
// https://github.com/google/snappy/blob/master/framing_format.txt says
// that "the uncompressed data in a chunk must be no longer than 65536
// bytes".
maxBlockSize = 65536
// maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is
// hard coded to be a const instead of a variable, so that obufLen can also
// be a const. Their equivalence is confirmed by
// TestMaxEncodedLenOfMaxBlockSize.
maxEncodedLenOfMaxBlockSize = 76490
obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize
obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize
)
const (
chunkTypeCompressedData = 0x00
chunkTypeUncompressedData = 0x01
chunkTypePadding = 0xfe
chunkTypeStreamIdentifier = 0xff
)
var crcTable = crc32.MakeTable(crc32.Castagnoli)
// crc implements the checksum specified in section 3 of
// https://github.com/google/snappy/blob/master/framing_format.txt
func crc(b []byte) uint32 {
c := crc32.Update(0, crcTable, b)
return uint32(c>>15|c<<17) + 0xa282ead8
}
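// Editorial note (not part of the original source): the rotate-and-add above
// is the checksum masking from section 3 of the framing format, applied so
// that data which embeds its own checksums still checksums robustly. A
// standalone equivalent using only hash/crc32:
//
//	c := crc32.Checksum(b, crc32.MakeTable(crc32.Castagnoli))
//	masked := uint32(c>>15|c<<17) + 0xa282ead8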

View File

@@ -1,36 +0,0 @@
# Created by https://www.gitignore.io/api/macos
### macOS ###
*.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# End of https://www.gitignore.io/api/macos
cmd/*/*exe
.idea
fuzz/*.zip

View File

@@ -1,28 +0,0 @@
Copyright (c) 2015, Pierre Curto
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of xxHash nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -1,92 +0,0 @@
# lz4 : LZ4 compression in pure Go
[![Go Reference](https://pkg.go.dev/badge/github.com/pierrec/lz4/v4.svg)](https://pkg.go.dev/github.com/pierrec/lz4/v4)
[![CI](https://github.com/pierrec/lz4/workflows/ci/badge.svg)](https://github.com/pierrec/lz4/actions)
[![Go Report Card](https://goreportcard.com/badge/github.com/pierrec/lz4)](https://goreportcard.com/report/github.com/pierrec/lz4)
[![GitHub tag (latest SemVer)](https://img.shields.io/github/tag/pierrec/lz4.svg?style=social)](https://github.com/pierrec/lz4/tags)
## Overview
This package provides a streaming interface to [LZ4 data streams](http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html) as well as low level compress and uncompress functions for LZ4 data blocks.
The implementation is based on the reference C [one](https://github.com/lz4/lz4).
## Install
Assuming you have the go toolchain installed:
```
go get github.com/pierrec/lz4/v4
```
There is a command line interface tool to compress and decompress LZ4 files.
```
go install github.com/pierrec/lz4/v4/cmd/lz4c@latest
```
Usage
```
Usage of lz4c:
-version
print the program version
Subcommands:
Compress the given files or from stdin to stdout.
compress [arguments] [<file name> ...]
-bc
enable block checksum
-l int
compression level (0=fastest)
-sc
disable stream checksum
-size string
block max size [64K,256K,1M,4M] (default "4M")
Uncompress the given files or from stdin to stdout.
uncompress [arguments] [<file name> ...]
```
## Example
```
// Compress and uncompress an input string.
s := "hello world"
r := strings.NewReader(s)
// The pipe will uncompress the data from the writer.
pr, pw := io.Pipe()
zw := lz4.NewWriter(pw)
zr := lz4.NewReader(pr)
go func() {
// Compress the input string.
_, _ = io.Copy(zw, r)
_ = zw.Close() // Make sure the writer is closed
_ = pw.Close() // Terminate the pipe
}()
_, _ = io.Copy(os.Stdout, zr)
// Output:
// hello world
```
## Contributing
Contributions are very welcome for bug fixes, performance improvements, and more!
- Open an issue with a proper description
- Send a pull request with appropriate test case(s)
## Contributors
Thanks to all [contributors](https://github.com/pierrec/lz4/graphs/contributors) so far!
Special thanks to [@Zariel](https://github.com/Zariel) for his asm implementation of the decoder.
Special thanks to [@greatroar](https://github.com/greatroar) for his work on the asm implementations of the decoder for amd64 and arm64.
Special thanks to [@klauspost](https://github.com/klauspost) for his work on optimizing the code.

View File

@@ -1,222 +0,0 @@
package lz4
import (
"errors"
"io"
"github.com/pierrec/lz4/v4/internal/lz4block"
"github.com/pierrec/lz4/v4/internal/lz4errors"
"github.com/pierrec/lz4/v4/internal/lz4stream"
)
type crState int
const (
crStateInitial crState = iota
crStateReading
crStateFlushing
crStateDone
)
type CompressingReader struct {
state crState
src io.ReadCloser // source reader
level lz4block.CompressionLevel // how hard to try
frame *lz4stream.Frame // frame being built
in []byte
out ovWriter
handler func(int)
}
// NewCompressingReader creates a reader which reads compressed data from
// a raw stream. This makes it the logical opposite of a normal lz4.Reader.
// We require an io.ReadCloser as an underlying source for compatibility
// with Go's http.Request.
func NewCompressingReader(src io.ReadCloser) *CompressingReader {
zrd := &CompressingReader {
frame: lz4stream.NewFrame(),
}
_ = zrd.Apply(DefaultBlockSizeOption, DefaultChecksumOption, defaultOnBlockDone)
zrd.Reset(src)
return zrd
}
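// Editorial usage sketch (assuming the exported pierrec/lz4 v4 API plus the
// standard library; uploadURL and the file name are hypothetical; not part
// of the original source). Because the reader wraps an io.ReadCloser and
// yields compressed bytes, it can feed an HTTP upload directly:
//
//	f, err := os.Open("data.txt")
//	if err != nil {
//		log.Fatal(err)
//	}
//	req, err := http.NewRequest(http.MethodPut, uploadURL, lz4.NewCompressingReader(f))
//	if err != nil {
//		log.Fatal(err)
//	}
//	resp, err := http.DefaultClient.Do(req) // body is LZ4-framed on the fly
//
// When net/http closes the request body, the underlying file is closed via
// the Close method above.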
// Source exposes the underlying source stream for introspection and control.
func (zrd *CompressingReader) Source() io.ReadCloser {
return zrd.src
}
// Close simply invokes the underlying stream Close method. This method is
// provided for the benefit of Go http client/server, which relies on Close
// for goroutine termination.
func (zrd *CompressingReader) Close() error {
return zrd.src.Close()
}
// Apply applies useful options to the lz4 encoder.
func (zrd *CompressingReader) Apply(options ...Option) (err error) {
if zrd.state != crStateInitial {
return lz4errors.ErrOptionClosedOrError
}
zrd.Reset(zrd.src)
for _, o := range options {
if err = o(zrd); err != nil {
return
}
}
return
}
func (*CompressingReader) private() {}
func (zrd *CompressingReader) init() error {
zrd.frame.InitW(&zrd.out, 1, false)
size := zrd.frame.Descriptor.Flags.BlockSizeIndex()
zrd.in = size.Get()
return zrd.frame.Descriptor.Write(zrd.frame, &zrd.out)
}
// Read allows reading of lz4 compressed data
func (zrd *CompressingReader) Read(p []byte) (n int, err error) {
defer func() {
if err != nil {
zrd.state = crStateDone
}
}()
if !zrd.out.reset(p) {
return len(p), nil
}
switch zrd.state {
case crStateInitial:
err = zrd.init()
if err != nil {
return
}
zrd.state = crStateReading
case crStateDone:
return 0, errors.New("this reader is done")
case crStateFlushing:
if zrd.out.dataPos > 0 {
n = zrd.out.dataPos
zrd.out.data = nil
zrd.out.dataPos = 0
return
} else {
zrd.state = crStateDone
return 0, io.EOF
}
}
for zrd.state == crStateReading {
block := zrd.frame.Blocks.Block
var rCount int
rCount, err = io.ReadFull(zrd.src, zrd.in)
switch err {
case nil:
err = block.Compress(
zrd.frame, zrd.in[ : rCount], zrd.level,
).Write(zrd.frame, &zrd.out)
zrd.handler(len(block.Data))
if err != nil {
return
}
if zrd.out.dataPos == len(zrd.out.data) {
n = zrd.out.dataPos
zrd.out.dataPos = 0
zrd.out.data = nil
return
}
case io.EOF, io.ErrUnexpectedEOF: // read may be partial
if rCount > 0 {
err = block.Compress(
zrd.frame, zrd.in[ : rCount], zrd.level,
).Write(zrd.frame, &zrd.out)
zrd.handler(len(block.Data))
if err != nil {
return
}
}
err = zrd.frame.CloseW(&zrd.out, 1)
if err != nil {
return
}
zrd.state = crStateFlushing
n = zrd.out.dataPos
zrd.out.dataPos = 0
zrd.out.data = nil
return
default:
return
}
}
err = lz4errors.ErrInternalUnhandledState
return
}
// Reset makes the stream usable again; mostly handy to reuse lz4 encoder
// instances.
func (zrd *CompressingReader) Reset(src io.ReadCloser) {
zrd.frame.Reset(1)
zrd.state = crStateInitial
zrd.src = src
zrd.out.clear()
}
type ovWriter struct {
data []byte
ov []byte
dataPos int
ovPos int
}
func (wr *ovWriter) Write(p []byte) (n int, err error) {
count := copy(wr.data[wr.dataPos : ], p)
wr.dataPos += count
if count < len(p) {
wr.ov = append(wr.ov, p[count : ]...)
}
return len(p), nil
}
func (wr *ovWriter) reset(out []byte) bool {
ovRem := len(wr.ov) - wr.ovPos
if ovRem >= len(out) {
wr.ovPos += copy(out, wr.ov[wr.ovPos : ])
return false
}
if ovRem > 0 {
copy(out, wr.ov[wr.ovPos : ])
wr.ov = wr.ov[ : 0]
wr.ovPos = 0
wr.dataPos = ovRem
} else if wr.ovPos > 0 {
wr.ov = wr.ov[ : 0]
wr.ovPos = 0
wr.dataPos = 0
}
wr.data = out
return true
}
func (wr *ovWriter) clear() {
wr.data = nil
wr.dataPos = 0
wr.ov = wr.ov[ : 0]
wr.ovPos = 0
}

View File

@@ -1,481 +0,0 @@
package lz4block
import (
"encoding/binary"
"math/bits"
"sync"
"github.com/pierrec/lz4/v4/internal/lz4errors"
)
const (
// The following constants are used to setup the compression algorithm.
minMatch = 4 // the minimum size of the match sequence size (4 bytes)
winSizeLog = 16 // LZ4 64Kb window size limit
winSize = 1 << winSizeLog
winMask = winSize - 1 // 64Kb window of previous data for dependent blocks
// hashLog determines the size of the hash table used to quickly find a previous match position.
// Its value influences the compression speed and memory usage, the lower the faster,
// but at the expense of the compression ratio.
// 16 seems to be the best compromise for fast compression.
hashLog = 16
htSize = 1 << hashLog
mfLimit = 10 + minMatch // The last match cannot start within the last 14 bytes.
)
func recoverBlock(e *error) {
if r := recover(); r != nil && *e == nil {
*e = lz4errors.ErrInvalidSourceShortBuffer
}
}
// blockHash hashes the lower 6 bytes into a value < htSize.
func blockHash(x uint64) uint32 {
const prime6bytes = 227718039650203
return uint32(((x << (64 - 48)) * prime6bytes) >> (64 - hashLog))
}
func CompressBlockBound(n int) int {
return n + n/255 + 16
}
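// Editorial note (not part of the original source): e.g. for a full 64 KiB
// input, CompressBlockBound(65536) = 65536 + 65536/255 + 16 = 65809 bytes.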
func UncompressBlock(src, dst, dict []byte) (int, error) {
if len(src) == 0 {
return 0, nil
}
if di := decodeBlock(dst, src, dict); di >= 0 {
return di, nil
}
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
type Compressor struct {
// Offsets are at most 64kiB, so we can store only the lower 16 bits of
// match positions: effectively, an offset from some 64kiB block boundary.
//
// When we retrieve such an offset, we interpret it as relative to the last
// block boundary si &^ 0xffff, or the one before, (si &^ 0xffff) - 0x10000,
// depending on which of these is inside the current window. If a table
// entry was generated more than 64kiB back in the input, we find out by
// inspecting the input stream.
table [htSize]uint16
// Bitmap indicating which positions in the table are in use.
// This allows us to quickly reset the table for reuse,
// without having to zero everything.
inUse [htSize / 32]uint32
}
// Get returns the position of a presumptive match for the hash h.
// The match may be a false positive due to a hash collision or an old entry.
// If si < winSize, the return value may be negative.
func (c *Compressor) get(h uint32, si int) int {
h &= htSize - 1
i := 0
if c.inUse[h/32]&(1<<(h%32)) != 0 {
i = int(c.table[h])
}
i += si &^ winMask
if i >= si {
// Try previous 64kiB block (negative when in first block).
i -= winSize
}
return i
}
func (c *Compressor) put(h uint32, si int) {
h &= htSize - 1
c.table[h] = uint16(si)
c.inUse[h/32] |= 1 << (h % 32)
}
func (c *Compressor) reset() { c.inUse = [htSize / 32]uint32{} }
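
To make the 16-bit table concrete: get rebases a stored low-16-bit position onto the current 64 KiB block boundary and steps back one window when that lands at or past the current position. A standalone sketch of the reconstruction (names are local to the example):

package main

import "fmt"

const (
	winSizeLog = 16
	winSize    = 1 << winSizeLog
	winMask    = winSize - 1
)

// resolve mirrors Compressor.get: turn a stored 16-bit position back into
// an absolute candidate position strictly before si.
func resolve(stored uint16, si int) int {
	i := int(stored) + si&^winMask // rebase onto the current 64 KiB block
	if i >= si {
		i -= winSize // candidate must be in the previous 64 KiB block
	}
	return i
}

func main() {
	// With si = 0x12345, a stored 0x2000 resolves inside the current block,
	// while 0x9000 must come from the previous one.
	fmt.Printf("%#x\n", resolve(0x2000, 0x12345)) // 0x12000
	fmt.Printf("%#x\n", resolve(0x9000, 0x12345)) // 0x9000 (0x19000 - 0x10000)
}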
var compressorPool = sync.Pool{New: func() interface{} { return new(Compressor) }}
func CompressBlock(src, dst []byte) (int, error) {
c := compressorPool.Get().(*Compressor)
n, err := c.CompressBlock(src, dst)
compressorPool.Put(c)
return n, err
}
func (c *Compressor) CompressBlock(src, dst []byte) (int, error) {
// Zero out reused table to avoid non-deterministic output (issue #65).
c.reset()
// Return 0, nil only if the destination buffer size is < CompressBlockBound.
isNotCompressible := len(dst) < CompressBlockBound(len(src))
// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
// This significantly speeds up incompressible data and usually has very small impact on compression.
// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
const adaptSkipLog = 7
// si: Current position of the search.
// anchor: Position of the current literals.
var si, di, anchor int
sn := len(src) - mfLimit
if sn <= 0 {
goto lastLiterals
}
// Fast scan strategy: the hash table only stores the last position seen for each hashed sequence.
for si < sn {
// Hash the next 6 bytes (sequence)...
match := binary.LittleEndian.Uint64(src[si:])
h := blockHash(match)
h2 := blockHash(match >> 8)
// We check for a match at si, si+1 and si+2 and pick the first one we get.
// Checking all 3 only requires us to load the source once.
ref := c.get(h, si)
ref2 := c.get(h2, si+1)
c.put(h, si)
c.put(h2, si+1)
offset := si - ref
if offset <= 0 || offset >= winSize || uint32(match) != binary.LittleEndian.Uint32(src[ref:]) {
// No match. Start calculating another hash.
// The processor can usually do this out-of-order.
h = blockHash(match >> 16)
ref3 := c.get(h, si+2)
// Check the second match at si+1
si += 1
offset = si - ref2
if offset <= 0 || offset >= winSize || uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) {
// No match. Check the third match at si+2
si += 1
offset = si - ref3
c.put(h, si)
if offset <= 0 || offset >= winSize || uint32(match>>16) != binary.LittleEndian.Uint32(src[ref3:]) {
// Skip one extra byte (at si+3) before we check 3 matches again.
si += 2 + (si-anchor)>>adaptSkipLog
continue
}
}
}
// Match found.
lLen := si - anchor // Literal length.
// We already matched 4 bytes.
mLen := 4
// Extend backwards if we can, reducing literals.
tOff := si - offset - 1
for lLen > 0 && tOff >= 0 && src[si-1] == src[tOff] {
si--
tOff--
lLen--
mLen++
}
// Add the match length, so we continue search at the end.
// Use mLen to store the offset base.
si, mLen = si+mLen, si+minMatch
// Find the longest match by comparing 8 bytes at a time.
for si+8 <= sn {
x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:])
if x == 0 {
si += 8
} else {
// Stop at the first non-zero byte.
si += bits.TrailingZeros64(x) >> 3
break
}
}
mLen = si - mLen
if di >= len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
if mLen < 0xF {
dst[di] = byte(mLen)
} else {
dst[di] = 0xF
}
// Encode literals length.
if lLen < 0xF {
dst[di] |= byte(lLen << 4)
} else {
dst[di] |= 0xF0
di++
l := lLen - 0xF
for ; l >= 0xFF && di < len(dst); l -= 0xFF {
dst[di] = 0xFF
di++
}
if di >= len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
dst[di] = byte(l)
}
di++
// Literals.
if di+lLen > len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
copy(dst[di:di+lLen], src[anchor:anchor+lLen])
di += lLen + 2
anchor = si
// Encode offset.
if di > len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
// Encode match length part 2.
if mLen >= 0xF {
for mLen -= 0xF; mLen >= 0xFF && di < len(dst); mLen -= 0xFF {
dst[di] = 0xFF
di++
}
if di >= len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
dst[di] = byte(mLen)
di++
}
// Check if we can load next values.
if si >= sn {
break
}
// Hash match end-2
h = blockHash(binary.LittleEndian.Uint64(src[si-2:]))
c.put(h, si-2)
}
lastLiterals:
if isNotCompressible && anchor == 0 {
// Incompressible.
return 0, nil
}
// Last literals.
if di >= len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
lLen := len(src) - anchor
if lLen < 0xF {
dst[di] = byte(lLen << 4)
} else {
dst[di] = 0xF0
di++
for lLen -= 0xF; lLen >= 0xFF && di < len(dst); lLen -= 0xFF {
dst[di] = 0xFF
di++
}
if di >= len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
dst[di] = byte(lLen)
}
di++
// Write the last literals.
if isNotCompressible && di >= anchor {
// Incompressible.
return 0, nil
}
if di+len(src)-anchor > len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
di += copy(dst[di:di+len(src)-anchor], src[anchor:])
return di, nil
}
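
The encoder above emits LZ4 sequences: a token byte whose high nibble is the literal length and whose low nibble is the match length minus minMatch, with a nibble value of 0xF spilling into 0xFF extension bytes plus a final remainder byte. A sketch of just the token and literal-length encoding (the match-length extension bytes, which follow the two offset bytes, are omitted):

package main

import "fmt"

// emitToken is a hypothetical helper illustrating the sequence header layout.
func emitToken(lLen, mLen int) []byte {
	out := []byte{0}
	if lLen < 0xF {
		out[0] |= byte(lLen << 4)
	} else {
		out[0] |= 0xF0
		for l := lLen - 0xF; ; l -= 0xFF {
			if l < 0xFF {
				out = append(out, byte(l))
				break
			}
			out = append(out, 0xFF)
		}
	}
	if mLen < 0xF {
		out[0] |= byte(mLen)
	} else {
		out[0] |= 0xF // extension bytes would follow the offset
	}
	return out
}

func main() {
	fmt.Printf("% x\n", emitToken(5, 3))   // 53
	fmt.Printf("% x\n", emitToken(300, 3)) // f3 ff 1e (300-15 = 285 = 255+30)
}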
// blockHashHC hashes 4 bytes into a value < winSize.
func blockHashHC(x uint32) uint32 {
const hasher uint32 = 2654435761 // Knuth multiplicative hash.
return x * hasher >> (32 - winSizeLog)
}
type CompressorHC struct {
// hashTable: stores the last position found for a given hash
// chainTable: stores previous positions for a given hash
hashTable, chainTable [htSize]int
needsReset bool
}
var compressorHCPool = sync.Pool{New: func() interface{} { return new(CompressorHC) }}
func CompressBlockHC(src, dst []byte, depth CompressionLevel) (int, error) {
c := compressorHCPool.Get().(*CompressorHC)
n, err := c.CompressBlock(src, dst, depth)
compressorHCPool.Put(c)
return n, err
}
func (c *CompressorHC) CompressBlock(src, dst []byte, depth CompressionLevel) (_ int, err error) {
if c.needsReset {
// Zero out reused table to avoid non-deterministic output (issue #65).
c.hashTable = [htSize]int{}
c.chainTable = [htSize]int{}
}
c.needsReset = true // Only false on first call.
defer recoverBlock(&err)
// Return 0, nil only if the destination buffer size is < CompressBlockBound.
isNotCompressible := len(dst) < CompressBlockBound(len(src))
// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
// This significantly speeds up incompressible data and usually has very small impact on compression.
// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
const adaptSkipLog = 7
var si, di, anchor int
sn := len(src) - mfLimit
if sn <= 0 {
goto lastLiterals
}
if depth == 0 {
depth = winSize
}
for si < sn {
// Hash the next 4 bytes (sequence).
match := binary.LittleEndian.Uint32(src[si:])
h := blockHashHC(match)
// Follow the chain until out of window, keeping the longest match.
mLen := 0
offset := 0
for next, try := c.hashTable[h], depth; try > 0 && next > 0 && si-next < winSize; next, try = c.chainTable[next&winMask], try-1 {
// The first byte (mLen == 0) or the byte just past the current best match
// (mLen >= minMatch) must match for this candidate to improve on it.
if src[next+mLen] != src[si+mLen] {
continue
}
ml := 0
// Compare the current position with a previous with the same hash.
for ml < sn-si {
x := binary.LittleEndian.Uint64(src[next+ml:]) ^ binary.LittleEndian.Uint64(src[si+ml:])
if x == 0 {
ml += 8
} else {
// Stop at the first non-zero byte.
ml += bits.TrailingZeros64(x) >> 3
break
}
}
if ml < minMatch || ml <= mLen {
// Match too small (<minMatch) or no longer than the current match.
continue
}
// Found a longer match, keep its position and length.
mLen = ml
offset = si - next
// Try another previous position with the same hash.
}
c.chainTable[si&winMask] = c.hashTable[h]
c.hashTable[h] = si
// No match found.
if mLen == 0 {
si += 1 + (si-anchor)>>adaptSkipLog
continue
}
// Match found.
// Update hash/chain tables with overlapping bytes:
// si already hashed, add everything from si+1 up to the match length.
winStart := si + 1
if ws := si + mLen - winSize; ws > winStart {
winStart = ws
}
for si, ml := winStart, si+mLen; si < ml; {
match >>= 8
match |= uint32(src[si+3]) << 24
h := blockHashHC(match)
c.chainTable[si&winMask] = c.hashTable[h]
c.hashTable[h] = si
si++
}
lLen := si - anchor
si += mLen
mLen -= minMatch // Match length does not include minMatch.
if mLen < 0xF {
dst[di] = byte(mLen)
} else {
dst[di] = 0xF
}
// Encode literals length.
if lLen < 0xF {
dst[di] |= byte(lLen << 4)
} else {
dst[di] |= 0xF0
di++
l := lLen - 0xF
for ; l >= 0xFF; l -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(l)
}
di++
// Literals.
copy(dst[di:di+lLen], src[anchor:anchor+lLen])
di += lLen
anchor = si
// Encode offset.
di += 2
dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
// Encode match length part 2.
if mLen >= 0xF {
for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(mLen)
di++
}
}
if isNotCompressible && anchor == 0 {
// Incompressible.
return 0, nil
}
// Last literals.
lastLiterals:
lLen := len(src) - anchor
if lLen < 0xF {
dst[di] = byte(lLen << 4)
} else {
dst[di] = 0xF0
di++
lLen -= 0xF
for ; lLen >= 0xFF; lLen -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(lLen)
}
di++
// Write the last literals.
if isNotCompressible && di >= anchor {
// Incompressible.
return 0, nil
}
di += copy(dst[di:di+len(src)-anchor], src[anchor:])
return di, nil
}
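
The chain walk above is easier to see in isolation: hashTable holds the newest position for each hash and chainTable links back through older positions, indexed by position modulo the window. A reduced sketch with a single hash bucket:

package main

import "fmt"

const (
	winSize = 1 << 16
	winMask = winSize - 1
)

func main() {
	var hashTable [1]int // a single bucket is enough for the sketch
	chainTable := make([]int, winSize)

	// Insert three positions that (hypothetically) share the same hash.
	for _, pos := range []int{10, 500, 9000} {
		chainTable[pos&winMask] = hashTable[0]
		hashTable[0] = pos
	}

	// Walk the chain newest-first, bounded by depth and the 64 KiB window,
	// just like the candidate loop in CompressBlock above.
	si, depth := 9500, 8
	for next, try := hashTable[0], depth; try > 0 && next > 0 && si-next < winSize; next, try = chainTable[next&winMask], try-1 {
		fmt.Println("candidate:", next) // 9000, then 500, then 10
	}
}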

View File

@ -1,87 +0,0 @@
// Package lz4block provides LZ4 BlockSize types and pools of buffers.
package lz4block
import "sync"
const (
Block64Kb uint32 = 1 << (16 + iota*2)
Block256Kb
Block1Mb
Block4Mb
Block8Mb = 2 * Block4Mb
)
var (
BlockPool64K = sync.Pool{New: func() interface{} { return make([]byte, Block64Kb) }}
BlockPool256K = sync.Pool{New: func() interface{} { return make([]byte, Block256Kb) }}
BlockPool1M = sync.Pool{New: func() interface{} { return make([]byte, Block1Mb) }}
BlockPool4M = sync.Pool{New: func() interface{} { return make([]byte, Block4Mb) }}
BlockPool8M = sync.Pool{New: func() interface{} { return make([]byte, Block8Mb) }}
)
func Index(b uint32) BlockSizeIndex {
switch b {
case Block64Kb:
return 4
case Block256Kb:
return 5
case Block1Mb:
return 6
case Block4Mb:
return 7
case Block8Mb: // only valid in legacy mode
return 3
}
return 0
}
func IsValid(b uint32) bool {
return Index(b) > 0
}
type BlockSizeIndex uint8
func (b BlockSizeIndex) IsValid() bool {
switch b {
case 4, 5, 6, 7:
return true
}
return false
}
func (b BlockSizeIndex) Get() []byte {
var buf interface{}
switch b {
case 4:
buf = BlockPool64K.Get()
case 5:
buf = BlockPool256K.Get()
case 6:
buf = BlockPool1M.Get()
case 7:
buf = BlockPool4M.Get()
case 3:
buf = BlockPool8M.Get()
}
return buf.([]byte)
}
func Put(buf []byte) {
// Safeguard: do not allow invalid buffers.
switch c := cap(buf); uint32(c) {
case Block64Kb:
BlockPool64K.Put(buf[:c])
case Block256Kb:
BlockPool256K.Put(buf[:c])
case Block1Mb:
BlockPool1M.Put(buf[:c])
case Block4Mb:
BlockPool4M.Put(buf[:c])
case Block8Mb:
BlockPool8M.Put(buf[:c])
}
}
type CompressionLevel uint32
const Fast CompressionLevel = 0
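
The pools amortize block-buffer allocations across streams: Get hands out a buffer of the class's full size, and Put recycles it once the block is done. The same pattern reduced to one size class:

package main

import (
	"fmt"
	"sync"
)

var pool64K = sync.Pool{New: func() interface{} { return make([]byte, 1<<16) }}

func main() {
	buf := pool64K.Get().([]byte)
	fmt.Println(len(buf)) // 65536
	// ... use buf as block scratch space ...
	pool64K.Put(buf[:cap(buf)]) // restore full length before recycling, as Put above does
}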

View File

@ -1,448 +0,0 @@
// +build !appengine
// +build gc
// +build !noasm
#include "go_asm.h"
#include "textflag.h"
// AX scratch
// BX scratch
// CX literal and match lengths
// DX token, match offset
//
// DI &dst
// SI &src
// R8 &dst + len(dst)
// R9 &src + len(src)
// R11 &dst
// R12 short output end
// R13 short input end
// R14 &dict
// R15 len(dict)
// func decodeBlock(dst, src, dict []byte) int
TEXT ·decodeBlock(SB), NOSPLIT, $48-80
MOVQ dst_base+0(FP), DI
MOVQ DI, R11
MOVQ dst_len+8(FP), R8
ADDQ DI, R8
MOVQ src_base+24(FP), SI
MOVQ src_len+32(FP), R9
CMPQ R9, $0
JE err_corrupt
ADDQ SI, R9
MOVQ dict_base+48(FP), R14
MOVQ dict_len+56(FP), R15
// shortcut ends
// short output end
MOVQ R8, R12
SUBQ $32, R12
// short input end
MOVQ R9, R13
SUBQ $16, R13
XORL CX, CX
loop:
// token := uint32(src[si])
MOVBLZX (SI), DX
INCQ SI
// lit_len = token >> 4
// if lit_len > 0
// CX = lit_len
MOVL DX, CX
SHRL $4, CX
// if lit_len != 0xF
CMPL CX, $0xF
JEQ lit_len_loop
CMPQ DI, R12
JAE copy_literal
CMPQ SI, R13
JAE copy_literal
// copy shortcut
// A two-stage shortcut for the most common case:
// 1) If the literal length is 0..14, and there is enough space,
// enter the shortcut and copy 16 bytes on behalf of the literals
// (in the fast mode, only 8 bytes can be safely copied this way).
// 2) Further if the match length is 4..18, copy 18 bytes in a similar
// manner; but we ensure that there's enough space in the output for
// those 18 bytes earlier, upon entering the shortcut (in other words,
// there is a combined check for both stages).
// copy literal
MOVOU (SI), X0
MOVOU X0, (DI)
ADDQ CX, DI
ADDQ CX, SI
MOVL DX, CX
ANDL $0xF, CX
// The second stage: prepare for match copying, decode full info.
// If it doesn't work out, the info won't be wasted.
// offset := uint16(data[:2])
MOVWLZX (SI), DX
TESTL DX, DX
JE err_corrupt
ADDQ $2, SI
JC err_short_buf
MOVQ DI, AX
SUBQ DX, AX
JC err_corrupt
CMPQ AX, DI
JA err_short_buf
// if we can't do the second stage then jump straight to read the
// match length, we already have the offset.
CMPL CX, $0xF
JEQ match_len_loop_pre
CMPL DX, $8
JLT match_len_loop_pre
CMPQ AX, R11
JB match_len_loop_pre
// memcpy(op + 0, match + 0, 8);
MOVQ (AX), BX
MOVQ BX, (DI)
// memcpy(op + 8, match + 8, 8);
MOVQ 8(AX), BX
MOVQ BX, 8(DI)
// memcpy(op +16, match +16, 2);
MOVW 16(AX), BX
MOVW BX, 16(DI)
LEAQ const_minMatch(DI)(CX*1), DI
// shortcut complete, load next token
JMP loopcheck
// Read the rest of the literal length:
// do { BX = src[si++]; lit_len += BX } while (BX == 0xFF).
lit_len_loop:
CMPQ SI, R9
JAE err_short_buf
MOVBLZX (SI), BX
INCQ SI
ADDQ BX, CX
CMPB BX, $0xFF
JE lit_len_loop
copy_literal:
// bounds check src and dst
MOVQ SI, AX
ADDQ CX, AX
JC err_short_buf
CMPQ AX, R9
JA err_short_buf
MOVQ DI, BX
ADDQ CX, BX
JC err_short_buf
CMPQ BX, R8
JA err_short_buf
// Copy literals of <=48 bytes through the XMM registers.
CMPQ CX, $48
JGT memmove_lit
// if len(dst[di:]) < 48
MOVQ R8, AX
SUBQ DI, AX
CMPQ AX, $48
JLT memmove_lit
// if len(src[si:]) < 48
MOVQ R9, BX
SUBQ SI, BX
CMPQ BX, $48
JLT memmove_lit
MOVOU (SI), X0
MOVOU 16(SI), X1
MOVOU 32(SI), X2
MOVOU X0, (DI)
MOVOU X1, 16(DI)
MOVOU X2, 32(DI)
ADDQ CX, SI
ADDQ CX, DI
JMP finish_lit_copy
memmove_lit:
// memmove(to, from, len)
MOVQ DI, 0(SP)
MOVQ SI, 8(SP)
MOVQ CX, 16(SP)
// Spill registers. Increment SI, DI now so we don't need to save CX.
ADDQ CX, DI
ADDQ CX, SI
MOVQ DI, 24(SP)
MOVQ SI, 32(SP)
MOVL DX, 40(SP)
CALL runtime·memmove(SB)
// restore registers
MOVQ 24(SP), DI
MOVQ 32(SP), SI
MOVL 40(SP), DX
// recalc initial values
MOVQ dst_base+0(FP), R8
MOVQ R8, R11
ADDQ dst_len+8(FP), R8
MOVQ src_base+24(FP), R9
ADDQ src_len+32(FP), R9
MOVQ dict_base+48(FP), R14
MOVQ dict_len+56(FP), R15
MOVQ R8, R12
SUBQ $32, R12
MOVQ R9, R13
SUBQ $16, R13
finish_lit_copy:
// CX := mLen
// free up DX to use for offset
MOVL DX, CX
ANDL $0xF, CX
CMPQ SI, R9
JAE end
// offset
// si += 2
// DX := int(src[si-2]) | int(src[si-1])<<8
ADDQ $2, SI
JC err_short_buf
CMPQ SI, R9
JA err_short_buf
MOVWQZX -2(SI), DX
// 0 offset is invalid
TESTL DX, DX
JEQ err_corrupt
match_len_loop_pre:
// if mlen != 0xF
CMPB CX, $0xF
JNE copy_match
// do { BX = src[si++]; mlen += BX } while (BX == 0xFF).
match_len_loop:
CMPQ SI, R9
JAE err_short_buf
MOVBLZX (SI), BX
INCQ SI
ADDQ BX, CX
CMPB BX, $0xFF
JE match_len_loop
copy_match:
ADDQ $const_minMatch, CX
// check we have match_len bytes left in dst
// di+match_len < len(dst)
MOVQ DI, AX
ADDQ CX, AX
JC err_short_buf
CMPQ AX, R8
JA err_short_buf
// DX = offset
// CX = match_len
// BX = &dst + (di - offset)
MOVQ DI, BX
SUBQ DX, BX
// check BX is within dst
// if BX < &dst
JC copy_match_from_dict
CMPQ BX, R11
JBE copy_match_from_dict
// if offset + match_len < di
LEAQ (BX)(CX*1), AX
CMPQ DI, AX
JA copy_interior_match
// AX := len(dst[:di])
// MOVQ DI, AX
// SUBQ R11, AX
// copy 16 bytes at a time
// if di-offset < 16 copy 16-(di-offset) bytes to di
// then do the remaining
copy_match_loop:
// for match_len >= 0
// dst[di] = dst[i]
// di++
// i++
MOVB (BX), AX
MOVB AX, (DI)
INCQ DI
INCQ BX
DECQ CX
JNZ copy_match_loop
JMP loopcheck
copy_interior_match:
CMPQ CX, $16
JGT memmove_match
// if len(dst[di:]) < 16
MOVQ R8, AX
SUBQ DI, AX
CMPQ AX, $16
JLT memmove_match
MOVOU (BX), X0
MOVOU X0, (DI)
ADDQ CX, DI
XORL CX, CX
JMP loopcheck
copy_match_from_dict:
// CX = match_len
// BX = &dst + (di - offset)
// AX = offset - di = dict_bytes_available => count of bytes potentially covered by the dictionary
MOVQ R11, AX
SUBQ BX, AX
// BX = len(dict) - dict_bytes_available
MOVQ R15, BX
SUBQ AX, BX
JS err_short_dict
ADDQ R14, BX
// if match_len < dict_bytes_available, the match fits entirely within the external dictionary: just copy
CMPQ CX, AX
JLT memmove_match
// The match stretches over the dictionary and our block
// 1) copy what comes from the dictionary
// AX = dict_bytes_available = copy_size
// BX = &dict_end - copy_size
// CX = match_len
// memmove(to, from, len)
MOVQ DI, 0(SP)
MOVQ BX, 8(SP)
MOVQ AX, 16(SP)
// store extra stuff we want to recover
// spill
MOVQ DI, 24(SP)
MOVQ SI, 32(SP)
MOVQ CX, 40(SP)
CALL runtime·memmove(SB)
// restore registers
MOVQ 16(SP), AX // copy_size
MOVQ 24(SP), DI
MOVQ 32(SP), SI
MOVQ 40(SP), CX // match_len
// recalc initial values
MOVQ dst_base+0(FP), R8
MOVQ R8, R11 // TODO: make these sensible numbers
ADDQ dst_len+8(FP), R8
MOVQ src_base+24(FP), R9
ADDQ src_len+32(FP), R9
MOVQ dict_base+48(FP), R14
MOVQ dict_len+56(FP), R15
MOVQ R8, R12
SUBQ $32, R12
MOVQ R9, R13
SUBQ $16, R13
// di+=copy_size
ADDQ AX, DI
// 2) copy the rest from the current block
// CX = match_len - copy_size = rest_size
SUBQ AX, CX
MOVQ R11, BX
// check if we have a copy overlap
// AX = &dst + rest_size
MOVQ CX, AX
ADDQ BX, AX
// if &dst + rest_size > di, copy byte by byte
CMPQ AX, DI
JA copy_match_loop
memmove_match:
// memmove(to, from, len)
MOVQ DI, 0(SP)
MOVQ BX, 8(SP)
MOVQ CX, 16(SP)
// Spill registers. Increment DI now so we don't need to save CX.
ADDQ CX, DI
MOVQ DI, 24(SP)
MOVQ SI, 32(SP)
CALL runtime·memmove(SB)
// restore registers
MOVQ 24(SP), DI
MOVQ 32(SP), SI
// recalc initial values
MOVQ dst_base+0(FP), R8
MOVQ R8, R11 // TODO: make these sensible numbers
ADDQ dst_len+8(FP), R8
MOVQ src_base+24(FP), R9
ADDQ src_len+32(FP), R9
MOVQ R8, R12
SUBQ $32, R12
MOVQ R9, R13
SUBQ $16, R13
MOVQ dict_base+48(FP), R14
MOVQ dict_len+56(FP), R15
XORL CX, CX
loopcheck:
// for si < len(src)
CMPQ SI, R9
JB loop
end:
// Remaining length must be zero.
TESTQ CX, CX
JNE err_corrupt
SUBQ R11, DI
MOVQ DI, ret+72(FP)
RET
err_corrupt:
MOVQ $-1, ret+72(FP)
RET
err_short_buf:
MOVQ $-2, ret+72(FP)
RET
err_short_dict:
MOVQ $-3, ret+72(FP)
RET

View File

@ -1,231 +0,0 @@
// +build gc
// +build !noasm
#include "go_asm.h"
#include "textflag.h"
// Register allocation.
#define dst R0
#define dstorig R1
#define src R2
#define dstend R3
#define srcend R4
#define match R5 // Match address.
#define dictend R6
#define token R7
#define len R8 // Literal and match lengths.
#define offset R7 // Match offset; overlaps with token.
#define tmp1 R9
#define tmp2 R11
#define tmp3 R12
// func decodeBlock(dst, src, dict []byte) int
TEXT ·decodeBlock(SB), NOFRAME+NOSPLIT, $-4-40
MOVW dst_base +0(FP), dst
MOVW dst_len +4(FP), dstend
MOVW src_base +12(FP), src
MOVW src_len +16(FP), srcend
CMP $0, srcend
BEQ shortSrc
ADD dst, dstend
ADD src, srcend
MOVW dst, dstorig
loop:
// Read token. Extract literal length.
MOVBU.P 1(src), token
MOVW token >> 4, len
CMP $15, len
BNE readLitlenDone
readLitlenLoop:
CMP src, srcend
BEQ shortSrc
MOVBU.P 1(src), tmp1
ADD.S tmp1, len
BVS shortDst
CMP $255, tmp1
BEQ readLitlenLoop
readLitlenDone:
CMP $0, len
BEQ copyLiteralDone
// Bounds check dst+len and src+len.
ADD.S dst, len, tmp1
ADD.CC.S src, len, tmp2
BCS shortSrc
CMP dstend, tmp1
//BHI shortDst // Uncomment for distinct error codes.
CMP.LS srcend, tmp2
BHI shortSrc
// Copy literal.
CMP $4, len
BLO copyLiteralFinish
// Copy 0-3 bytes until src is aligned.
TST $1, src
MOVBU.NE.P 1(src), tmp1
MOVB.NE.P tmp1, 1(dst)
SUB.NE $1, len
TST $2, src
MOVHU.NE.P 2(src), tmp2
MOVB.NE.P tmp2, 1(dst)
MOVW.NE tmp2 >> 8, tmp1
MOVB.NE.P tmp1, 1(dst)
SUB.NE $2, len
B copyLiteralLoopCond
copyLiteralLoop:
// Aligned load, unaligned write.
MOVW.P 4(src), tmp1
MOVW tmp1 >> 8, tmp2
MOVB tmp2, 1(dst)
MOVW tmp1 >> 16, tmp3
MOVB tmp3, 2(dst)
MOVW tmp1 >> 24, tmp2
MOVB tmp2, 3(dst)
MOVB.P tmp1, 4(dst)
copyLiteralLoopCond:
// Loop until len-4 < 0.
SUB.S $4, len
BPL copyLiteralLoop
copyLiteralFinish:
// Copy remaining 0-3 bytes.
// At this point, len may be < 0, but len&3 is still accurate.
TST $1, len
MOVB.NE.P 1(src), tmp3
MOVB.NE.P tmp3, 1(dst)
TST $2, len
MOVB.NE.P 2(src), tmp1
MOVB.NE.P tmp1, 2(dst)
MOVB.NE -1(src), tmp2
MOVB.NE tmp2, -1(dst)
copyLiteralDone:
// Initial part of match length.
// This frees up the token register for reuse as offset.
AND $15, token, len
CMP src, srcend
BEQ end
// Read offset.
ADD.S $2, src
BCS shortSrc
CMP srcend, src
BHI shortSrc
MOVBU -2(src), offset
MOVBU -1(src), tmp1
ORR.S tmp1 << 8, offset
BEQ corrupt
// Read rest of match length.
CMP $15, len
BNE readMatchlenDone
readMatchlenLoop:
CMP src, srcend
BEQ shortSrc
MOVBU.P 1(src), tmp1
ADD.S tmp1, len
BVS shortDst
CMP $255, tmp1
BEQ readMatchlenLoop
readMatchlenDone:
// Bounds check dst+len+minMatch.
ADD.S dst, len, tmp1
ADD.CC.S $const_minMatch, tmp1
BCS shortDst
CMP dstend, tmp1
BHI shortDst
RSB dst, offset, match
CMP dstorig, match
BGE copyMatch4
// match < dstorig means the match starts in the dictionary,
// at len(dict) - offset + (dst - dstorig).
MOVW dict_base+24(FP), match
MOVW dict_len +28(FP), dictend
ADD $const_minMatch, len
RSB dst, dstorig, tmp1
RSB dictend, offset, tmp2
ADD.S tmp2, tmp1
BMI shortDict
ADD match, dictend
ADD tmp1, match
copyDict:
MOVBU.P 1(match), tmp1
MOVB.P tmp1, 1(dst)
SUB.S $1, len
CMP.NE match, dictend
BNE copyDict
// If the match extends beyond the dictionary, the rest is at dstorig.
CMP $0, len
BEQ copyMatchDone
MOVW dstorig, match
B copyMatch
// Copy a regular match.
// Since len+minMatch is at least four, we can do a 4× unrolled
// byte copy loop. Using MOVW instead of four byte loads is faster,
// but to remain portable we'd have to align match first, which is
// too expensive. By alternating loads and stores, we also handle
// the case offset < 4.
copyMatch4:
SUB.S $4, len
MOVBU.P 4(match), tmp1
MOVB.P tmp1, 4(dst)
MOVBU -3(match), tmp2
MOVB tmp2, -3(dst)
MOVBU -2(match), tmp3
MOVB tmp3, -2(dst)
MOVBU -1(match), tmp1
MOVB tmp1, -1(dst)
BPL copyMatch4
// Restore len, which is now negative.
ADD.S $4, len
BEQ copyMatchDone
copyMatch:
// Finish with a byte-at-a-time copy.
SUB.S $1, len
MOVBU.P 1(match), tmp2
MOVB.P tmp2, 1(dst)
BNE copyMatch
copyMatchDone:
CMP src, srcend
BNE loop
end:
CMP $0, len
BNE corrupt
SUB dstorig, dst, tmp1
MOVW tmp1, ret+36(FP)
RET
// The error cases have distinct labels so we can put different
// return codes here when debugging, or if the error returns need to
// be changed.
shortDict:
shortDst:
shortSrc:
corrupt:
MOVW $-1, tmp1
MOVW tmp1, ret+36(FP)
RET

View File

@ -1,241 +0,0 @@
// +build gc
// +build !noasm
// This implementation assumes that strict alignment checking is turned off.
// The Go compiler makes the same assumption.
#include "go_asm.h"
#include "textflag.h"
// Register allocation.
#define dst R0
#define dstorig R1
#define src R2
#define dstend R3
#define dstend16 R4 // dstend - 16
#define srcend R5
#define srcend16 R6 // srcend - 16
#define match R7 // Match address.
#define dict R8
#define dictlen R9
#define dictend R10
#define token R11
#define len R12 // Literal and match lengths.
#define lenRem R13
#define offset R14 // Match offset.
#define tmp1 R15
#define tmp2 R16
#define tmp3 R17
#define tmp4 R19
// func decodeBlock(dst, src, dict []byte) int
TEXT ·decodeBlock(SB), NOFRAME+NOSPLIT, $0-80
LDP dst_base+0(FP), (dst, dstend)
ADD dst, dstend
MOVD dst, dstorig
LDP src_base+24(FP), (src, srcend)
CBZ srcend, shortSrc
ADD src, srcend
// dstend16 = max(dstend-16, 0) and similarly for srcend16.
SUBS $16, dstend, dstend16
CSEL LO, ZR, dstend16, dstend16
SUBS $16, srcend, srcend16
CSEL LO, ZR, srcend16, srcend16
LDP dict_base+48(FP), (dict, dictlen)
ADD dict, dictlen, dictend
loop:
// Read token. Extract literal length.
MOVBU.P 1(src), token
LSR $4, token, len
CMP $15, len
BNE readLitlenDone
readLitlenLoop:
CMP src, srcend
BEQ shortSrc
MOVBU.P 1(src), tmp1
ADDS tmp1, len
BVS shortDst
CMP $255, tmp1
BEQ readLitlenLoop
readLitlenDone:
CBZ len, copyLiteralDone
// Bounds check dst+len and src+len.
ADDS dst, len, tmp1
BCS shortSrc
ADDS src, len, tmp2
BCS shortSrc
CMP dstend, tmp1
BHI shortDst
CMP srcend, tmp2
BHI shortSrc
// Copy literal.
SUBS $16, len
BLO copyLiteralShort
copyLiteralLoop:
LDP.P 16(src), (tmp1, tmp2)
STP.P (tmp1, tmp2), 16(dst)
SUBS $16, len
BPL copyLiteralLoop
// Copy (final part of) literal of length 0-15.
// If we have >=16 bytes left in src and dst, just copy 16 bytes.
copyLiteralShort:
CMP dstend16, dst
CCMP LO, src, srcend16, $0b0010 // 0010 = preserve carry (LO).
BHS copyLiteralShortEnd
AND $15, len
LDP (src), (tmp1, tmp2)
ADD len, src
STP (tmp1, tmp2), (dst)
ADD len, dst
B copyLiteralDone
// Safe but slow copy near the end of src, dst.
copyLiteralShortEnd:
TBZ $3, len, 3(PC)
MOVD.P 8(src), tmp1
MOVD.P tmp1, 8(dst)
TBZ $2, len, 3(PC)
MOVW.P 4(src), tmp2
MOVW.P tmp2, 4(dst)
TBZ $1, len, 3(PC)
MOVH.P 2(src), tmp3
MOVH.P tmp3, 2(dst)
TBZ $0, len, 3(PC)
MOVBU.P 1(src), tmp4
MOVB.P tmp4, 1(dst)
copyLiteralDone:
// Initial part of match length.
AND $15, token, len
CMP src, srcend
BEQ end
// Read offset.
ADDS $2, src
BCS shortSrc
CMP srcend, src
BHI shortSrc
MOVHU -2(src), offset
CBZ offset, corrupt
// Read rest of match length.
CMP $15, len
BNE readMatchlenDone
readMatchlenLoop:
CMP src, srcend
BEQ shortSrc
MOVBU.P 1(src), tmp1
ADDS tmp1, len
BVS shortDst
CMP $255, tmp1
BEQ readMatchlenLoop
readMatchlenDone:
ADD $const_minMatch, len
// Bounds check dst+len.
ADDS dst, len, tmp2
BCS shortDst
CMP dstend, tmp2
BHI shortDst
SUB offset, dst, match
CMP dstorig, match
BHS copyMatchTry8
// match < dstorig means the match starts in the dictionary,
// at len(dict) - offset + (dst - dstorig).
SUB dstorig, dst, tmp1
SUB offset, dictlen, tmp2
ADDS tmp2, tmp1
BMI shortDict
ADD dict, tmp1, match
copyDict:
MOVBU.P 1(match), tmp3
MOVB.P tmp3, 1(dst)
SUBS $1, len
CCMP NE, dictend, match, $0b0100 // 0100 sets the Z (EQ) flag.
BNE copyDict
CBZ len, copyMatchDone
// If the match extends beyond the dictionary, the rest is at dstorig.
// Recompute the offset for the next check.
MOVD dstorig, match
SUB dstorig, dst, offset
copyMatchTry8:
// Copy doublewords if both len and offset are at least eight.
// A 16-at-a-time loop doesn't provide a further speedup.
CMP $8, len
CCMP HS, offset, $8, $0
BLO copyMatchTry4
AND $7, len, lenRem
SUB $8, len
copyMatchLoop8:
MOVD.P 8(match), tmp1
MOVD.P tmp1, 8(dst)
SUBS $8, len
BPL copyMatchLoop8
MOVD (match)(len), tmp2 // match+len == match+lenRem-8.
ADD lenRem, dst
MOVD $0, len
MOVD tmp2, -8(dst)
B copyMatchDone
copyMatchTry4:
// Copy words if both len and offset are at least four.
CMP $4, len
CCMP HS, offset, $4, $0
BLO copyMatchLoop1
MOVWU.P 4(match), tmp2
MOVWU.P tmp2, 4(dst)
SUBS $4, len
BEQ copyMatchDone
copyMatchLoop1:
// Byte-at-a-time copy for small offsets <= 3.
MOVBU.P 1(match), tmp2
MOVB.P tmp2, 1(dst)
SUBS $1, len
BNE copyMatchLoop1
copyMatchDone:
CMP src, srcend
BNE loop
end:
CBNZ len, corrupt
SUB dstorig, dst, tmp1
MOVD tmp1, ret+72(FP)
RET
// The error cases have distinct labels so we can put different
// return codes here when debugging, or if the error returns need to
// be changed.
shortDict:
shortDst:
shortSrc:
corrupt:
MOVD $-1, tmp1
MOVD tmp1, ret+72(FP)
RET

View File

@ -1,10 +0,0 @@
//go:build (amd64 || arm || arm64) && !appengine && gc && !noasm
// +build amd64 arm arm64
// +build !appengine
// +build gc
// +build !noasm
package lz4block
//go:noescape
func decodeBlock(dst, src, dict []byte) int

View File

@ -1,139 +0,0 @@
//go:build (!amd64 && !arm && !arm64) || appengine || !gc || noasm
// +build !amd64,!arm,!arm64 appengine !gc noasm
package lz4block
import (
"encoding/binary"
)
func decodeBlock(dst, src, dict []byte) (ret int) {
// Restrict capacities so we don't read or write out of bounds.
dst = dst[:len(dst):len(dst)]
src = src[:len(src):len(src)]
const hasError = -2
if len(src) == 0 {
return hasError
}
defer func() {
if recover() != nil {
ret = hasError
}
}()
var si, di uint
for si < uint(len(src)) {
// Literals and match lengths (token).
b := uint(src[si])
si++
// Literals.
if lLen := b >> 4; lLen > 0 {
switch {
case lLen < 0xF && si+16 < uint(len(src)):
// Shortcut 1
// if we have enough room in src and dst, and the literals length
// is small enough (0..14) then copy all 16 bytes, even if not all
// are part of the literals.
copy(dst[di:], src[si:si+16])
si += lLen
di += lLen
if mLen := b & 0xF; mLen < 0xF {
// Shortcut 2
// if the match length (4..18) fits within the literals, then copy
// all 18 bytes, even if not all are part of the literals.
mLen += 4
if offset := u16(src[si:]); mLen <= offset && offset < di {
i := di - offset
// The remaining buffer may not hold 18 bytes.
// See https://github.com/pierrec/lz4/issues/51.
if end := i + 18; end <= uint(len(dst)) {
copy(dst[di:], dst[i:end])
si += 2
di += mLen
continue
}
}
}
case lLen == 0xF:
for {
x := uint(src[si])
if lLen += x; int(lLen) < 0 {
return hasError
}
si++
if x != 0xFF {
break
}
}
fallthrough
default:
copy(dst[di:di+lLen], src[si:si+lLen])
si += lLen
di += lLen
}
}
mLen := b & 0xF
if si == uint(len(src)) && mLen == 0 {
break
} else if si >= uint(len(src)) {
return hasError
}
offset := u16(src[si:])
if offset == 0 {
return hasError
}
si += 2
// Match.
mLen += minMatch
if mLen == minMatch+0xF {
for {
x := uint(src[si])
if mLen += x; int(mLen) < 0 {
return hasError
}
si++
if x != 0xFF {
break
}
}
}
// Copy the match.
if di < offset {
// The match is beyond our block, meaning the first part
// is in the dictionary.
fromDict := dict[uint(len(dict))+di-offset:]
n := uint(copy(dst[di:di+mLen], fromDict))
di += n
if mLen -= n; mLen == 0 {
continue
}
// We copied n = offset-di bytes from the dictionary,
// then set di = di+n = offset, so the following code
// copies from dst[di-offset:] = dst[0:].
}
expanded := dst[di-offset:]
if mLen > offset {
// Efficiently copy the match dst[di-offset:di] into the dst slice.
bytesToCopy := offset * (mLen / offset)
for n := offset; n <= bytesToCopy+offset; n *= 2 {
copy(expanded[n:], expanded[:n])
}
di += bytesToCopy
mLen -= bytesToCopy
}
di += uint(copy(dst[di:di+mLen], expanded[:mLen]))
}
return int(di)
}
func u16(p []byte) uint { return uint(binary.LittleEndian.Uint16(p)) }
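
The doubling copy near the end handles overlapping matches (match length greater than offset), which a single copy cannot expand because source and destination overlap. A standalone sketch of that step; with offset 1 it degenerates into run-length expansion:

package main

import "fmt"

// expandMatch mirrors the overlap-handling tail of decodeBlock above.
func expandMatch(dst []byte, di, offset, mLen uint) uint {
	expanded := dst[di-offset:]
	if mLen > offset {
		// Double the already-expanded region until the bulk is copied.
		bytesToCopy := offset * (mLen / offset)
		for n := offset; n <= bytesToCopy+offset; n *= 2 {
			copy(expanded[n:], expanded[:n])
		}
		di += bytesToCopy
		mLen -= bytesToCopy
	}
	di += uint(copy(dst[di:di+mLen], expanded[:mLen]))
	return di
}

func main() {
	dst := make([]byte, 16)
	dst[0] = 'a'                     // one literal already decoded
	di := expandMatch(dst, 1, 1, 10) // match: offset 1, length 10
	fmt.Println(string(dst[:di]))    // "aaaaaaaaaaa"
}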

View File

@ -1,19 +0,0 @@
package lz4errors
type Error string
func (e Error) Error() string { return string(e) }
const (
ErrInvalidSourceShortBuffer Error = "lz4: invalid source or destination buffer too short"
ErrInvalidFrame Error = "lz4: bad magic number"
ErrInternalUnhandledState Error = "lz4: unhandled state"
ErrInvalidHeaderChecksum Error = "lz4: invalid header checksum"
ErrInvalidBlockChecksum Error = "lz4: invalid block checksum"
ErrInvalidFrameChecksum Error = "lz4: invalid frame checksum"
ErrOptionInvalidCompressionLevel Error = "lz4: invalid compression level"
ErrOptionClosedOrError Error = "lz4: cannot apply options on closed or in error object"
ErrOptionInvalidBlockSize Error = "lz4: invalid block size"
ErrOptionNotApplicable Error = "lz4: option not applicable"
ErrWriterNotClosed Error = "lz4: writer not closed"
)

View File

@ -1,348 +0,0 @@
package lz4stream
import (
"encoding/binary"
"fmt"
"io"
"sync"
"github.com/pierrec/lz4/v4/internal/lz4block"
"github.com/pierrec/lz4/v4/internal/lz4errors"
"github.com/pierrec/lz4/v4/internal/xxh32"
)
type Blocks struct {
Block *FrameDataBlock
Blocks chan chan *FrameDataBlock
mu sync.Mutex
err error
}
func (b *Blocks) initW(f *Frame, dst io.Writer, num int) {
if num == 1 {
b.Blocks = nil
b.Block = NewFrameDataBlock(f)
return
}
b.Block = nil
if cap(b.Blocks) != num {
b.Blocks = make(chan chan *FrameDataBlock, num)
}
// goroutine managing concurrent block compression goroutines.
go func() {
// Process next block compression item.
for c := range b.Blocks {
// Read the next compressed block result.
// Waiting here ensures that the blocks are output in the order they were sent.
// The incoming channel is always closed as it indicates to the caller that
// the block has been processed.
block := <-c
if block == nil {
// Notify the block compression routine that we are done with its result.
// This is used when a sentinel block is sent to terminate the compression.
close(c)
return
}
// Do not attempt to write the block upon any previous failure.
if b.err == nil {
// Write the block.
if err := block.Write(f, dst); err != nil {
// Keep the first error.
b.err = err
// All pending compression goroutines need to shut down, so we need to keep going.
}
}
close(c)
}
}()
}
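
The channel-of-channels construction in initW is what keeps concurrently compressed blocks in submission order on the wire: each block reserves its output slot before its compression goroutine starts. The same pattern in miniature, with strings standing in for compressed blocks:

package main

import (
	"fmt"
	"time"
)

func main() {
	results := make(chan chan string, 4)
	done := make(chan struct{})

	// Collector: drain result channels in submission order.
	go func() {
		for c := range results {
			fmt.Println(<-c)
		}
		close(done)
	}()

	for i := 0; i < 3; i++ {
		c := make(chan string, 1)
		results <- c // reserve this block's slot in the output order
		go func(i int, c chan string) {
			time.Sleep(time.Duration(3-i) * time.Millisecond) // finish out of order
			c <- fmt.Sprintf("block %d", i)
		}(i, c)
	}
	close(results)
	<-done // prints block 0, 1, 2 despite reversed completion order
}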
func (b *Blocks) close(f *Frame, num int) error {
if num == 1 {
if b.Block != nil {
b.Block.Close(f)
}
err := b.err
b.err = nil
return err
}
if b.Blocks == nil {
err := b.err
b.err = nil
return err
}
c := make(chan *FrameDataBlock)
b.Blocks <- c
c <- nil
<-c
err := b.err
b.err = nil
return err
}
// ErrorR returns any error set while uncompressing a stream.
func (b *Blocks) ErrorR() error {
b.mu.Lock()
defer b.mu.Unlock()
return b.err
}
// initR returns a channel that streams the uncompressed blocks if in concurrent
// mode and no error. When the channel is closed, check for any error with b.ErrorR.
//
// If not in concurrent mode, the uncompressed block is b.Block and the returned error
// needs to be checked.
func (b *Blocks) initR(f *Frame, num int, src io.Reader) (chan []byte, error) {
size := f.Descriptor.Flags.BlockSizeIndex()
if num == 1 {
b.Blocks = nil
b.Block = NewFrameDataBlock(f)
return nil, nil
}
b.Block = nil
blocks := make(chan chan []byte, num)
// data receives the uncompressed blocks.
data := make(chan []byte)
// Read blocks from the source sequentially
// and uncompress them concurrently.
// In legacy mode, accrue the uncompressed sizes in cum.
var cum uint32
go func() {
var cumx uint32
var err error
for b.ErrorR() == nil {
block := NewFrameDataBlock(f)
cumx, err = block.Read(f, src, 0)
if err != nil {
block.Close(f)
break
}
// Recheck for an error as reading may be slow and uncompressing is expensive.
if b.ErrorR() != nil {
block.Close(f)
break
}
c := make(chan []byte)
blocks <- c
go func() {
defer block.Close(f)
data, err := block.Uncompress(f, size.Get(), nil, false)
if err != nil {
b.closeR(err)
// Close the block channel to indicate an error.
close(c)
} else {
c <- data
}
}()
}
// End the collection loop and the data channel.
c := make(chan []byte)
blocks <- c
c <- nil // signal the collection loop that we are done
<-c // wait for the collect loop to complete
if f.isLegacy() && cum == cumx {
err = io.EOF
}
b.closeR(err)
close(data)
}()
// Collect the uncompressed blocks and make them available
// on the returned channel.
go func(leg bool) {
defer close(blocks)
skipBlocks := false
for c := range blocks {
buf, ok := <-c
if !ok {
// A closed channel indicates an error.
// All remaining channels should be discarded.
skipBlocks = true
continue
}
if buf == nil {
// Signal to end the loop.
close(c)
return
}
if skipBlocks {
// A previous error has occurred, skipping remaining channels.
continue
}
// Perform checksum now as the blocks are received in order.
if f.Descriptor.Flags.ContentChecksum() {
_, _ = f.checksum.Write(buf)
}
if leg {
cum += uint32(len(buf))
}
data <- buf
close(c)
}
}(f.isLegacy())
return data, nil
}
// closeR safely sets the error on b if not already set.
func (b *Blocks) closeR(err error) {
b.mu.Lock()
if b.err == nil {
b.err = err
}
b.mu.Unlock()
}
func NewFrameDataBlock(f *Frame) *FrameDataBlock {
buf := f.Descriptor.Flags.BlockSizeIndex().Get()
return &FrameDataBlock{Data: buf, data: buf}
}
type FrameDataBlock struct {
Size DataBlockSize
Data []byte // compressed or uncompressed data (.data or .src)
Checksum uint32
data []byte // buffer for compressed data
src []byte // uncompressed data
err error // used in concurrent mode
}
func (b *FrameDataBlock) Close(f *Frame) {
b.Size = 0
b.Checksum = 0
b.err = nil
if b.data != nil {
// Block was not already closed.
lz4block.Put(b.data)
b.Data = nil
b.data = nil
b.src = nil
}
}
// Block compression errors are ignored since the buffer is sized appropriately.
func (b *FrameDataBlock) Compress(f *Frame, src []byte, level lz4block.CompressionLevel) *FrameDataBlock {
data := b.data
if f.isLegacy() {
data = data[:cap(data)]
} else {
data = data[:len(src)] // trigger the incompressible flag in CompressBlock
}
var n int
switch level {
case lz4block.Fast:
n, _ = lz4block.CompressBlock(src, data)
default:
n, _ = lz4block.CompressBlockHC(src, data, level)
}
if n == 0 {
b.Size.UncompressedSet(true)
b.Data = src
} else {
b.Size.UncompressedSet(false)
b.Data = data[:n]
}
b.Size.sizeSet(len(b.Data))
b.src = src // keep track of the source for content checksum
if f.Descriptor.Flags.BlockChecksum() {
b.Checksum = xxh32.ChecksumZero(src)
}
return b
}
func (b *FrameDataBlock) Write(f *Frame, dst io.Writer) error {
// Write is called in the same order as blocks are compressed,
// so content checksum must be done here.
if f.Descriptor.Flags.ContentChecksum() {
_, _ = f.checksum.Write(b.src)
}
buf := f.buf[:]
binary.LittleEndian.PutUint32(buf, uint32(b.Size))
if _, err := dst.Write(buf[:4]); err != nil {
return err
}
if _, err := dst.Write(b.Data); err != nil {
return err
}
if b.Checksum == 0 {
return nil
}
binary.LittleEndian.PutUint32(buf, b.Checksum)
_, err := dst.Write(buf[:4])
return err
}
// Read updates b with the next block data, size and checksum if available.
func (b *FrameDataBlock) Read(f *Frame, src io.Reader, cum uint32) (uint32, error) {
x, err := f.readUint32(src)
if err != nil {
return 0, err
}
if f.isLegacy() {
switch x {
case frameMagicLegacy:
// Concatenated legacy frame.
return b.Read(f, src, cum)
case cum:
// Only works in non-concurrent mode; concurrent mode
// handles this separately.
// Linux kernel format appends the total uncompressed size at the end.
return 0, io.EOF
}
} else if x == 0 {
// Marker for end of stream.
return 0, io.EOF
}
b.Size = DataBlockSize(x)
size := b.Size.size()
if size > cap(b.data) {
return x, lz4errors.ErrOptionInvalidBlockSize
}
b.data = b.data[:size]
if _, err := io.ReadFull(src, b.data); err != nil {
return x, err
}
if f.Descriptor.Flags.BlockChecksum() {
sum, err := f.readUint32(src)
if err != nil {
return 0, err
}
b.Checksum = sum
}
return x, nil
}
func (b *FrameDataBlock) Uncompress(f *Frame, dst, dict []byte, sum bool) ([]byte, error) {
if b.Size.Uncompressed() {
n := copy(dst, b.data)
dst = dst[:n]
} else {
n, err := lz4block.UncompressBlock(b.data, dst, dict)
if err != nil {
return nil, err
}
dst = dst[:n]
}
if f.Descriptor.Flags.BlockChecksum() {
if c := xxh32.ChecksumZero(dst); c != b.Checksum {
err := fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidBlockChecksum, c, b.Checksum)
return nil, err
}
}
if sum && f.Descriptor.Flags.ContentChecksum() {
_, _ = f.checksum.Write(dst)
}
return dst, nil
}
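
Read and Write above frame each block with a little-endian uint32: the low 31 bits carry the payload size and the high bit marks a payload stored uncompressed; a zero word is the end-of-stream marker. A sketch of parsing that header:

package main

import (
	"encoding/binary"
	"fmt"
)

func parseBlockHeader(hdr []byte) (size int, uncompressed bool) {
	x := binary.LittleEndian.Uint32(hdr)
	return int(x & 0x7FFFFFFF), x>>31 != 0
}

func main() {
	hdr := make([]byte, 4)

	// A 5-byte incompressible payload: size 5 with the high bit set.
	binary.LittleEndian.PutUint32(hdr, 5|1<<31)
	size, raw := parseBlockHeader(hdr)
	fmt.Println(size, raw) // 5 true

	// A zero word ends the frame.
	binary.LittleEndian.PutUint32(hdr, 0)
	size, _ = parseBlockHeader(hdr)
	fmt.Println(size == 0) // true
}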
func (f *Frame) readUint32(r io.Reader) (x uint32, err error) {
if _, err = io.ReadFull(r, f.buf[:4]); err != nil {
return
}
x = binary.LittleEndian.Uint32(f.buf[:4])
return
}

View File

@ -1,204 +0,0 @@
// Package lz4stream provides the types that support reading and writing LZ4 data streams.
package lz4stream
import (
"encoding/binary"
"fmt"
"io"
"io/ioutil"
"github.com/pierrec/lz4/v4/internal/lz4block"
"github.com/pierrec/lz4/v4/internal/lz4errors"
"github.com/pierrec/lz4/v4/internal/xxh32"
)
//go:generate go run gen.go
const (
frameMagic uint32 = 0x184D2204
frameSkipMagic uint32 = 0x184D2A50
frameMagicLegacy uint32 = 0x184C2102
)
func NewFrame() *Frame {
return &Frame{}
}
type Frame struct {
buf [15]byte // frame descriptor needs at most 4(magic)+2(flags)+8(content size)+1(checksum) = 15 bytes
Magic uint32
Descriptor FrameDescriptor
Blocks Blocks
Checksum uint32
checksum xxh32.XXHZero
}
// Reset allows reusing the Frame.
// The Descriptor configuration is not modified.
func (f *Frame) Reset(num int) {
f.Magic = 0
f.Descriptor.Checksum = 0
f.Descriptor.ContentSize = 0
_ = f.Blocks.close(f, num)
f.Checksum = 0
}
func (f *Frame) InitW(dst io.Writer, num int, legacy bool) {
if legacy {
f.Magic = frameMagicLegacy
idx := lz4block.Index(lz4block.Block8Mb)
f.Descriptor.Flags.BlockSizeIndexSet(idx)
} else {
f.Magic = frameMagic
f.Descriptor.initW()
}
f.Blocks.initW(f, dst, num)
f.checksum.Reset()
}
func (f *Frame) CloseW(dst io.Writer, num int) error {
if err := f.Blocks.close(f, num); err != nil {
return err
}
if f.isLegacy() {
return nil
}
buf := f.buf[:0]
// End mark (data block size of uint32(0)).
buf = append(buf, 0, 0, 0, 0)
if f.Descriptor.Flags.ContentChecksum() {
buf = f.checksum.Sum(buf)
}
_, err := dst.Write(buf)
return err
}
func (f *Frame) isLegacy() bool {
return f.Magic == frameMagicLegacy
}
func (f *Frame) ParseHeaders(src io.Reader) error {
if f.Magic > 0 {
// Header already read.
return nil
}
newFrame:
var err error
if f.Magic, err = f.readUint32(src); err != nil {
return err
}
switch m := f.Magic; {
case m == frameMagic || m == frameMagicLegacy:
// All 16 values of frameSkipMagic are valid.
case m>>8 == frameSkipMagic>>8:
skip, err := f.readUint32(src)
if err != nil {
return err
}
if _, err := io.CopyN(ioutil.Discard, src, int64(skip)); err != nil {
return err
}
goto newFrame
default:
return lz4errors.ErrInvalidFrame
}
if err := f.Descriptor.initR(f, src); err != nil {
return err
}
f.checksum.Reset()
return nil
}
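
ParseHeaders dispatches on the magic number, and skippable frames are matched on their top 24 bits, so all 16 magics from 0x184D2A50 through 0x184D2A5F are accepted. A sketch of that dispatch:

package main

import "fmt"

const (
	frameMagic       uint32 = 0x184D2204
	frameSkipMagic   uint32 = 0x184D2A50
	frameMagicLegacy uint32 = 0x184C2102
)

func classify(m uint32) string {
	switch {
	case m == frameMagic:
		return "lz4 frame"
	case m == frameMagicLegacy:
		return "legacy frame"
	case m>>8 == frameSkipMagic>>8: // low nibble is a wildcard
		return "skippable frame"
	default:
		return "invalid"
	}
}

func main() {
	fmt.Println(classify(0x184D2204)) // lz4 frame
	fmt.Println(classify(0x184D2A5B)) // skippable frame
	fmt.Println(classify(0xDEADBEEF)) // invalid
}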
func (f *Frame) InitR(src io.Reader, num int) (chan []byte, error) {
return f.Blocks.initR(f, num, src)
}
func (f *Frame) CloseR(src io.Reader) (err error) {
if f.isLegacy() {
return nil
}
if !f.Descriptor.Flags.ContentChecksum() {
return nil
}
if f.Checksum, err = f.readUint32(src); err != nil {
return err
}
if c := f.checksum.Sum32(); c != f.Checksum {
return fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidFrameChecksum, c, f.Checksum)
}
return nil
}
type FrameDescriptor struct {
Flags DescriptorFlags
ContentSize uint64
Checksum uint8
}
func (fd *FrameDescriptor) initW() {
fd.Flags.VersionSet(1)
fd.Flags.BlockIndependenceSet(true)
}
func (fd *FrameDescriptor) Write(f *Frame, dst io.Writer) error {
if fd.Checksum > 0 {
// Header already written.
return nil
}
buf := f.buf[:4]
// Write the magic number here even though it belongs to the Frame.
binary.LittleEndian.PutUint32(buf, f.Magic)
if !f.isLegacy() {
buf = buf[:4+2]
binary.LittleEndian.PutUint16(buf[4:], uint16(fd.Flags))
if fd.Flags.Size() {
buf = buf[:4+2+8]
binary.LittleEndian.PutUint64(buf[4+2:], fd.ContentSize)
}
fd.Checksum = descriptorChecksum(buf[4:])
buf = append(buf, fd.Checksum)
}
_, err := dst.Write(buf)
return err
}
func (fd *FrameDescriptor) initR(f *Frame, src io.Reader) error {
if f.isLegacy() {
idx := lz4block.Index(lz4block.Block8Mb)
f.Descriptor.Flags.BlockSizeIndexSet(idx)
return nil
}
// Read the flags and the checksum, hoping that there is no content size.
buf := f.buf[:3]
if _, err := io.ReadFull(src, buf); err != nil {
return err
}
descr := binary.LittleEndian.Uint16(buf)
fd.Flags = DescriptorFlags(descr)
if fd.Flags.Size() {
// Append the 8 missing bytes.
buf = buf[:3+8]
if _, err := io.ReadFull(src, buf[3:]); err != nil {
return err
}
fd.ContentSize = binary.LittleEndian.Uint64(buf[2:])
}
fd.Checksum = buf[len(buf)-1] // the checksum is the last byte
buf = buf[:len(buf)-1] // all descriptor fields except checksum
if c := descriptorChecksum(buf); fd.Checksum != c {
return fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidHeaderChecksum, c, fd.Checksum)
}
// Validate the fields that can be checked here.
if idx := fd.Flags.BlockSizeIndex(); !idx.IsValid() {
return lz4errors.ErrOptionInvalidBlockSize
}
return nil
}
func descriptorChecksum(buf []byte) byte {
return byte(xxh32.ChecksumZero(buf) >> 8)
}

View File

@ -1,103 +0,0 @@
// Code generated by `gen.exe`. DO NOT EDIT.
package lz4stream
import "github.com/pierrec/lz4/v4/internal/lz4block"
// DescriptorFlags is defined as follows:
// field bits
// ----- ----
// _ 2
// ContentChecksum 1
// Size 1
// BlockChecksum 1
// BlockIndependence 1
// Version 2
// _ 4
// BlockSizeIndex 3
// _ 1
type DescriptorFlags uint16
// Getters.
func (x DescriptorFlags) ContentChecksum() bool { return x>>2&1 != 0 }
func (x DescriptorFlags) Size() bool { return x>>3&1 != 0 }
func (x DescriptorFlags) BlockChecksum() bool { return x>>4&1 != 0 }
func (x DescriptorFlags) BlockIndependence() bool { return x>>5&1 != 0 }
func (x DescriptorFlags) Version() uint16 { return uint16(x >> 6 & 0x3) }
func (x DescriptorFlags) BlockSizeIndex() lz4block.BlockSizeIndex {
return lz4block.BlockSizeIndex(x >> 12 & 0x7)
}
// Setters.
func (x *DescriptorFlags) ContentChecksumSet(v bool) *DescriptorFlags {
const b = 1 << 2
if v {
*x = *x&^b | b
} else {
*x &^= b
}
return x
}
func (x *DescriptorFlags) SizeSet(v bool) *DescriptorFlags {
const b = 1 << 3
if v {
*x = *x&^b | b
} else {
*x &^= b
}
return x
}
func (x *DescriptorFlags) BlockChecksumSet(v bool) *DescriptorFlags {
const b = 1 << 4
if v {
*x = *x&^b | b
} else {
*x &^= b
}
return x
}
func (x *DescriptorFlags) BlockIndependenceSet(v bool) *DescriptorFlags {
const b = 1 << 5
if v {
*x = *x&^b | b
} else {
*x &^= b
}
return x
}
func (x *DescriptorFlags) VersionSet(v uint16) *DescriptorFlags {
*x = *x&^(0x3<<6) | (DescriptorFlags(v) & 0x3 << 6)
return x
}
func (x *DescriptorFlags) BlockSizeIndexSet(v lz4block.BlockSizeIndex) *DescriptorFlags {
*x = *x&^(0x7<<12) | (DescriptorFlags(v) & 0x7 << 12)
return x
}
// Code generated by `gen.exe`. DO NOT EDIT.
// DataBlockSize is defined as follows:
// field bits
// ----- ----
// size 31
// Uncompressed 1
type DataBlockSize uint32
// Getters.
func (x DataBlockSize) size() int { return int(x & 0x7FFFFFFF) }
func (x DataBlockSize) Uncompressed() bool { return x>>31&1 != 0 }
// Setters.
func (x *DataBlockSize) sizeSet(v int) *DataBlockSize {
*x = *x&^0x7FFFFFFF | DataBlockSize(v)&0x7FFFFFFF
return x
}
func (x *DataBlockSize) UncompressedSet(v bool) *DataBlockSize {
const b = 1 << 31
if v {
*x = *x&^b | b
} else {
*x &^= b
}
return x
}
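
The generated getters and setters are plain bit packing on a uint16. A sketch assembling the flags initW sets (version 1, independent blocks), plus a block size index picked here purely for illustration:

package main

import "fmt"

type DescriptorFlags uint16

func (x *DescriptorFlags) VersionSet(v uint16) { *x = *x&^(0x3<<6) | (DescriptorFlags(v)&0x3)<<6 }

func (x *DescriptorFlags) BlockIndependenceSet() { *x |= 1 << 5 }

func (x *DescriptorFlags) BlockSizeIndexSet(v uint16) {
	*x = *x&^(0x7<<12) | (DescriptorFlags(v)&0x7)<<12
}

func main() {
	var f DescriptorFlags
	f.VersionSet(1)
	f.BlockIndependenceSet()
	f.BlockSizeIndexSet(7) // 4 MiB blocks; illustrative only
	fmt.Printf("%#04x version=%d blockSizeIndex=%d\n",
		uint16(f), f>>6&0x3, f>>12&0x7) // 0x7060 version=1 blockSizeIndex=7
}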

View File

@ -1,212 +0,0 @@
// Package xxh32 implements the very fast XXH hashing algorithm (32-bit version).
// (ported from the reference implementation https://github.com/Cyan4973/xxHash/)
package xxh32
import (
"encoding/binary"
)
const (
prime1 uint32 = 2654435761
prime2 uint32 = 2246822519
prime3 uint32 = 3266489917
prime4 uint32 = 668265263
prime5 uint32 = 374761393
primeMask = 0xFFFFFFFF
prime1plus2 = uint32((uint64(prime1) + uint64(prime2)) & primeMask) // 606290984
prime1minus = uint32((-int64(prime1)) & primeMask) // 1640531535
)
// XXHZero represents an xxhash32 object with seed 0.
type XXHZero struct {
v [4]uint32
totalLen uint64
buf [16]byte
bufused int
}
// Sum appends the current hash to b and returns the resulting slice.
// It does not change the underlying hash state.
func (xxh XXHZero) Sum(b []byte) []byte {
h32 := xxh.Sum32()
return append(b, byte(h32), byte(h32>>8), byte(h32>>16), byte(h32>>24))
}
// Reset resets the Hash to its initial state.
func (xxh *XXHZero) Reset() {
xxh.v[0] = prime1plus2
xxh.v[1] = prime2
xxh.v[2] = 0
xxh.v[3] = prime1minus
xxh.totalLen = 0
xxh.bufused = 0
}
// Size returns the number of bytes returned by Sum().
func (xxh *XXHZero) Size() int {
return 4
}
// BlockSize gives the minimum number of bytes accepted by Write().
func (xxh *XXHZero) BlockSize() int {
return 1
}
// Write adds input bytes to the Hash.
// It never returns an error.
func (xxh *XXHZero) Write(input []byte) (int, error) {
if xxh.totalLen == 0 {
xxh.Reset()
}
n := len(input)
m := xxh.bufused
xxh.totalLen += uint64(n)
r := len(xxh.buf) - m
if n < r {
copy(xxh.buf[m:], input)
xxh.bufused += len(input)
return n, nil
}
var buf *[16]byte
if m != 0 {
// some data left from previous update
buf = &xxh.buf
c := copy(buf[m:], input)
n -= c
input = input[c:]
}
update(&xxh.v, buf, input)
xxh.bufused = copy(xxh.buf[:], input[n-n%16:])
return n, nil
}
// Portable version of update. This updates v by processing all of buf
// (if not nil) and all full 16-byte blocks of input.
func updateGo(v *[4]uint32, buf *[16]byte, input []byte) {
// Causes compiler to work directly from registers instead of stack:
v1, v2, v3, v4 := v[0], v[1], v[2], v[3]
if buf != nil {
v1 = rol13(v1+binary.LittleEndian.Uint32(buf[:])*prime2) * prime1
v2 = rol13(v2+binary.LittleEndian.Uint32(buf[4:])*prime2) * prime1
v3 = rol13(v3+binary.LittleEndian.Uint32(buf[8:])*prime2) * prime1
v4 = rol13(v4+binary.LittleEndian.Uint32(buf[12:])*prime2) * prime1
}
for ; len(input) >= 16; input = input[16:] {
sub := input[:16] //BCE hint for compiler
v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
}
v[0], v[1], v[2], v[3] = v1, v2, v3, v4
}
// Sum32 returns the 32-bit hash value.
func (xxh *XXHZero) Sum32() uint32 {
h32 := uint32(xxh.totalLen)
if h32 >= 16 {
h32 += rol1(xxh.v[0]) + rol7(xxh.v[1]) + rol12(xxh.v[2]) + rol18(xxh.v[3])
} else {
h32 += prime5
}
p := 0
n := xxh.bufused
buf := xxh.buf
for n := n - 4; p <= n; p += 4 {
h32 += binary.LittleEndian.Uint32(buf[p:p+4]) * prime3
h32 = rol17(h32) * prime4
}
for ; p < n; p++ {
h32 += uint32(buf[p]) * prime5
h32 = rol11(h32) * prime1
}
h32 ^= h32 >> 15
h32 *= prime2
h32 ^= h32 >> 13
h32 *= prime3
h32 ^= h32 >> 16
return h32
}
// Portable version of ChecksumZero.
func checksumZeroGo(input []byte) uint32 {
n := len(input)
h32 := uint32(n)
if n < 16 {
h32 += prime5
} else {
v1 := prime1plus2
v2 := prime2
v3 := uint32(0)
v4 := prime1minus
p := 0
for n := n - 16; p <= n; p += 16 {
sub := input[p:][:16] //BCE hint for compiler
v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
}
input = input[p:]
n -= p
h32 += rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
}
p := 0
for n := n - 4; p <= n; p += 4 {
h32 += binary.LittleEndian.Uint32(input[p:p+4]) * prime3
h32 = rol17(h32) * prime4
}
for p < n {
h32 += uint32(input[p]) * prime5
h32 = rol11(h32) * prime1
p++
}
h32 ^= h32 >> 15
h32 *= prime2
h32 ^= h32 >> 13
h32 *= prime3
h32 ^= h32 >> 16
return h32
}
func rol1(u uint32) uint32 {
return u<<1 | u>>31
}
func rol7(u uint32) uint32 {
return u<<7 | u>>25
}
func rol11(u uint32) uint32 {
return u<<11 | u>>21
}
func rol12(u uint32) uint32 {
return u<<12 | u>>20
}
func rol13(u uint32) uint32 {
return u<<13 | u>>19
}
func rol17(u uint32) uint32 {
return u<<17 | u>>15
}
func rol18(u uint32) uint32 {
return u<<18 | u>>14
}
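
Because Write buffers partial 16-byte blocks, streaming writes with arbitrary boundaries must agree with the one-shot checksum. A test-style sketch, written as if it lived inside this package (which is internal to the module):

package xxh32

import "testing"

func TestStreamingMatchesOneShot(t *testing.T) {
	data := []byte("hello, lz4")

	var h XXHZero // zero value is ready to use; Write resets it lazily
	h.Write(data[:4])
	h.Write(data[4:])

	if got, want := h.Sum32(), ChecksumZero(data); got != want {
		t.Fatalf("got %#x, want %#x", got, want)
	}
}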

View File

@ -1,11 +0,0 @@
// +build !noasm
package xxh32
// ChecksumZero returns the 32-bit hash of input.
//
//go:noescape
func ChecksumZero(input []byte) uint32
//go:noescape
func update(v *[4]uint32, buf *[16]byte, input []byte)

View File

@ -1,251 +0,0 @@
// +build !noasm
#include "go_asm.h"
#include "textflag.h"
// Register allocation.
#define p R0
#define n R1
#define h R2
#define v1 R2 // Alias for h.
#define v2 R3
#define v3 R4
#define v4 R5
#define x1 R6
#define x2 R7
#define x3 R8
#define x4 R9
// We need the primes in registers. The 16-byte loop only uses prime{1,2}.
#define prime1r R11
#define prime2r R12
#define prime3r R3 // The rest can alias v{2-4}.
#define prime4r R4
#define prime5r R5
// Update round macros. These read from and increment p.
#define round16aligned \
MOVM.IA.W (p), [x1, x2, x3, x4] \
\
MULA x1, prime2r, v1, v1 \
MULA x2, prime2r, v2, v2 \
MULA x3, prime2r, v3, v3 \
MULA x4, prime2r, v4, v4 \
\
MOVW v1 @> 19, v1 \
MOVW v2 @> 19, v2 \
MOVW v3 @> 19, v3 \
MOVW v4 @> 19, v4 \
\
MUL prime1r, v1 \
MUL prime1r, v2 \
MUL prime1r, v3 \
MUL prime1r, v4 \
#define round16unaligned \
MOVBU.P 16(p), x1 \
MOVBU -15(p), x2 \
ORR x2 << 8, x1 \
MOVBU -14(p), x3 \
MOVBU -13(p), x4 \
ORR x4 << 8, x3 \
ORR x3 << 16, x1 \
\
MULA x1, prime2r, v1, v1 \
MOVW v1 @> 19, v1 \
MUL prime1r, v1 \
\
MOVBU -12(p), x1 \
MOVBU -11(p), x2 \
ORR x2 << 8, x1 \
MOVBU -10(p), x3 \
MOVBU -9(p), x4 \
ORR x4 << 8, x3 \
ORR x3 << 16, x1 \
\
MULA x1, prime2r, v2, v2 \
MOVW v2 @> 19, v2 \
MUL prime1r, v2 \
\
MOVBU -8(p), x1 \
MOVBU -7(p), x2 \
ORR x2 << 8, x1 \
MOVBU -6(p), x3 \
MOVBU -5(p), x4 \
ORR x4 << 8, x3 \
ORR x3 << 16, x1 \
\
MULA x1, prime2r, v3, v3 \
MOVW v3 @> 19, v3 \
MUL prime1r, v3 \
\
MOVBU -4(p), x1 \
MOVBU -3(p), x2 \
ORR x2 << 8, x1 \
MOVBU -2(p), x3 \
MOVBU -1(p), x4 \
ORR x4 << 8, x3 \
ORR x3 << 16, x1 \
\
MULA x1, prime2r, v4, v4 \
MOVW v4 @> 19, v4 \
MUL prime1r, v4 \
// func ChecksumZero([]byte) uint32
TEXT ·ChecksumZero(SB), NOFRAME|NOSPLIT, $-4-16
MOVW input_base+0(FP), p
MOVW input_len+4(FP), n
MOVW $const_prime1, prime1r
MOVW $const_prime2, prime2r
// Set up h for n < 16. It's tempting to say {ADD prime5, n, h}
// here, but that's a pseudo-op that generates a load through R11.
MOVW $const_prime5, prime5r
ADD prime5r, n, h
CMP $0, n
BEQ end
// We let n go negative so we can do comparisons with SUB.S
// instead of separate CMP.
SUB.S $16, n
BMI loop16done
ADD prime1r, prime2r, v1
MOVW prime2r, v2
MOVW $0, v3
RSB $0, prime1r, v4
TST $3, p
BNE loop16unaligned
loop16aligned:
SUB.S $16, n
round16aligned
BPL loop16aligned
B loop16finish
loop16unaligned:
SUB.S $16, n
round16unaligned
BPL loop16unaligned
loop16finish:
MOVW v1 @> 31, h
ADD v2 @> 25, h
ADD v3 @> 20, h
ADD v4 @> 14, h
// h += len(input) with v2 as temporary.
MOVW input_len+4(FP), v2
ADD v2, h
loop16done:
ADD $16, n // Restore number of bytes left.
SUB.S $4, n
MOVW $const_prime3, prime3r
BMI loop4done
MOVW $const_prime4, prime4r
TST $3, p
BNE loop4unaligned
loop4aligned:
SUB.S $4, n
MOVW.P 4(p), x1
MULA prime3r, x1, h, h
MOVW h @> 15, h
MUL prime4r, h
BPL loop4aligned
B loop4done
loop4unaligned:
SUB.S $4, n
MOVBU.P 4(p), x1
MOVBU -3(p), x2
ORR x2 << 8, x1
MOVBU -2(p), x3
ORR x3 << 16, x1
MOVBU -1(p), x4
ORR x4 << 24, x1
MULA prime3r, x1, h, h
MOVW h @> 15, h
MUL prime4r, h
BPL loop4unaligned
loop4done:
ADD.S $4, n // Restore number of bytes left.
BEQ end
MOVW $const_prime5, prime5r
loop1:
SUB.S $1, n
MOVBU.P 1(p), x1
MULA prime5r, x1, h, h
MOVW h @> 21, h
MUL prime1r, h
BNE loop1
end:
MOVW $const_prime3, prime3r
EOR h >> 15, h
MUL prime2r, h
EOR h >> 13, h
MUL prime3r, h
EOR h >> 16, h
MOVW h, ret+12(FP)
RET
// func update(v *[4]uint32, buf *[16]byte, input []byte)
TEXT ·update(SB), NOFRAME|NOSPLIT, $-4-20
MOVW v+0(FP), p
MOVM.IA (p), [v1, v2, v3, v4]
MOVW $const_prime1, prime1r
MOVW $const_prime2, prime2r
// Process buf, if not nil.
MOVW buf+4(FP), p
CMP $0, p
BEQ noBuffered
round16aligned
noBuffered:
MOVW input_base +8(FP), p
MOVW input_len +12(FP), n
SUB.S $16, n
BMI end
TST $3, p
BNE loop16unaligned
loop16aligned:
SUB.S $16, n
round16aligned
BPL loop16aligned
B end
loop16unaligned:
SUB.S $16, n
round16unaligned
BPL loop16unaligned
end:
MOVW v+0(FP), p
MOVM.IA [v1, v2, v3, v4], (p)
RET

View File

@ -1,10 +0,0 @@
// +build !arm noasm
package xxh32
// ChecksumZero returns the 32-bit hash of input.
func ChecksumZero(input []byte) uint32 { return checksumZeroGo(input) }
func update(v *[4]uint32, buf *[16]byte, input []byte) {
updateGo(v, buf, input)
}

View File

@ -1,157 +0,0 @@
// Package lz4 implements reading and writing lz4 compressed data.
//
// The package supports both the LZ4 stream format,
// as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html,
// and the LZ4 block format, defined at
// http://fastcompression.blogspot.fr/2011/05/lz4-explained.html.
//
// See https://github.com/lz4/lz4 for the reference C implementation.
package lz4
import (
"github.com/pierrec/lz4/v4/internal/lz4block"
"github.com/pierrec/lz4/v4/internal/lz4errors"
)
func _() {
// Safety checks for duplicated elements.
var x [1]struct{}
_ = x[lz4block.CompressionLevel(Fast)-lz4block.Fast]
_ = x[Block64Kb-BlockSize(lz4block.Block64Kb)]
_ = x[Block256Kb-BlockSize(lz4block.Block256Kb)]
_ = x[Block1Mb-BlockSize(lz4block.Block1Mb)]
_ = x[Block4Mb-BlockSize(lz4block.Block4Mb)]
}
// CompressBlockBound returns the maximum compressed size of a buffer of size n, for the worst case where the data is not compressible.
func CompressBlockBound(n int) int {
return lz4block.CompressBlockBound(n)
}
// UncompressBlock uncompresses the source buffer into the destination one,
// and returns the uncompressed size.
//
// The destination buffer must be sized appropriately.
//
// An error is returned if the source data is invalid or the destination buffer is too small.
func UncompressBlock(src, dst []byte) (int, error) {
return lz4block.UncompressBlock(src, dst, nil)
}
// UncompressBlockWithDict uncompresses the source buffer into the destination one using a
// dictionary, and returns the uncompressed size.
//
// The destination buffer must be sized appropriately.
//
// An error is returned if the source data is invalid or the destination buffer is too small.
func UncompressBlockWithDict(src, dst, dict []byte) (int, error) {
return lz4block.UncompressBlock(src, dst, dict)
}
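As a usage sketch (not from this package): decompressing a block that was compressed against a dictionary requires the same dictionary bytes and a destination sized for the expected output. This assumes the github.com/pierrec/lz4/v4 import; maxSize is a hypothetical caller-supplied bound.

// decompressWithDict is a sketch: src is an LZ4 block, dict the dictionary
// used at compression time, maxSize an upper bound on the original size.
func decompressWithDict(src, dict []byte, maxSize int) ([]byte, error) {
	dst := make([]byte, maxSize)
	n, err := lz4.UncompressBlockWithDict(src, dst, dict)
	if err != nil {
		return nil, err
	}
	return dst[:n], nil
}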
// A Compressor compresses data into the LZ4 block format.
// It uses a fast compression algorithm.
//
// A Compressor is not safe for concurrent use by multiple goroutines.
//
// Use a Writer to compress into the LZ4 stream format.
type Compressor struct{ c lz4block.Compressor }
// CompressBlock compresses the source buffer src into the destination dst.
//
// If compression is successful, the first return value is the size of the
// compressed data, which is always >0.
//
// If dst has length at least CompressBlockBound(len(src)), compression always
// succeeds. Otherwise, the first return value is zero. The error return is
// non-nil if the compressed data does not fit in dst, but it might fit in a
// larger buffer that is still smaller than CompressBlockBound(len(src)). The
// return value (0, nil) means the data is likely incompressible and a buffer
// of length CompressBlockBound(len(src)) should be passed in.
func (c *Compressor) CompressBlock(src, dst []byte) (int, error) {
return c.c.CompressBlock(src, dst)
}
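The buffer contract above is easiest to see in a short round trip. A minimal, self-contained sketch (error handling reduced to panics for brevity):

package main

import (
	"bytes"
	"fmt"

	"github.com/pierrec/lz4/v4"
)

func main() {
	src := bytes.Repeat([]byte("lz4 block "), 100)
	// A CompressBlockBound-sized dst guarantees compression succeeds.
	dst := make([]byte, lz4.CompressBlockBound(len(src)))
	var c lz4.Compressor
	n, err := c.CompressBlock(src, dst)
	if err != nil {
		panic(err)
	}
	// Decompress into a buffer sized to the known original length.
	out := make([]byte, len(src))
	m, err := lz4.UncompressBlock(dst[:n], out)
	if err != nil {
		panic(err)
	}
	fmt.Println(n, m, bytes.Equal(src, out[:m]))
}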
// CompressBlock compresses the source buffer into the destination one.
// This is the fast version of LZ4 compression and also the default one.
//
// The argument hashTable is scratch space for a hash table used by the
// compressor. If provided, it should have length at least 1<<16. If it is
// shorter (or nil), CompressBlock allocates its own hash table.
//
// The size of the compressed data is returned.
//
// If the destination buffer is smaller than CompressBlockBound(len(src)),
// a returned size of 0 with no error means the data is incompressible.
//
// An error is returned if the destination buffer is too small.
//
// CompressBlock is equivalent to Compressor.CompressBlock.
// The final argument is ignored and should be set to nil.
//
// This function is deprecated. Use a Compressor instead.
func CompressBlock(src, dst []byte, _ []int) (int, error) {
return lz4block.CompressBlock(src, dst)
}
// A CompressorHC compresses data into the LZ4 block format.
// Its compression ratio is potentially better than that of a Compressor,
// but it is also slower and requires more memory.
//
// A Compressor is not safe for concurrent use by multiple goroutines.
//
// Use a Writer to compress into the LZ4 stream format.
type CompressorHC struct {
// Level is the maximum search depth for compression.
// Values <= 0 mean no maximum.
Level CompressionLevel
c lz4block.CompressorHC
}
// CompressBlock compresses the source buffer src into the destination dst.
//
// If compression is successful, the first return value is the size of the
// compressed data, which is always >0.
//
// If dst has length at least CompressBlockBound(len(src)), compression always
// succeeds. Otherwise, the first return value is zero. The error return is
// non-nil if the compressed data does not fit in dst, but it might fit in a
// larger buffer that is still smaller than CompressBlockBound(len(src)). The
// return value (0, nil) means the data is likely incompressible and a buffer
// of length CompressBlockBound(len(src)) should be passed in.
func (c *CompressorHC) CompressBlock(src, dst []byte) (int, error) {
return c.c.CompressBlock(src, dst, lz4block.CompressionLevel(c.Level))
}
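Usage mirrors Compressor, with the search depth set through the Level field. A sketch, assuming the same github.com/pierrec/lz4/v4 import:

// compressHC is a sketch of the high-compression block path.
func compressHC(src []byte) ([]byte, error) {
	var hc lz4.CompressorHC
	hc.Level = lz4.Level9 // deepest predefined search level
	dst := make([]byte, lz4.CompressBlockBound(len(src)))
	n, err := hc.CompressBlock(src, dst)
	if err != nil {
		return nil, err
	}
	return dst[:n], nil
}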
// CompressBlockHC is equivalent to CompressorHC.CompressBlock.
// The final two arguments are ignored and should be set to nil.
//
// This function is deprecated. Use a CompressorHC instead.
func CompressBlockHC(src, dst []byte, depth CompressionLevel, _, _ []int) (int, error) {
return lz4block.CompressBlockHC(src, dst, lz4block.CompressionLevel(depth))
}
const (
// ErrInvalidSourceShortBuffer is returned by UncompressBlock or CompressBlock when a compressed
// block is corrupted or the destination buffer is not large enough for the uncompressed data.
ErrInvalidSourceShortBuffer = lz4errors.ErrInvalidSourceShortBuffer
// ErrInvalidFrame is returned when reading an invalid LZ4 archive.
ErrInvalidFrame = lz4errors.ErrInvalidFrame
// ErrInternalUnhandledState is an internal error.
ErrInternalUnhandledState = lz4errors.ErrInternalUnhandledState
// ErrInvalidHeaderChecksum is returned when reading a frame.
ErrInvalidHeaderChecksum = lz4errors.ErrInvalidHeaderChecksum
// ErrInvalidBlockChecksum is returned when reading a frame.
ErrInvalidBlockChecksum = lz4errors.ErrInvalidBlockChecksum
// ErrInvalidFrameChecksum is returned when reading a frame.
ErrInvalidFrameChecksum = lz4errors.ErrInvalidFrameChecksum
// ErrOptionInvalidCompressionLevel is returned when the supplied compression level is invalid.
ErrOptionInvalidCompressionLevel = lz4errors.ErrOptionInvalidCompressionLevel
// ErrOptionClosedOrError is returned when an option is applied to a closed or in error object.
ErrOptionClosedOrError = lz4errors.ErrOptionClosedOrError
// ErrOptionInvalidBlockSize is returned when an invalid block size is provided.
ErrOptionInvalidBlockSize = lz4errors.ErrOptionInvalidBlockSize
// ErrOptionNotApplicable is returned when trying to apply an option to an object not supporting it.
ErrOptionNotApplicable = lz4errors.ErrOptionNotApplicable
// ErrWriterNotClosed is returned when attempting to reset an unclosed writer.
ErrWriterNotClosed = lz4errors.ErrWriterNotClosed
)

View File

@ -1,242 +0,0 @@
package lz4
import (
"fmt"
"reflect"
"runtime"
"github.com/pierrec/lz4/v4/internal/lz4block"
"github.com/pierrec/lz4/v4/internal/lz4errors"
)
//go:generate go run golang.org/x/tools/cmd/stringer -type=BlockSize,CompressionLevel -output options_gen.go
type (
applier interface {
Apply(...Option) error
private()
}
// Option defines the parameters to setup an LZ4 Writer or Reader.
Option func(applier) error
)
// String returns a string representation of the option with its parameter(s).
func (o Option) String() string {
return o(nil).Error()
}
// Default options.
var (
DefaultBlockSizeOption = BlockSizeOption(Block4Mb)
DefaultChecksumOption = ChecksumOption(true)
DefaultConcurrency = ConcurrencyOption(1)
defaultOnBlockDone = OnBlockDoneOption(nil)
)
const (
Block64Kb BlockSize = 1 << (16 + iota*2)
Block256Kb
Block1Mb
Block4Mb
)
// BlockSize defines the size of the blocks to be compressed.
type BlockSize uint32
// BlockSizeOption defines the maximum size of compressed blocks (default=Block4Mb).
func BlockSizeOption(size BlockSize) Option {
return func(a applier) error {
switch w := a.(type) {
case nil:
s := fmt.Sprintf("BlockSizeOption(%s)", size)
return lz4errors.Error(s)
case *Writer:
size := uint32(size)
if !lz4block.IsValid(size) {
return fmt.Errorf("%w: %d", lz4errors.ErrOptionInvalidBlockSize, size)
}
w.frame.Descriptor.Flags.BlockSizeIndexSet(lz4block.Index(size))
return nil
case *CompressingReader:
size := uint32(size)
if !lz4block.IsValid(size) {
return fmt.Errorf("%w: %d", lz4errors.ErrOptionInvalidBlockSize, size)
}
w.frame.Descriptor.Flags.BlockSizeIndexSet(lz4block.Index(size))
return nil
}
return lz4errors.ErrOptionNotApplicable
}
}
// BlockChecksumOption enables or disables block checksum (default=false).
func BlockChecksumOption(flag bool) Option {
return func(a applier) error {
switch w := a.(type) {
case nil:
s := fmt.Sprintf("BlockChecksumOption(%v)", flag)
return lz4errors.Error(s)
case *Writer:
w.frame.Descriptor.Flags.BlockChecksumSet(flag)
return nil
case *CompressingReader:
w.frame.Descriptor.Flags.BlockChecksumSet(flag)
return nil
}
return lz4errors.ErrOptionNotApplicable
}
}
// ChecksumOption enables or disables the frame content checksum (default=true).
func ChecksumOption(flag bool) Option {
return func(a applier) error {
switch w := a.(type) {
case nil:
s := fmt.Sprintf("ChecksumOption(%v)", flag)
return lz4errors.Error(s)
case *Writer:
w.frame.Descriptor.Flags.ContentChecksumSet(flag)
return nil
case *CompressingReader:
w.frame.Descriptor.Flags.ContentChecksumSet(flag)
return nil
}
return lz4errors.ErrOptionNotApplicable
}
}
// SizeOption sets the size of the original uncompressed data (default=0). It is useful to know the size of the
// whole uncompressed data stream.
func SizeOption(size uint64) Option {
return func(a applier) error {
switch w := a.(type) {
case nil:
s := fmt.Sprintf("SizeOption(%d)", size)
return lz4errors.Error(s)
case *Writer:
w.frame.Descriptor.Flags.SizeSet(size > 0)
w.frame.Descriptor.ContentSize = size
return nil
case *CompressingReader:
w.frame.Descriptor.Flags.SizeSet(size > 0)
w.frame.Descriptor.ContentSize = size
return nil
}
return lz4errors.ErrOptionNotApplicable
}
}
// ConcurrencyOption sets the number of goroutines used for compression.
// If n <= 0, then the output of runtime.GOMAXPROCS(0) is used.
func ConcurrencyOption(n int) Option {
if n <= 0 {
n = runtime.GOMAXPROCS(0)
}
return func(a applier) error {
switch rw := a.(type) {
case nil:
s := fmt.Sprintf("ConcurrencyOption(%d)", n)
return lz4errors.Error(s)
case *Writer:
rw.num = n
return nil
case *Reader:
rw.num = n
return nil
}
return lz4errors.ErrOptionNotApplicable
}
}
// CompressionLevel defines the level of compression to use. The higher the level, the better (and slower) the compression.
type CompressionLevel uint32
const (
Fast CompressionLevel = 0
Level1 CompressionLevel = 1 << (8 + iota)
Level2
Level3
Level4
Level5
Level6
Level7
Level8
Level9
)
// CompressionLevelOption defines the compression level (default=Fast).
func CompressionLevelOption(level CompressionLevel) Option {
return func(a applier) error {
switch w := a.(type) {
case nil:
s := fmt.Sprintf("CompressionLevelOption(%s)", level)
return lz4errors.Error(s)
case *Writer:
switch level {
case Fast, Level1, Level2, Level3, Level4, Level5, Level6, Level7, Level8, Level9:
default:
return fmt.Errorf("%w: %d", lz4errors.ErrOptionInvalidCompressionLevel, level)
}
w.level = lz4block.CompressionLevel(level)
return nil
case *CompressingReader:
switch level {
case Fast, Level1, Level2, Level3, Level4, Level5, Level6, Level7, Level8, Level9:
default:
return fmt.Errorf("%w: %d", lz4errors.ErrOptionInvalidCompressionLevel, level)
}
w.level = lz4block.CompressionLevel(level)
return nil
}
return lz4errors.ErrOptionNotApplicable
}
}
func onBlockDone(int) {}
// OnBlockDoneOption sets a handler that is triggered when a block has been processed. For a Writer, that is when it
// has been compressed; for a Reader, when it has been uncompressed.
func OnBlockDoneOption(handler func(size int)) Option {
if handler == nil {
handler = onBlockDone
}
return func(a applier) error {
switch rw := a.(type) {
case nil:
s := fmt.Sprintf("OnBlockDoneOption(%s)", reflect.TypeOf(handler).String())
return lz4errors.Error(s)
case *Writer:
rw.handler = handler
return nil
case *Reader:
rw.handler = handler
return nil
case *CompressingReader:
rw.handler = handler
return nil
}
return lz4errors.ErrOptionNotApplicable
}
}
// LegacyOption provides support for writing LZ4 frames in the legacy format.
//
// See https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md#legacy-frame.
//
// NB. compressed Linux kernel images use a tweaked LZ4 legacy format where
// the compressed stream is followed by the original (uncompressed) size of
// the kernel (https://events.static.linuxfound.org/sites/events/files/lcjpcojp13_klee.pdf).
// This is also supported as a special case.
func LegacyOption(legacy bool) Option {
return func(a applier) error {
switch rw := a.(type) {
case nil:
s := fmt.Sprintf("LegacyOption(%v)", legacy)
return lz4errors.Error(s)
case *Writer:
rw.legacy = legacy
return nil
}
return lz4errors.ErrOptionNotApplicable
}
}
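Options are applied at construction or later through Apply, before the first I/O call. A sketch combining several of the options above (the values are illustrative, not recommendations; assumes io and the lz4 package are imported):

// newTunedWriter is a sketch: configure an LZ4 frame writer before use.
func newTunedWriter(dst io.Writer) (*lz4.Writer, error) {
	w := lz4.NewWriter(dst)
	if err := w.Apply(
		lz4.BlockSizeOption(lz4.Block256Kb),
		lz4.CompressionLevelOption(lz4.Level5),
		lz4.ChecksumOption(true),
		lz4.ConcurrencyOption(4),
	); err != nil {
		return nil, err
	}
	return w, nil
}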

View File

@ -1,92 +0,0 @@
// Code generated by "stringer -type=BlockSize,CompressionLevel -output options_gen.go"; DO NOT EDIT.
package lz4
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[Block64Kb-65536]
_ = x[Block256Kb-262144]
_ = x[Block1Mb-1048576]
_ = x[Block4Mb-4194304]
}
const (
_BlockSize_name_0 = "Block64Kb"
_BlockSize_name_1 = "Block256Kb"
_BlockSize_name_2 = "Block1Mb"
_BlockSize_name_3 = "Block4Mb"
)
func (i BlockSize) String() string {
switch {
case i == 65536:
return _BlockSize_name_0
case i == 262144:
return _BlockSize_name_1
case i == 1048576:
return _BlockSize_name_2
case i == 4194304:
return _BlockSize_name_3
default:
return "BlockSize(" + strconv.FormatInt(int64(i), 10) + ")"
}
}
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[Fast-0]
_ = x[Level1-512]
_ = x[Level2-1024]
_ = x[Level3-2048]
_ = x[Level4-4096]
_ = x[Level5-8192]
_ = x[Level6-16384]
_ = x[Level7-32768]
_ = x[Level8-65536]
_ = x[Level9-131072]
}
const (
_CompressionLevel_name_0 = "Fast"
_CompressionLevel_name_1 = "Level1"
_CompressionLevel_name_2 = "Level2"
_CompressionLevel_name_3 = "Level3"
_CompressionLevel_name_4 = "Level4"
_CompressionLevel_name_5 = "Level5"
_CompressionLevel_name_6 = "Level6"
_CompressionLevel_name_7 = "Level7"
_CompressionLevel_name_8 = "Level8"
_CompressionLevel_name_9 = "Level9"
)
func (i CompressionLevel) String() string {
switch {
case i == 0:
return _CompressionLevel_name_0
case i == 512:
return _CompressionLevel_name_1
case i == 1024:
return _CompressionLevel_name_2
case i == 2048:
return _CompressionLevel_name_3
case i == 4096:
return _CompressionLevel_name_4
case i == 8192:
return _CompressionLevel_name_5
case i == 16384:
return _CompressionLevel_name_6
case i == 32768:
return _CompressionLevel_name_7
case i == 65536:
return _CompressionLevel_name_8
case i == 131072:
return _CompressionLevel_name_9
default:
return "CompressionLevel(" + strconv.FormatInt(int64(i), 10) + ")"
}
}

View File

@ -1,275 +0,0 @@
package lz4
import (
"bytes"
"io"
"github.com/pierrec/lz4/v4/internal/lz4block"
"github.com/pierrec/lz4/v4/internal/lz4errors"
"github.com/pierrec/lz4/v4/internal/lz4stream"
)
var readerStates = []aState{
noState: newState,
errorState: newState,
newState: readState,
readState: closedState,
closedState: newState,
}
// NewReader returns a new LZ4 frame decoder.
func NewReader(r io.Reader) *Reader {
return newReader(r, false)
}
func newReader(r io.Reader, legacy bool) *Reader {
zr := &Reader{frame: lz4stream.NewFrame()}
zr.state.init(readerStates)
_ = zr.Apply(DefaultConcurrency, defaultOnBlockDone)
zr.Reset(r)
return zr
}
// Reader allows reading an LZ4 stream.
type Reader struct {
state _State
src io.Reader // source reader
num int // concurrency level
frame *lz4stream.Frame // frame being read
data []byte // block buffer allocated in non concurrent mode
reads chan []byte // pending data
idx int // size of pending data
handler func(int)
cum uint32
dict []byte
}
func (*Reader) private() {}
func (r *Reader) Apply(options ...Option) (err error) {
defer r.state.check(&err)
switch r.state.state {
case newState:
case errorState:
return r.state.err
default:
return lz4errors.ErrOptionClosedOrError
}
for _, o := range options {
if err = o(r); err != nil {
return
}
}
return
}
// Size returns the size of the underlying uncompressed data, if set in the stream.
func (r *Reader) Size() int {
switch r.state.state {
case readState, closedState:
if r.frame.Descriptor.Flags.Size() {
return int(r.frame.Descriptor.ContentSize)
}
}
return 0
}
func (r *Reader) isNotConcurrent() bool {
return r.num == 1
}
func (r *Reader) init() error {
err := r.frame.ParseHeaders(r.src)
if err != nil {
return err
}
if !r.frame.Descriptor.Flags.BlockIndependence() {
// We can't decompress dependent blocks concurrently.
// Instead of returning an error to the user, silently drop concurrency.
r.num = 1
}
data, err := r.frame.InitR(r.src, r.num)
if err != nil {
return err
}
r.reads = data
r.idx = 0
size := r.frame.Descriptor.Flags.BlockSizeIndex()
r.data = size.Get()
r.cum = 0
return nil
}
func (r *Reader) Read(buf []byte) (n int, err error) {
defer r.state.check(&err)
switch r.state.state {
case readState:
case closedState, errorState:
return 0, r.state.err
case newState:
// First initialization.
if err = r.init(); r.state.next(err) {
return
}
default:
return 0, r.state.fail()
}
for len(buf) > 0 {
var bn int
if r.idx == 0 {
if r.isNotConcurrent() {
bn, err = r.read(buf)
} else {
lz4block.Put(r.data)
r.data = <-r.reads
if len(r.data) == 0 {
// No uncompressed data: something went wrong or we are done.
err = r.frame.Blocks.ErrorR()
}
}
switch err {
case nil:
case io.EOF:
if er := r.frame.CloseR(r.src); er != nil {
err = er
}
lz4block.Put(r.data)
r.data = nil
return
default:
return
}
}
if bn == 0 {
// Fill buf with buffered data.
bn = copy(buf, r.data[r.idx:])
r.idx += bn
if r.idx == len(r.data) {
// All data read, get ready for the next Read.
r.idx = 0
}
}
buf = buf[bn:]
n += bn
r.handler(bn)
}
return
}
// read uncompresses the next block as follows:
// - if buf has enough room, the block is uncompressed into it directly
//   and the length of used space is returned
// - else, the uncompressed data is stored in r.data and 0 is returned
func (r *Reader) read(buf []byte) (int, error) {
block := r.frame.Blocks.Block
_, err := block.Read(r.frame, r.src, r.cum)
if err != nil {
return 0, err
}
var direct bool
dst := r.data[:cap(r.data)]
if len(buf) >= len(dst) {
// Uncompress directly into buf.
direct = true
dst = buf
}
dst, err = block.Uncompress(r.frame, dst, r.dict, true)
if err != nil {
return 0, err
}
if !r.frame.Descriptor.Flags.BlockIndependence() {
if len(r.dict)+len(dst) > 128*1024 {
preserveSize := 64*1024 - len(dst)
if preserveSize < 0 {
preserveSize = 0
}
r.dict = r.dict[len(r.dict)-preserveSize:]
}
r.dict = append(r.dict, dst...)
}
r.cum += uint32(len(dst))
if direct {
return len(dst), nil
}
r.data = dst
return 0, nil
}
// Reset clears the state of the Reader r such that it is equivalent to its
// initial state from NewReader, but instead reading from reader.
// No access to reader is performed.
func (r *Reader) Reset(reader io.Reader) {
if r.data != nil {
lz4block.Put(r.data)
r.data = nil
}
r.frame.Reset(r.num)
r.state.reset()
r.src = reader
r.reads = nil
}
// WriteTo efficiently uncompresses the data from the Reader's underlying source to w.
func (r *Reader) WriteTo(w io.Writer) (n int64, err error) {
switch r.state.state {
case closedState, errorState:
return 0, r.state.err
case newState:
if err = r.init(); r.state.next(err) {
return
}
default:
return 0, r.state.fail()
}
defer r.state.nextd(&err)
var data []byte
if r.isNotConcurrent() {
size := r.frame.Descriptor.Flags.BlockSizeIndex()
data = size.Get()
defer lz4block.Put(data)
}
for {
var bn int
var dst []byte
if r.isNotConcurrent() {
bn, err = r.read(data)
dst = data[:bn]
} else {
lz4block.Put(dst)
dst = <-r.reads
bn = len(dst)
if bn == 0 {
// No uncompressed data: something went wrong or we are done.
err = r.frame.Blocks.ErrorR()
}
}
switch err {
case nil:
case io.EOF:
err = r.frame.CloseR(r.src)
return
default:
return
}
r.handler(bn)
bn, err = w.Write(dst)
n += int64(bn)
if err != nil {
return
}
}
}
// ValidFrameHeader returns a bool indicating whether the given byte slice begins with a valid LZ4 frame header.
func ValidFrameHeader(in []byte) (bool, error) {
f := lz4stream.NewFrame()
err := f.ParseHeaders(bytes.NewReader(in))
if err == nil {
return true, nil
}
if err == lz4errors.ErrInvalidFrame {
return false, nil
}
return false, err
}
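Putting the Reader together, a sketch of stream decompression (assumes io and the lz4 package are imported):

// decompressFrame is a sketch: stream-decompress an LZ4 frame from src to dst.
func decompressFrame(dst io.Writer, src io.Reader) (int64, error) {
	zr := lz4.NewReader(src)
	// WriteTo drives decompression without an intermediate copy loop;
	// io.Copy(dst, zr) would work equally well.
	return zr.WriteTo(dst)
}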

View File

@ -1,75 +0,0 @@
package lz4
import (
"errors"
"fmt"
"io"
"github.com/pierrec/lz4/v4/internal/lz4errors"
)
//go:generate go run golang.org/x/tools/cmd/stringer -type=aState -output state_gen.go
const (
noState aState = iota // uninitialized reader
errorState // unrecoverable error encountered
newState // instantiated object
readState // reading data
writeState // writing data
closedState // all done
)
type (
aState uint8
_State struct {
states []aState
state aState
err error
}
)
func (s *_State) init(states []aState) {
s.states = states
s.state = states[0]
}
func (s *_State) reset() {
s.state = s.states[0]
s.err = nil
}
// next advances the state to its successor unless it is passed a non-nil error.
// It reports whether the state machine is now in error.
func (s *_State) next(err error) bool {
if err != nil {
s.err = fmt.Errorf("%s: %w", s.state, err)
s.state = errorState
return true
}
s.state = s.states[s.state]
return false
}
// nextd is like next but for defers.
func (s *_State) nextd(errp *error) bool {
return errp != nil && s.next(*errp)
}
// check records a non-nil *errp into s; it moves s to errorState unless the error is io.EOF or s is already in error.
func (s *_State) check(errp *error) {
if s.state == errorState || errp == nil {
return
}
if err := *errp; err != nil {
s.err = fmt.Errorf("%w[%s]", err, s.state)
if !errors.Is(err, io.EOF) {
s.state = errorState
}
}
}
func (s *_State) fail() error {
s.state = errorState
s.err = fmt.Errorf("%w[%s]", lz4errors.ErrInternalUnhandledState, s.state)
return s.err
}

View File

@ -1,28 +0,0 @@
// Code generated by "stringer -type=aState -output state_gen.go"; DO NOT EDIT.
package lz4
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[noState-0]
_ = x[errorState-1]
_ = x[newState-2]
_ = x[readState-3]
_ = x[writeState-4]
_ = x[closedState-5]
}
const _aState_name = "noStateerrorStatenewStatereadStatewriteStateclosedState"
var _aState_index = [...]uint8{0, 7, 17, 25, 34, 44, 55}
func (i aState) String() string {
if i >= aState(len(_aState_index)-1) {
return "aState(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _aState_name[_aState_index[i]:_aState_index[i+1]]
}

View File

@ -1,242 +0,0 @@
package lz4
import (
"io"
"github.com/pierrec/lz4/v4/internal/lz4block"
"github.com/pierrec/lz4/v4/internal/lz4errors"
"github.com/pierrec/lz4/v4/internal/lz4stream"
)
var writerStates = []aState{
noState: newState,
newState: writeState,
writeState: closedState,
closedState: newState,
errorState: newState,
}
// NewWriter returns a new LZ4 frame encoder.
func NewWriter(w io.Writer) *Writer {
zw := &Writer{frame: lz4stream.NewFrame()}
zw.state.init(writerStates)
_ = zw.Apply(DefaultBlockSizeOption, DefaultChecksumOption, DefaultConcurrency, defaultOnBlockDone)
zw.Reset(w)
return zw
}
// Writer allows writing an LZ4 stream.
type Writer struct {
state _State
src io.Writer // destination writer
level lz4block.CompressionLevel // how hard to try
num int // concurrency level
frame *lz4stream.Frame // frame being built
data []byte // pending data
idx int // size of pending data
handler func(int)
legacy bool
}
func (*Writer) private() {}
func (w *Writer) Apply(options ...Option) (err error) {
defer w.state.check(&err)
switch w.state.state {
case newState:
case errorState:
return w.state.err
default:
return lz4errors.ErrOptionClosedOrError
}
w.Reset(w.src)
for _, o := range options {
if err = o(w); err != nil {
return
}
}
return
}
func (w *Writer) isNotConcurrent() bool {
return w.num == 1
}
// init sets up the Writer when in newState. It does not change the Writer state.
func (w *Writer) init() error {
w.frame.InitW(w.src, w.num, w.legacy)
size := w.frame.Descriptor.Flags.BlockSizeIndex()
w.data = size.Get()
w.idx = 0
return w.frame.Descriptor.Write(w.frame, w.src)
}
func (w *Writer) Write(buf []byte) (n int, err error) {
defer w.state.check(&err)
switch w.state.state {
case writeState:
case closedState, errorState:
return 0, w.state.err
case newState:
if err = w.init(); w.state.next(err) {
return
}
default:
return 0, w.state.fail()
}
zn := len(w.data)
for len(buf) > 0 {
if w.isNotConcurrent() && w.idx == 0 && len(buf) >= zn {
// Avoid a copy as there is enough data for a block.
if err = w.write(buf[:zn], false); err != nil {
return
}
n += zn
buf = buf[zn:]
continue
}
// Accumulate the data to be compressed.
m := copy(w.data[w.idx:], buf)
n += m
w.idx += m
buf = buf[m:]
if w.idx < len(w.data) {
// Buffer not filled.
return
}
// Buffer full.
if err = w.write(w.data, true); err != nil {
return
}
if !w.isNotConcurrent() {
size := w.frame.Descriptor.Flags.BlockSizeIndex()
w.data = size.Get()
}
w.idx = 0
}
return
}
func (w *Writer) write(data []byte, safe bool) error {
if w.isNotConcurrent() {
block := w.frame.Blocks.Block
err := block.Compress(w.frame, data, w.level).Write(w.frame, w.src)
w.handler(len(block.Data))
return err
}
c := make(chan *lz4stream.FrameDataBlock)
w.frame.Blocks.Blocks <- c
go func(c chan *lz4stream.FrameDataBlock, data []byte, safe bool) {
b := lz4stream.NewFrameDataBlock(w.frame)
c <- b.Compress(w.frame, data, w.level)
<-c
w.handler(len(b.Data))
b.Close(w.frame)
if safe {
// safe to put it back as the last usage of it was FrameDataBlock.Write() called before c is closed
lz4block.Put(data)
}
}(c, data, safe)
return nil
}
// Flush writes any buffered data to the underlying writer immediately.
func (w *Writer) Flush() (err error) {
switch w.state.state {
case writeState:
case errorState:
return w.state.err
case newState:
if err = w.init(); w.state.next(err) {
return
}
default:
return nil
}
if w.idx > 0 {
// Flush pending data, disable w.data freeing as it is done later on.
if err = w.write(w.data[:w.idx], false); err != nil {
return err
}
w.idx = 0
}
return nil
}
// Close closes the Writer, flushing any unwritten data to the underlying writer
// without closing it.
func (w *Writer) Close() error {
if err := w.Flush(); err != nil {
return err
}
err := w.frame.CloseW(w.src, w.num)
// It is now safe to free the buffer.
if w.data != nil {
lz4block.Put(w.data)
w.data = nil
}
return err
}
// Reset clears the state of the Writer w such that it is equivalent to its
// initial state from NewWriter, but instead writing to writer.
// Reset keeps the previous options unless overwritten by the supplied ones.
// No access to writer is performed.
//
// w.Close must be called before Reset or pending data may be dropped.
func (w *Writer) Reset(writer io.Writer) {
w.frame.Reset(w.num)
w.state.reset()
w.src = writer
}
// ReadFrom efficiently reads from r and compresses the data into the Writer destination.
func (w *Writer) ReadFrom(r io.Reader) (n int64, err error) {
switch w.state.state {
case closedState, errorState:
return 0, w.state.err
case newState:
if err = w.init(); w.state.next(err) {
return
}
default:
return 0, w.state.fail()
}
defer w.state.check(&err)
size := w.frame.Descriptor.Flags.BlockSizeIndex()
var done bool
var rn int
data := size.Get()
if w.isNotConcurrent() {
// Keep the same buffer for the whole process.
defer lz4block.Put(data)
}
for !done {
rn, err = io.ReadFull(r, data)
switch err {
case nil:
case io.EOF, io.ErrUnexpectedEOF: // read may be partial
done = true
default:
return
}
n += int64(rn)
err = w.write(data[:rn], true)
if err != nil {
return
}
w.handler(rn)
if !done && !w.isNotConcurrent() {
// The buffer will be returned automatically by goroutines (safe=true),
// so get a new one for the next round.
data = size.Get()
}
}
return
}
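And the matching compression direction, as a sketch using ReadFrom (assumes io and the lz4 package are imported). Close must not be skipped, since it writes the frame footer:

// compressStream is a sketch: compress everything from r into w as one LZ4 frame.
func compressStream(w io.Writer, r io.Reader) (int64, error) {
	zw := lz4.NewWriter(w)
	n, err := zw.ReadFrom(r)
	if err != nil {
		return n, err
	}
	return n, zw.Close() // flush pending blocks and write the frame footer
}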

vendor/modules.txt vendored
View File

@ -8,9 +8,6 @@ github.com/alecthomas/chroma/v2/styles
## explicit; go 1.13
github.com/dlclark/regexp2
github.com/dlclark/regexp2/syntax
# github.com/golang/snappy v0.0.4
## explicit
github.com/golang/snappy
# github.com/inconshreveable/mousetrap v1.1.0
## explicit; go 1.18
github.com/inconshreveable/mousetrap
@ -26,13 +23,6 @@ github.com/klauspost/compress/internal/cpuinfo
github.com/klauspost/compress/internal/snapref
github.com/klauspost/compress/zstd
github.com/klauspost/compress/zstd/internal/xxhash
# github.com/pierrec/lz4/v4 v4.1.21
## explicit; go 1.14
github.com/pierrec/lz4/v4
github.com/pierrec/lz4/v4/internal/lz4block
github.com/pierrec/lz4/v4/internal/lz4errors
github.com/pierrec/lz4/v4/internal/lz4stream
github.com/pierrec/lz4/v4/internal/xxh32
# github.com/spf13/cobra v1.8.0
## explicit; go 1.15
github.com/spf13/cobra