Add support for snappy and lz4 compression

2024-01-05 21:30:08 -06:00 · 2024-01-05 21:30:08 -06:00 · 965d3deb8d
parent 2b6aa85302
commit 965d3deb8d
47 changed files with 7774 additions and 6 deletions
--- a/cmd/errors.go
+++ b/cmd/errors.go
@ -16,7 +16,7 @@ import (
 var (
 	ErrInvalidAdminPrefix    = errors.New("admin path must match the pattern " + AllowedCharacters)
-	ErrInvalidCompression    = errors.New("supported compression formats: flate, gzip, lzw, none, zlib, zstd")
+	ErrInvalidCompression    = errors.New("supported compression formats: flate, gzip, lz4, lzw, none, snappy, zlib, zstd")
 	ErrInvalidConcurrency    = errors.New("concurrency limit must be a positive integer")
 	ErrInvalidFileCountRange = errors.New("maximum file count limit must be greater than or equal to minimum file count limit")
 	ErrInvalidFileCountValue = errors.New("file count limits must be non-negative integers no greater than 2147483647")
--- a/cmd/index.go
+++ b/cmd/index.go
@ -17,8 +17,10 @@ import (
 	"sync"
 	"time"
 	"github.com/golang/snappy"
 	"github.com/julienschmidt/httprouter"
 	"github.com/klauspost/compress/zstd"
 	lz4 "github.com/pierrec/lz4/v4"
 	"seedno.de/seednode/roulette/types"
 )
@ -91,16 +93,20 @@ func (index *fileIndex) isEmpty() bool {
 	return length == 0
 }
-func getReader(format string, file io.Reader) (io.ReadCloser, error) {
+func getReader(format string, file io.Reader) (io.Reader, error) {
 	switch format {
 	case "flate":
 		return flate.NewReader(file), nil
 	case "gzip":
 		return gzip.NewReader(file)
 	case "lz4":
 		return lz4.NewReader(file), nil
 	case "lzw":
 		return lzw.NewReader(file, lzw.LSB, 8), nil
 	case "none":
 		return io.NopCloser(file), nil
 	case "snappy":
 		return snappy.NewReader(file), nil
 	case "zlib":
 		return zlib.NewReader(file)
 	case "zstd":
@ -117,11 +123,22 @@ func getWriter(format string, file io.WriteCloser) (io.WriteCloser, error) {
 	case "flate":
 		return flate.NewWriter(file, flate.DefaultCompression)
 	case "gzip":
-		return gzip.NewWriter(file), nil
+		return gzip.NewWriterLevel(file, gzip.BestCompression)
 	case "lz4":
 		encoder := lz4.NewWriter(file)
 		err := encoder.Apply(lz4.CompressionLevelOption(lz4.Level9))
 		if err != nil {
 			return file, err
 		}
 		return encoder, nil
 	case "lzw":
 		return lzw.NewWriter(file, lzw.LSB, 8), nil
 	case "none":
 		return file, nil
 	case "snappy":
 		return snappy.NewBufferedWriter(file), nil
 	case "zlib":
 		return zlib.NewWriter(file), nil
 	case "zstd":
@ -202,7 +219,7 @@ func (index *fileIndex) Import(path string) error {
 	if err != nil {
 		return err
 	}
-	defer reader.Close()
+	//defer reader.Close()
 	dec := gob.NewDecoder(reader)
--- a/cmd/root.go
+++ b/cmd/root.go
@ -15,7 +15,7 @@ import (
 const (
-	AllowedCharacters string = `^[A-z0-9.\-_]+$`
-	ReleaseVersion    string = "3.10.0"
+	// AllowedCharacters is the pattern an admin path must match. The letter
+	// ranges are spelled out as A-Z and a-z because the single range A-z
+	// also covers the ASCII punctuation between 'Z' and 'a' ([ \ ] ^ _ `).
+	AllowedCharacters string = `^[A-Za-z0-9.\-_]+$`
+	ReleaseVersion    string = "3.11.0"
 )
 var (
@ -62,8 +62,10 @@ var (
 	CompressionFormats = []string{
 		"flate",
 		"gzip",
 		"lz4",
 		"lzw",
 		"none",
 		"snappy",
 		"zlib",
 		"zstd",
 	}
@ -133,7 +135,7 @@ func init() {
 	rootCmd.Flags().BoolVar(&CaseSensitive, "case-sensitive", false, "use case-sensitive matching for filters")
 	rootCmd.Flags().BoolVar(&Code, "code", false, "enable support for source code files")
 	rootCmd.Flags().StringVar(&CodeTheme, "code-theme", "solarized-dark256", "theme for source code syntax highlighting")
-	rootCmd.Flags().StringVar(&Compression, "compression", "zstd", "compression format to use for index (flate, gzip, lzw, none, zlib, zstd)")
+	// The format list in this help text must match CompressionFormats.
+	rootCmd.Flags().StringVar(&Compression, "compression", "zstd", "compression format to use for index (flate, gzip, lz4, lzw, none, snappy, zlib, zstd)")
 	rootCmd.Flags().IntVar(&Concurrency, "concurrency", 8192, "maximum concurrency for scan threads")
 	rootCmd.Flags().BoolVar(&DisableButtons, "disable-buttons", false, "disable first/prev/next/last buttons")
 	rootCmd.Flags().BoolVar(&ExitOnError, "exit-on-error", false, "shut down webserver on error, instead of just printing error")
--- a/go.mod
+++ b/go.mod
@ -4,8 +4,10 @@ go 1.21
 require (
 	github.com/alecthomas/chroma/v2 v2.12.0
 	github.com/golang/snappy v0.0.4
 	github.com/julienschmidt/httprouter v1.3.0
 	github.com/klauspost/compress v1.17.4
 	github.com/pierrec/lz4/v4 v4.1.19
 	github.com/spf13/cobra v1.8.0
 	github.com/yosssi/gohtml v0.0.0-20201013000340-ee4748c638f4
 	golang.org/x/image v0.14.0
--- a/go.sum
+++ b/go.sum
@ -7,6 +7,8 @@ github.com/alecthomas/repr v0.2.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW5
 github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
 github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
 github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
 github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
 github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
@ -15,6 +17,8 @@ github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4d
 github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=
 github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
 github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
 github.com/pierrec/lz4/v4 v4.1.19 h1:tYLzDnjDXh9qIxSTKHwXwOYmm9d887Y7Y1ZkyXYHAN4=
 github.com/pierrec/lz4/v4 v4.1.19/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0=
 github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho=
--- a/vendor/github.com/golang/snappy/.gitignore
+++ b/vendor/github.com/golang/snappy/.gitignore
@ -0,0 +1,16 @@
 cmd/snappytool/snappytool
 testdata/bench
 # These explicitly listed benchmark data files are for an obsolete version of
 # snappy_test.go.
 testdata/alice29.txt
 testdata/asyoulik.txt
 testdata/fireworks.jpeg
 testdata/geo.protodata
 testdata/html
 testdata/html_x_4
 testdata/kppkn.gtb
 testdata/lcet10.txt
 testdata/paper-100k.pdf
 testdata/plrabn12.txt
 testdata/urls.10K
--- a/vendor/github.com/golang/snappy/AUTHORS
+++ b/vendor/github.com/golang/snappy/AUTHORS
@ -0,0 +1,18 @@
 # This is the official list of Snappy-Go authors for copyright purposes.
 # This file is distinct from the CONTRIBUTORS files.
 # See the latter for an explanation.
 # Names should be added to this file as
 #	Name or Organization <email address>
 # The email address is not required for organizations.
 # Please keep the list sorted.
 Amazon.com, Inc
 Damian Gryski <dgryski@gmail.com>
 Eric Buth <eric@topos.com>
 Google Inc.
 Jan Mercl <0xjnml@gmail.com>
 Klaus Post <klauspost@gmail.com>
 Rodolfo Carvalho <rhcarvalho@gmail.com>
 Sebastien Binet <seb.binet@gmail.com>
--- a/vendor/github.com/golang/snappy/CONTRIBUTORS
+++ b/vendor/github.com/golang/snappy/CONTRIBUTORS
@ -0,0 +1,41 @@
 # This is the official list of people who can contribute
 # (and typically have contributed) code to the Snappy-Go repository.
 # The AUTHORS file lists the copyright holders; this file
 # lists people.  For example, Google employees are listed here
 # but not in AUTHORS, because Google holds the copyright.
 #
 # The submission process automatically checks to make sure
 # that people submitting code are listed in this file (by email address).
 #
 # Names should be added to this file only after verifying that
 # the individual or the individual's organization has agreed to
 # the appropriate Contributor License Agreement, found here:
 #
 #     http://code.google.com/legal/individual-cla-v1.0.html
 #     http://code.google.com/legal/corporate-cla-v1.0.html
 #
 # The agreement for individuals can be filled out on the web.
 #
 # When adding J Random Contributor's name to this file,
 # either J's name or J's organization's name should be
 # added to the AUTHORS file, depending on whether the
 # individual or corporate CLA was used.
 # Names should be added to this file like so:
 #     Name <email address>
 # Please keep the list sorted.
 Alex Legg <alexlegg@google.com>
 Damian Gryski <dgryski@gmail.com>
 Eric Buth <eric@topos.com>
 Jan Mercl <0xjnml@gmail.com>
 Jonathan Swinney <jswinney@amazon.com>
 Kai Backman <kaib@golang.org>
 Klaus Post <klauspost@gmail.com>
 Marc-Antoine Ruel <maruel@chromium.org>
 Nigel Tao <nigeltao@golang.org>
 Rob Pike <r@golang.org>
 Rodolfo Carvalho <rhcarvalho@gmail.com>
 Russ Cox <rsc@golang.org>
 Sebastien Binet <seb.binet@gmail.com>
--- a/vendor/github.com/golang/snappy/LICENSE
+++ b/vendor/github.com/golang/snappy/LICENSE
@ -0,0 +1,27 @@
 Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are
 met:
   * Redistributions of source code must retain the above copyright
 notice, this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above
 copyright notice, this list of conditions and the following disclaimer
 in the documentation and/or other materials provided with the
 distribution.
   * Neither the name of Google Inc. nor the names of its
 contributors may be used to endorse or promote products derived from
 this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/vendor/github.com/golang/snappy/README
+++ b/vendor/github.com/golang/snappy/README
@ -0,0 +1,107 @@
 The Snappy compression format in the Go programming language.
 To download and install from source:
 $ go get github.com/golang/snappy
 Unless otherwise noted, the Snappy-Go source files are distributed
 under the BSD-style license found in the LICENSE file.
 Benchmarks.
 The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten
 or so files, the same set used by the C++ Snappy code (github.com/google/snappy
 and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @
 3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29:
 "go test -test.bench=."
 _UFlat0-8         2.19GB/s ± 0%  html
 _UFlat1-8         1.41GB/s ± 0%  urls
 _UFlat2-8         23.5GB/s ± 2%  jpg
 _UFlat3-8         1.91GB/s ± 0%  jpg_200
 _UFlat4-8         14.0GB/s ± 1%  pdf
 _UFlat5-8         1.97GB/s ± 0%  html4
 _UFlat6-8          814MB/s ± 0%  txt1
 _UFlat7-8          785MB/s ± 0%  txt2
 _UFlat8-8          857MB/s ± 0%  txt3
 _UFlat9-8          719MB/s ± 1%  txt4
 _UFlat10-8        2.84GB/s ± 0%  pb
 _UFlat11-8        1.05GB/s ± 0%  gaviota
 _ZFlat0-8         1.04GB/s ± 0%  html
 _ZFlat1-8          534MB/s ± 0%  urls
 _ZFlat2-8         15.7GB/s ± 1%  jpg
 _ZFlat3-8          740MB/s ± 3%  jpg_200
 _ZFlat4-8         9.20GB/s ± 1%  pdf
 _ZFlat5-8          991MB/s ± 0%  html4
 _ZFlat6-8          379MB/s ± 0%  txt1
 _ZFlat7-8          352MB/s ± 0%  txt2
 _ZFlat8-8          396MB/s ± 1%  txt3
 _ZFlat9-8          327MB/s ± 1%  txt4
 _ZFlat10-8        1.33GB/s ± 1%  pb
 _ZFlat11-8         605MB/s ± 1%  gaviota
 "go test -test.bench=. -tags=noasm"
 _UFlat0-8          621MB/s ± 2%  html
 _UFlat1-8          494MB/s ± 1%  urls
 _UFlat2-8         23.2GB/s ± 1%  jpg
 _UFlat3-8         1.12GB/s ± 1%  jpg_200
 _UFlat4-8         4.35GB/s ± 1%  pdf
 _UFlat5-8          609MB/s ± 0%  html4
 _UFlat6-8          296MB/s ± 0%  txt1
 _UFlat7-8          288MB/s ± 0%  txt2
 _UFlat8-8          309MB/s ± 1%  txt3
 _UFlat9-8          280MB/s ± 1%  txt4
 _UFlat10-8         753MB/s ± 0%  pb
 _UFlat11-8         400MB/s ± 0%  gaviota
 _ZFlat0-8          409MB/s ± 1%  html
 _ZFlat1-8          250MB/s ± 1%  urls
 _ZFlat2-8         12.3GB/s ± 1%  jpg
 _ZFlat3-8          132MB/s ± 0%  jpg_200
 _ZFlat4-8         2.92GB/s ± 0%  pdf
 _ZFlat5-8          405MB/s ± 1%  html4
 _ZFlat6-8          179MB/s ± 1%  txt1
 _ZFlat7-8          170MB/s ± 1%  txt2
 _ZFlat8-8          189MB/s ± 1%  txt3
 _ZFlat9-8          164MB/s ± 1%  txt4
 _ZFlat10-8         479MB/s ± 1%  pb
 _ZFlat11-8         270MB/s ± 1%  gaviota
 For comparison (Go's encoded output is byte-for-byte identical to C++'s), here
 are the numbers from C++ Snappy's
 make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log
 BM_UFlat/0     2.4GB/s  html
 BM_UFlat/1     1.4GB/s  urls
 BM_UFlat/2    21.8GB/s  jpg
 BM_UFlat/3     1.5GB/s  jpg_200
 BM_UFlat/4    13.3GB/s  pdf
 BM_UFlat/5     2.1GB/s  html4
 BM_UFlat/6     1.0GB/s  txt1
 BM_UFlat/7   959.4MB/s  txt2
 BM_UFlat/8     1.0GB/s  txt3
 BM_UFlat/9   864.5MB/s  txt4
 BM_UFlat/10    2.9GB/s  pb
 BM_UFlat/11    1.2GB/s  gaviota
 BM_ZFlat/0   944.3MB/s  html (22.31 %)
 BM_ZFlat/1   501.6MB/s  urls (47.78 %)
 BM_ZFlat/2    14.3GB/s  jpg (99.95 %)
 BM_ZFlat/3   538.3MB/s  jpg_200 (73.00 %)
 BM_ZFlat/4     8.3GB/s  pdf (83.30 %)
 BM_ZFlat/5   903.5MB/s  html4 (22.52 %)
 BM_ZFlat/6   336.0MB/s  txt1 (57.88 %)
 BM_ZFlat/7   312.3MB/s  txt2 (61.91 %)
 BM_ZFlat/8   353.1MB/s  txt3 (54.99 %)
 BM_ZFlat/9   289.9MB/s  txt4 (66.26 %)
 BM_ZFlat/10    1.2GB/s  pb (19.68 %)
 BM_ZFlat/11  527.4MB/s  gaviota (37.72 %)
--- a/vendor/github.com/golang/snappy/decode.go
+++ b/vendor/github.com/golang/snappy/decode.go
@ -0,0 +1,264 @@
 // Copyright 2011 The Snappy-Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 package snappy
 import (
 	"encoding/binary"
 	"errors"
 	"io"
 )
 // Errors reported by the decoder.
 var (
 	// ErrCorrupt is returned when the input is not valid Snappy data.
 	ErrCorrupt = errors.New("snappy: corrupt input")
 	// ErrTooLarge is returned when the declared uncompressed length cannot
 	// be represented on this platform.
 	ErrTooLarge = errors.New("snappy: decoded block is too large")
 	// ErrUnsupported is returned for input this package does not support.
 	ErrUnsupported = errors.New("snappy: unsupported input")
 	// errUnsupportedLiteralLength is what Decode returns when the low-level
 	// decoder reports decodeErrCodeUnsupportedLiteralLength.
 	errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
 )
 // DecodedLen reports the decoded size declared in the length header of the
 // encoded block src. Only the header is examined.
 func DecodedLen(src []byte) (int, error) {
 	blockLen, _, err := decodedLen(src)
 	return blockLen, err
 }
 // decodedLen parses the uvarint length header at the start of src. It
 // returns the declared decoded length together with the number of bytes the
 // header itself occupies.
 func decodedLen(src []byte) (blockLen, headerLen int, err error) {
 	length, size := binary.Uvarint(src)
 	// A non-positive size means the varint was truncated or overflowed.
 	if size <= 0 {
 		return 0, 0, ErrCorrupt
 	}
 	// The declared length must fit in 32 bits.
 	if length > 0xffffffff {
 		return 0, 0, ErrCorrupt
 	}
 	// wordSize evaluates to 32 or 64 depending on the width of uint.
 	const wordSize = 32 << (^uint(0) >> 32 & 1)
 	if wordSize == 32 && length > 0x7fffffff {
 		// The value would not fit in a 32-bit int.
 		return 0, 0, ErrTooLarge
 	}
 	return int(length), size, nil
 }
 // Non-zero status codes produced by the low-level decode routine; zero
 // means success.
 const (
 	// decodeErrCodeCorrupt signals malformed input.
 	decodeErrCodeCorrupt = iota + 1
 	// decodeErrCodeUnsupportedLiteralLength signals a literal whose length
 	// the decoder cannot handle.
 	decodeErrCodeUnsupportedLiteralLength
 )
 // Decode decompresses the Snappy block src. When dst is long enough to hold
 // the whole result, the returned slice is a prefix of dst; otherwise a new
 // slice is allocated.
 //
 // dst and src must not overlap. A nil dst is allowed.
 //
 // Decode understands the Snappy block format only, not the stream format.
 func Decode(dst, src []byte) ([]byte, error) {
 	want, hdr, err := decodedLen(src)
 	if err != nil {
 		return nil, err
 	}
 	if len(dst) < want {
 		dst = make([]byte, want)
 	} else {
 		dst = dst[:want]
 	}
 	code := decode(dst, src[hdr:])
 	if code == 0 {
 		return dst, nil
 	}
 	if code == decodeErrCodeUnsupportedLiteralLength {
 		return nil, errUnsupportedLiteralLength
 	}
 	// Any other status is reported as corrupt input.
 	return nil, ErrCorrupt
 }
 // NewReader creates a Reader that decompresses the data read from r. The
 // input is expected to use the framing format documented at
 // https://github.com/google/snappy/blob/master/framing_format.txt
 func NewReader(r io.Reader) *Reader {
 	reader := new(Reader)
 	reader.r = r
 	// Room for one decoded block.
 	reader.decoded = make([]byte, maxBlockSize)
 	// Room for one encoded block plus its checksum.
 	reader.buf = make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize)
 	return reader
 }
 // Reader is an io.Reader that yields the decompressed contents of a
 // Snappy-compressed source.
 //
 // It consumes the Snappy stream format, not the Snappy block format.
 type Reader struct {
 	// r is the source of compressed bytes.
 	r io.Reader
 	// err, once set, is returned by every subsequent read.
 	err error
 	// decoded holds the most recently decoded chunk.
 	decoded []byte
 	// buf is scratch space for chunk headers and bodies.
 	buf []byte
 	// decoded[i:j] is the part of decoded not yet handed to the caller.
 	i int
 	j int
 	// readHeader is set once the first chunk has been checked to be the
 	// stream identifier.
 	readHeader bool
 }
 // Reset points the Reader at a new source and clears all per-stream state:
 // pending decoded bytes, any stored error and the header-seen flag. The
 // internal buffers are kept, so a Reader can be reused without allocating.
 func (r *Reader) Reset(reader io.Reader) {
 	r.r, r.err = reader, nil
 	r.i, r.j = 0, 0
 	r.readHeader = false
 }
 // readFull fills p completely from the underlying reader and reports whether
 // it succeeded. The outcome is always stored in r.err: nil on success,
 // ErrCorrupt when the input ends part-way through p (or ends cleanly while
 // allowEOF is false), and the underlying error otherwise.
 func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) {
 	_, err := io.ReadFull(r.r, p)
 	switch {
 	case err == nil:
 		r.err = nil
 		return true
 	case err == io.ErrUnexpectedEOF, err == io.EOF && !allowEOF:
 		r.err = ErrCorrupt
 	default:
 		r.err = err
 	}
 	return false
 }
 // fill reads and decodes chunks from r.r until r.decoded[r.i:r.j] holds at
 // least one unread byte. Any failure is stored in r.err and returned.
 func (r *Reader) fill() error {
 	for r.i >= r.j {
 		// Each chunk starts with a 4-byte header: a type byte followed by a
 		// 3-byte little-endian length. allowEOF is true because running out
 		// of input exactly on a chunk boundary is a normal end of stream,
 		// which readFull leaves as io.EOF rather than ErrCorrupt.
 		if !r.readFull(r.buf[:4], true) {
 			return r.err
 		}
 		chunkType := r.buf[0]
 		// The first chunk of a stream must be the stream identifier.
 		if !r.readHeader {
 			if chunkType != chunkTypeStreamIdentifier {
 				r.err = ErrCorrupt
 				return r.err
 			}
 			r.readHeader = true
 		}
 		chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
 		// Chunk bodies are read into r.buf, so a longer chunk is rejected.
 		if chunkLen > len(r.buf) {
 			r.err = ErrUnsupported
 			return r.err
 		}
 		// The chunk types are specified at
 		// https://github.com/google/snappy/blob/master/framing_format.txt
 		switch chunkType {
 		case chunkTypeCompressedData:
 			// Section 4.2. Compressed data (chunk type 0x00).
 			if chunkLen < checksumSize {
 				r.err = ErrCorrupt
 				return r.err
 			}
 			buf := r.buf[:chunkLen]
 			if !r.readFull(buf, false) {
 				return r.err
 			}
 			// The body is a 4-byte little-endian checksum followed by the
 			// compressed block.
 			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
 			buf = buf[checksumSize:]
 			n, err := DecodedLen(buf)
 			if err != nil {
 				r.err = err
 				return r.err
 			}
 			if n > len(r.decoded) {
 				r.err = ErrCorrupt
 				return r.err
 			}
 			// n <= len(r.decoded), so Decode writes into r.decoded in place
 			// and its returned slice can be ignored.
 			if _, err := Decode(r.decoded, buf); err != nil {
 				r.err = err
 				return r.err
 			}
 			// The checksum covers the decoded bytes.
 			if crc(r.decoded[:n]) != checksum {
 				r.err = ErrCorrupt
 				return r.err
 			}
 			r.i, r.j = 0, n
 			continue
 		case chunkTypeUncompressedData:
 			// Section 4.3. Uncompressed data (chunk type 0x01).
 			if chunkLen < checksumSize {
 				r.err = ErrCorrupt
 				return r.err
 			}
 			buf := r.buf[:checksumSize]
 			if !r.readFull(buf, false) {
 				return r.err
 			}
 			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
 			// Read directly into r.decoded instead of via r.buf.
 			n := chunkLen - checksumSize
 			if n > len(r.decoded) {
 				r.err = ErrCorrupt
 				return r.err
 			}
 			if !r.readFull(r.decoded[:n], false) {
 				return r.err
 			}
 			if crc(r.decoded[:n]) != checksum {
 				r.err = ErrCorrupt
 				return r.err
 			}
 			r.i, r.j = 0, n
 			continue
 		case chunkTypeStreamIdentifier:
 			// Section 4.1. Stream identifier (chunk type 0xff).
 			if chunkLen != len(magicBody) {
 				r.err = ErrCorrupt
 				return r.err
 			}
 			if !r.readFull(r.buf[:len(magicBody)], false) {
 				return r.err
 			}
 			for i := 0; i < len(magicBody); i++ {
 				if r.buf[i] != magicBody[i] {
 					r.err = ErrCorrupt
 					return r.err
 				}
 			}
 			continue
 		}
 		if chunkType <= 0x7f {
 			// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
 			r.err = ErrUnsupported
 			return r.err
 		}
 		// Section 4.4 Padding (chunk type 0xfe).
 		// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
 		// The body is read into r.buf only so that it can be discarded.
 		if !r.readFull(r.buf[:chunkLen], false) {
 			return r.err
 		}
 	}
 	return nil
 }
 // Read implements io.Reader. It copies pending decoded bytes into p,
 // decoding further chunks first when none are pending. Once an error has
 // been stored on the Reader, that error is returned on every call.
 func (r *Reader) Read(p []byte) (int, error) {
 	err := r.err
 	if err == nil {
 		err = r.fill()
 	}
 	if err != nil {
 		return 0, err
 	}
 	pending := r.decoded[r.i:r.j]
 	copied := copy(p, pending)
 	r.i += copied
 	return copied, nil
 }
 // ReadByte implements io.ByteReader. It returns the next decoded byte,
 // decoding further chunks first when none are pending. Once an error has
 // been stored on the Reader, that error is returned on every call.
 func (r *Reader) ReadByte() (byte, error) {
 	err := r.err
 	if err == nil {
 		err = r.fill()
 	}
 	if err != nil {
 		return 0, err
 	}
 	next := r.decoded[r.i]
 	r.i++
 	return next, nil
 }
--- a/vendor/github.com/golang/snappy/decode_amd64.s
+++ b/vendor/github.com/golang/snappy/decode_amd64.s
@ -0,0 +1,490 @@
 // Copyright 2016 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build !appengine
 // +build gc
 // +build !noasm
 #include "textflag.h"
 // The asm code generally follows the pure Go code in decode_other.go, except
 // where marked with a "!!!".
 // func decode(dst, src []byte) int
 //
 // All local variables fit into registers. The non-zero stack size is only to
 // spill registers and push args when issuing a CALL. The register allocation:
 //	- AX	scratch
 //	- BX	scratch
 //	- CX	length or x
 //	- DX	offset
 //	- SI	&src[s]
 //	- DI	&dst[d]
 //	+ R8	dst_base
 //	+ R9	dst_len
 //	+ R10	dst_base + dst_len
 //	+ R11	src_base
 //	+ R12	src_len
 //	+ R13	src_base + src_len
 //	- R14	used by doCopy
 //	- R15	used by doCopy
 //
 // The registers R8-R13 (marked with a "+") are set at the start of the
 // function, and after a CALL returns, and are not otherwise modified.
 //
 // The d variable is implicitly DI - R8,  and len(dst)-d is R10 - DI.
 // The s variable is implicitly SI - R11, and len(src)-s is R13 - SI.
 TEXT ·decode(SB), NOSPLIT, $48-56
 	// Frame layout: 48 bytes of locals (six 8-byte slots, used only around
 	// the memmove CALL below) and 56 bytes of arguments plus result (dst at
 	// 0(FP), src at 24(FP), the int result at 48(FP)).
 	//
 	// Initialize SI, DI and R8-R13.
 	MOVQ dst_base+0(FP), R8
 	MOVQ dst_len+8(FP), R9
 	MOVQ R8, DI
 	MOVQ R8, R10
 	ADDQ R9, R10
 	MOVQ src_base+24(FP), R11
 	MOVQ src_len+32(FP), R12
 	MOVQ R11, SI
 	MOVQ R11, R13
 	ADDQ R12, R13
 loop:
 	// for s < len(src)
 	CMPQ SI, R13
 	JEQ  end
 	// CX = uint32(src[s])
 	//
 	// switch src[s] & 0x03
 	//
 	// BX holds the low two tag bits; any non-zero value is a copy tag.
 	MOVBLZX (SI), CX
 	MOVL    CX, BX
 	ANDL    $3, BX
 	CMPL    BX, $1
 	JAE     tagCopy
 	// ----------------------------------------
 	// The code below handles literal tags.
 	// case tagLiteral:
 	// x := uint32(src[s] >> 2)
 	// switch
 	SHRL $2, CX
 	CMPL CX, $60
 	JAE  tagLit60Plus
 	// case x < 60:
 	// s++
 	INCQ SI
 doLit:
 	// This is the end of the inner "switch", when we have a literal tag.
 	//
 	// We assume that CX == x and x fits in a uint32, where x is the variable
 	// used in the pure Go decode_other.go code.
 	// length = int(x) + 1
 	//
 	// Unlike the pure Go code, we don't need to check if length <= 0 because
 	// CX can hold 64 bits, so the increment cannot overflow.
 	INCQ CX
 	// Prepare to check if copying length bytes will run past the end of dst or
 	// src.
 	//
 	// AX = len(dst) - d
 	// BX = len(src) - s
 	MOVQ R10, AX
 	SUBQ DI, AX
 	MOVQ R13, BX
 	SUBQ SI, BX
 	// !!! Try a faster technique for short (16 or fewer bytes) copies.
 	//
 	// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
 	//   goto callMemmove // Fall back on calling runtime·memmove.
 	// }
 	//
 	// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
 	// against 21 instead of 16, because it cannot assume that all of its input
 	// is contiguous in memory and so it needs to leave enough source bytes to
 	// read the next tag without refilling buffers, but Go's Decode assumes
 	// contiguousness (the src argument is a []byte).
 	CMPQ CX, $16
 	JGT  callMemmove
 	CMPQ AX, $16
 	JLT  callMemmove
 	CMPQ BX, $16
 	JLT  callMemmove
 	// !!! Implement the copy from src to dst as a 16-byte load and store.
 	// (Decode's documentation says that dst and src must not overlap.)
 	//
 	// This always copies 16 bytes, instead of only length bytes, but that's
 	// OK. If the input is a valid Snappy encoding then subsequent iterations
 	// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
 	// non-nil error), so the overrun will be ignored.
 	//
 	// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
 	// 16-byte loads and stores. This technique probably wouldn't be as
 	// effective on architectures that are fussier about alignment.
 	MOVOU 0(SI), X0
 	MOVOU X0, 0(DI)
 	// d += length
 	// s += length
 	ADDQ CX, DI
 	ADDQ CX, SI
 	JMP  loop
 callMemmove:
 	// if length > len(dst)-d || length > len(src)-s { etc }
 	CMPQ CX, AX
 	JGT  errCorrupt
 	CMPQ CX, BX
 	JGT  errCorrupt
 	// copy(dst[d:], src[s:s+length])
 	//
 	// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
 	// DI, SI and CX as arguments. Coincidentally, we also need to spill those
 	// three registers to the stack, to save local variables across the CALL.
 	//
 	// Slots 0/8/16(SP) are the call arguments; 24/32/40(SP) are the spills.
 	MOVQ DI, 0(SP)
 	MOVQ SI, 8(SP)
 	MOVQ CX, 16(SP)
 	MOVQ DI, 24(SP)
 	MOVQ SI, 32(SP)
 	MOVQ CX, 40(SP)
 	CALL runtime·memmove(SB)
 	// Restore local variables: unspill registers from the stack and
 	// re-calculate R8-R13.
 	MOVQ 24(SP), DI
 	MOVQ 32(SP), SI
 	MOVQ 40(SP), CX
 	MOVQ dst_base+0(FP), R8
 	MOVQ dst_len+8(FP), R9
 	MOVQ R8, R10
 	ADDQ R9, R10
 	MOVQ src_base+24(FP), R11
 	MOVQ src_len+32(FP), R12
 	MOVQ R11, R13
 	ADDQ R12, R13
 	// d += length
 	// s += length
 	ADDQ CX, DI
 	ADDQ CX, SI
 	JMP  loop
 tagLit60Plus:
 	// !!! This fragment does the
 	//
 	// s += x - 58; if uint(s) > uint(len(src)) { etc }
 	//
 	// checks. In the asm version, we code it once instead of once per switch case.
 	//
 	// CX is in 60..63 here, so x - 58 is in 2..5.
 	ADDQ CX, SI
 	SUBQ $58, SI
 	MOVQ SI, BX
 	SUBQ R11, BX
 	CMPQ BX, R12
 	JA   errCorrupt
 	// case x == 60:
 	CMPL CX, $61
 	JEQ  tagLit61
 	JA   tagLit62Plus
 	// x = uint32(src[s-1])
 	MOVBLZX -1(SI), CX
 	JMP     doLit
 tagLit61:
 	// case x == 61:
 	// x = uint32(src[s-2]) | uint32(src[s-1])<<8
 	MOVWLZX -2(SI), CX
 	JMP     doLit
 tagLit62Plus:
 	CMPL CX, $62
 	JA   tagLit63
 	// case x == 62:
 	// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
 	MOVWLZX -3(SI), CX
 	MOVBLZX -1(SI), BX
 	SHLL    $16, BX
 	ORL     BX, CX
 	JMP     doLit
 tagLit63:
 	// case x == 63:
 	// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
 	MOVL -4(SI), CX
 	JMP  doLit
 // The code above handles literal tags.
 // ----------------------------------------
 // The code below handles copy tags.
 tagCopy4:
 	// case tagCopy4:
 	// s += 5
 	ADDQ $5, SI
 	// if uint(s) > uint(len(src)) { etc }
 	MOVQ SI, BX
 	SUBQ R11, BX
 	CMPQ BX, R12
 	JA   errCorrupt
 	// length = 1 + int(src[s-5])>>2
 	SHRQ $2, CX
 	INCQ CX
 	// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
 	MOVLQZX -4(SI), DX
 	JMP     doCopy
 tagCopy2:
 	// case tagCopy2:
 	// s += 3
 	ADDQ $3, SI
 	// if uint(s) > uint(len(src)) { etc }
 	MOVQ SI, BX
 	SUBQ R11, BX
 	CMPQ BX, R12
 	JA   errCorrupt
 	// length = 1 + int(src[s-3])>>2
 	SHRQ $2, CX
 	INCQ CX
 	// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
 	MOVWQZX -2(SI), DX
 	JMP     doCopy
 tagCopy:
 	// We have a copy tag. We assume that:
 	//	- BX == src[s] & 0x03
 	//	- CX == src[s]
 	CMPQ BX, $2
 	JEQ  tagCopy2
 	JA   tagCopy4
 	// case tagCopy1:
 	// s += 2
 	ADDQ $2, SI
 	// if uint(s) > uint(len(src)) { etc }
 	MOVQ SI, BX
 	SUBQ R11, BX
 	CMPQ BX, R12
 	JA   errCorrupt
 	// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
 	MOVQ    CX, DX
 	ANDQ    $0xe0, DX
 	SHLQ    $3, DX
 	MOVBQZX -1(SI), BX
 	ORQ     BX, DX
 	// length = 4 + int(src[s-2])>>2&0x7
 	SHRQ $2, CX
 	ANDQ $7, CX
 	ADDQ $4, CX
 doCopy:
 	// This is the end of the outer "switch", when we have a copy tag.
 	//
 	// We assume that:
 	//	- CX == length && CX > 0
 	//	- DX == offset
 	// if offset <= 0 { etc }
 	CMPQ DX, $0
 	JLE  errCorrupt
 	// if d < offset { etc }
 	MOVQ DI, BX
 	SUBQ R8, BX
 	CMPQ BX, DX
 	JLT  errCorrupt
 	// if length > len(dst)-d { etc }
 	MOVQ R10, BX
 	SUBQ DI, BX
 	CMPQ CX, BX
 	JGT  errCorrupt
 	// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
 	//
 	// Set:
 	//	- R14 = len(dst)-d
 	//	- R15 = &dst[d-offset]
 	MOVQ R10, R14
 	SUBQ DI, R14
 	MOVQ DI, R15
 	SUBQ DX, R15
 	// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
 	//
 	// First, try using two 8-byte load/stores, similar to the doLit technique
 	// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
 	// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
 	// and not one 16-byte load/store, and the first store has to be before the
 	// second load, due to the overlap if offset is in the range [8, 16).
 	//
 	// if length > 16 || offset < 8 || len(dst)-d < 16 {
 	//   goto slowForwardCopy
 	// }
 	// copy 16 bytes
 	// d += length
 	CMPQ CX, $16
 	JGT  slowForwardCopy
 	CMPQ DX, $8
 	JLT  slowForwardCopy
 	CMPQ R14, $16
 	JLT  slowForwardCopy
 	MOVQ 0(R15), AX
 	MOVQ AX, 0(DI)
 	MOVQ 8(R15), BX
 	MOVQ BX, 8(DI)
 	ADDQ CX, DI
 	JMP  loop
 slowForwardCopy:
 	// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
 	// can still try 8-byte load stores, provided we can overrun up to 10 extra
 	// bytes. As above, the overrun will be fixed up by subsequent iterations
 	// of the outermost loop.
 	//
 	// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
 	// commentary says:
 	//
 	// ----
 	//
 	// The main part of this loop is a simple copy of eight bytes at a time
 	// until we've copied (at least) the requested amount of bytes.  However,
 	// if d and d-offset are less than eight bytes apart (indicating a
 	// repeating pattern of length < 8), we first need to expand the pattern in
 	// order to get the correct results. For instance, if the buffer looks like
 	// this, with the eight-byte <d-offset> and <d> patterns marked as
 	// intervals:
 	//
 	//    abxxxxxxxxxxxx
 	//    [------]           d-offset
 	//      [------]         d
 	//
 	// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
 	// once, after which we can move <d> two bytes without moving <d-offset>:
 	//
 	//    ababxxxxxxxxxx
 	//    [------]           d-offset
 	//        [------]       d
 	//
 	// and repeat the exercise until the two no longer overlap.
 	//
 	// This allows us to do very well in the special case of one single byte
 	// repeated many times, without taking a big hit for more general cases.
 	//
 	// The worst case of extra writing past the end of the match occurs when
 	// offset == 1 and length == 1; the last copy will read from byte positions
 	// [0..7] and write to [4..11], whereas it was only supposed to write to
 	// position 1. Thus, ten excess bytes.
 	//
 	// ----
 	//
 	// That "10 byte overrun" worst case is confirmed by Go's
 	// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
 	// and finishSlowForwardCopy algorithm.
 	//
 	// if length > len(dst)-d-10 {
 	//   goto verySlowForwardCopy
 	// }
 	SUBQ $10, R14
 	CMPQ CX, R14
 	JGT  verySlowForwardCopy
 makeOffsetAtLeast8:
 	// !!! As above, expand the pattern so that offset >= 8 and we can use
 	// 8-byte load/stores.
 	//
 	// for offset < 8 {
 	//   copy 8 bytes from dst[d-offset:] to dst[d:]
 	//   length -= offset
 	//   d      += offset
 	//   offset += offset
 	//   // The two previous lines together means that d-offset, and therefore
 	//   // R15, is unchanged.
 	// }
 	CMPQ DX, $8
 	JGE  fixUpSlowForwardCopy
 	MOVQ (R15), BX
 	MOVQ BX, (DI)
 	SUBQ DX, CX
 	ADDQ DX, DI
 	ADDQ DX, DX
 	JMP  makeOffsetAtLeast8
 fixUpSlowForwardCopy:
 	// !!! Add length (which might be negative now) to d (implied by DI being
 	// &dst[d]) so that d ends up at the right place when we jump back to the
 	// top of the loop. Before we do that, though, we save DI to AX so that, if
 	// length is positive, copying the remaining length bytes will write to the
 	// right place.
 	MOVQ DI, AX
 	ADDQ CX, DI
 finishSlowForwardCopy:
 	// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
 	// length means that we overrun, but as above, that will be fixed up by
 	// subsequent iterations of the outermost loop.
 	CMPQ CX, $0
 	JLE  loop
 	MOVQ (R15), BX
 	MOVQ BX, (AX)
 	ADDQ $8, R15
 	ADDQ $8, AX
 	SUBQ $8, CX
 	JMP  finishSlowForwardCopy
 verySlowForwardCopy:
 	// verySlowForwardCopy is a simple implementation of forward copy. In C
 	// parlance, this is a do/while loop instead of a while loop, since we know
 	// that length > 0. In Go syntax:
 	//
 	// for {
 	//   dst[d] = dst[d - offset]
 	//   d++
 	//   length--
 	//   if length == 0 {
 	//     break
 	//   }
 	// }
 	MOVB (R15), BX
 	MOVB BX, (DI)
 	INCQ R15
 	INCQ DI
 	DECQ CX
 	JNZ  verySlowForwardCopy
 	JMP  loop
 // The code above handles copy tags.
 // ----------------------------------------
 end:
 	// This is the end of the "for s < len(src)".
 	//
 	// if d != len(dst) { etc }
 	CMPQ DI, R10
 	JNE  errCorrupt
 	// return 0
 	MOVQ $0, ret+48(FP)
 	RET
 errCorrupt:
 	// return decodeErrCodeCorrupt
 	//
 	// This and the "return 0" above are the only exits from this routine; it
 	// has no path that returns decodeErrCodeUnsupportedLiteralLength.
 	MOVQ $1, ret+48(FP)
 	RET
--- a/vendor/github.com/golang/snappy/decode_arm64.s
+++ b/vendor/github.com/golang/snappy/decode_arm64.s
@ -0,0 +1,494 @@
 // Copyright 2020 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build !appengine
 // +build gc
 // +build !noasm
 #include "textflag.h"
 // The asm code generally follows the pure Go code in decode_other.go, except
 // where marked with a "!!!".
 // func decode(dst, src []byte) int
 //
 // All local variables fit into registers. The non-zero stack size is only to
 // spill registers and push args when issuing a CALL. The register allocation:
 //	- R2	scratch
 //	- R3	scratch
 //	- R4	length or x
 //	- R5	offset
 //	- R6	&src[s]
 //	- R7	&dst[d]
 //	+ R8	dst_base
 //	+ R9	dst_len
 //	+ R10	dst_base + dst_len
 //	+ R11	src_base
 //	+ R12	src_len
 //	+ R13	src_base + src_len
 //	- R14	used by doCopy
 //	- R15	used by doCopy
 //
 // The registers R8-R13 (marked with a "+") are set at the start of the
 // function, and after a CALL returns, and are not otherwise modified.
 //
 // The d variable is implicitly R7 - R8,  and len(dst)-d is R10 - R7.
 // The s variable is implicitly R6 - R11, and len(src)-s is R13 - R6.
 TEXT ·decode(SB), NOSPLIT, $56-56
 	// Frame usage, as seen below: 8(RSP)-24(RSP) carry the three arguments
 	// pushed for runtime·memmove and 32(RSP)-48(RSP) hold the spilled R7, R6
 	// and R4. The arguments are read from dst at 0(FP)/8(FP) and src at
 	// 24(FP)/32(FP); the result is stored at ret+48(FP).
 	//
 	// Initialize R6, R7 and R8-R13.
 	MOVD dst_base+0(FP), R8
 	MOVD dst_len+8(FP), R9
 	MOVD R8, R7
 	MOVD R8, R10
 	ADD  R9, R10, R10
 	MOVD src_base+24(FP), R11
 	MOVD src_len+32(FP), R12
 	MOVD R11, R6
 	MOVD R11, R13
 	ADD  R12, R13, R13
 loop:
 	// for s < len(src)
 	//
 	// The exit test is an equality test (&src[s] == src_base + src_len), not
 	// a less-than test.
 	CMP R13, R6
 	BEQ end
 	// R4 = uint32(src[s])
 	//
 	// switch src[s] & 0x03
 	//
 	// R1 is used here, and throughout this function, as an extra scratch
 	// register for constants; it is not part of the allocation table in the
 	// function header.
 	MOVBU (R6), R4
 	MOVW  R4, R3
 	ANDW  $3, R3
 	MOVW  $1, R1
 	CMPW  R1, R3
 	BGE   tagCopy
 	// ----------------------------------------
 	// The code below handles literal tags.
 	// case tagLiteral:
 	// x := uint32(src[s] >> 2)
 	// switch
 	//
 	// The comparison tests R1 (60) against R4 (x); BLS is the unsigned
 	// "lower or same" condition, so the branch is taken when 60 <= x.
 	MOVW $60, R1
 	LSRW $2, R4, R4
 	CMPW R4, R1
 	BLS  tagLit60Plus
 	// case x < 60:
 	// s++
 	ADD $1, R6, R6
 doLit:
 	// This is the end of the inner "switch", when we have a literal tag.
 	//
 	// We assume that R4 == x and x fits in a uint32, where x is the variable
 	// used in the pure Go decode_other.go code.
 	// length = int(x) + 1
 	//
 	// Unlike the pure Go code, we don't need to check if length <= 0 because
 	// R4 can hold 64 bits, so the increment cannot overflow.
 	ADD $1, R4, R4
 	// Prepare to check if copying length bytes will run past the end of dst or
 	// src.
 	//
 	// R2 = len(dst) - d
 	// R3 = len(src) - s
 	MOVD R10, R2
 	SUB  R7, R2, R2
 	MOVD R13, R3
 	SUB  R6, R3, R3
 	// !!! Try a faster technique for short (16 or fewer bytes) copies.
 	//
 	// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
 	//   goto callMemmove // Fall back on calling runtime·memmove.
 	// }
 	//
 	// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
 	// against 21 instead of 16, because it cannot assume that all of its input
 	// is contiguous in memory and so it needs to leave enough source bytes to
 	// read the next tag without refilling buffers, but Go's Decode assumes
 	// contiguousness (the src argument is a []byte).
 	CMP $16, R4
 	BGT callMemmove
 	CMP $16, R2
 	BLT callMemmove
 	CMP $16, R3
 	BLT callMemmove
 	// !!! Implement the copy from src to dst as a 16-byte load and store.
 	// (Decode's documentation says that dst and src must not overlap.)
 	//
 	// This always copies 16 bytes, instead of only length bytes, but that's
 	// OK. If the input is a valid Snappy encoding then subsequent iterations
 	// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
 	// non-nil error), so the overrun will be ignored.
 	//
 	// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
 	// 16-byte loads and stores. This technique probably wouldn't be as
 	// effective on architectures that are fussier about alignment.
 	//
 	// R14 and R15 serve as the 16-byte transfer pair here; the copy-tag path
 	// assigns them afresh before using them.
 	LDP 0(R6), (R14, R15)
 	STP (R14, R15), 0(R7)
 	// d += length
 	// s += length
 	ADD R4, R7, R7
 	ADD R4, R6, R6
 	B   loop
 callMemmove:
 	// if length > len(dst)-d || length > len(src)-s { etc }
 	CMP R2, R4
 	BGT errCorrupt
 	CMP R3, R4
 	BGT errCorrupt
 	// copy(dst[d:], src[s:s+length])
 	//
 	// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
 	// R7, R6 and R4 as arguments. Coincidentally, we also need to spill those
 	// three registers to the stack, to save local variables across the CALL.
 	//
 	// The first three stores are the call arguments; the second three are the
 	// spill copies that are reloaded after the CALL.
 	MOVD R7, 8(RSP)
 	MOVD R6, 16(RSP)
 	MOVD R4, 24(RSP)
 	MOVD R7, 32(RSP)
 	MOVD R6, 40(RSP)
 	MOVD R4, 48(RSP)
 	CALL runtime·memmove(SB)
 	// Restore local variables: unspill registers from the stack and
 	// re-calculate R8-R13.
 	MOVD 32(RSP), R7
 	MOVD 40(RSP), R6
 	MOVD 48(RSP), R4
 	MOVD dst_base+0(FP), R8
 	MOVD dst_len+8(FP), R9
 	MOVD R8, R10
 	ADD  R9, R10, R10
 	MOVD src_base+24(FP), R11
 	MOVD src_len+32(FP), R12
 	MOVD R11, R13
 	ADD  R12, R13, R13
 	// d += length
 	// s += length
 	ADD R4, R7, R7
 	ADD R4, R6, R6
 	B   loop
 tagLit60Plus:
 	// !!! This fragment does the
 	//
 	// s += x - 58; if uint(s) > uint(len(src)) { etc }
 	//
 	// checks. In the asm version, we code it once instead of once per switch case.
 	//
 	// NOTE(review): BGT is a signed condition, whereas the Go reference
 	// compares as uint. Presumably equivalent here because s only grew by a
 	// few bytes from an in-range value; confirm.
 	ADD  R4, R6, R6
 	SUB  $58, R6, R6
 	MOVD R6, R3
 	SUB  R11, R3, R3
 	CMP  R12, R3
 	BGT  errCorrupt
 	// case x == 60:
 	//
 	// One comparison against 61 dispatches three ways: equal goes to
 	// tagLit61, greater goes to tagLit62Plus, and x == 60 falls through.
 	MOVW $61, R1
 	CMPW R1, R4
 	BEQ  tagLit61
 	BGT  tagLit62Plus
 	// x = uint32(src[s-1])
 	MOVBU -1(R6), R4
 	B     doLit
 tagLit61:
 	// case x == 61:
 	// x = uint32(src[s-2]) | uint32(src[s-1])<<8
 	MOVHU -2(R6), R4
 	B     doLit
 tagLit62Plus:
 	// BHI is the unsigned "higher" condition: taken when x > 62, i.e. x == 63.
 	CMPW $62, R4
 	BHI  tagLit63
 	// case x == 62:
 	// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
 	//
 	// Assembled from a 2-byte load plus a 1-byte load shifted into bits 16-23.
 	MOVHU -3(R6), R4
 	MOVBU -1(R6), R3
 	ORR   R3<<16, R4
 	B     doLit
 tagLit63:
 	// case x == 63:
 	// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
 	MOVWU -4(R6), R4
 	B     doLit
 	// The code above handles literal tags.
 	// ----------------------------------------
 	// The code below handles copy tags.
 tagCopy4:
 	// case tagCopy4:
 	// s += 5
 	ADD $5, R6, R6
 	// if uint(s) > uint(len(src)) { etc }
 	MOVD R6, R3
 	SUB  R11, R3, R3
 	CMP  R12, R3
 	BGT  errCorrupt
 	// length = 1 + int(src[s-5])>>2
 	//
 	// R4 still holds the tag byte loaded at the top of the loop.
 	MOVD $1, R1
 	ADD  R4>>2, R1, R4
 	// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
 	MOVWU -4(R6), R5
 	B     doCopy
 tagCopy2:
 	// case tagCopy2:
 	// s += 3
 	ADD $3, R6, R6
 	// if uint(s) > uint(len(src)) { etc }
 	MOVD R6, R3
 	SUB  R11, R3, R3
 	CMP  R12, R3
 	BGT  errCorrupt
 	// length = 1 + int(src[s-3])>>2
 	MOVD $1, R1
 	ADD  R4>>2, R1, R4
 	// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
 	MOVHU -2(R6), R5
 	B     doCopy
 tagCopy:
 	// We have a copy tag. We assume that:
 	//	- R3 == src[s] & 0x03
 	//	- R4 == src[s]
 	//
 	// R3 is 1, 2 or 3 here: 2 goes to tagCopy2, 3 goes to tagCopy4, and 1
 	// falls through to the tagCopy1 case.
 	CMP $2, R3
 	BEQ tagCopy2
 	BGT tagCopy4
 	// case tagCopy1:
 	// s += 2
 	ADD $2, R6, R6
 	// if uint(s) > uint(len(src)) { etc }
 	MOVD R6, R3
 	SUB  R11, R3, R3
 	CMP  R12, R3
 	BGT  errCorrupt
 	// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
 	MOVD  R4, R5
 	AND   $0xe0, R5
 	MOVBU -1(R6), R3
 	ORR   R5<<3, R3, R5
 	// length = 4 + int(src[s-2])>>2&0x7
 	MOVD $7, R1
 	AND  R4>>2, R1, R4
 	ADD  $4, R4, R4
 doCopy:
 	// This is the end of the outer "switch", when we have a copy tag.
 	//
 	// We assume that:
 	//	- R4 == length && R4 > 0
 	//	- R5 == offset
 	// if offset <= 0 { etc }
 	MOVD $0, R1
 	CMP  R1, R5
 	BLE  errCorrupt
 	// if d < offset { etc }
 	MOVD R7, R3
 	SUB  R8, R3, R3
 	CMP  R5, R3
 	BLT  errCorrupt
 	// if length > len(dst)-d { etc }
 	MOVD R10, R3
 	SUB  R7, R3, R3
 	CMP  R3, R4
 	BGT  errCorrupt
 	// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
 	//
 	// Set:
 	//	- R14 = len(dst)-d
 	//	- R15 = &dst[d-offset]
 	MOVD R10, R14
 	SUB  R7, R14, R14
 	MOVD R7, R15
 	SUB  R5, R15, R15
 	// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
 	//
 	// First, try using two 8-byte load/stores, similar to the doLit technique
 	// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
 	// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
 	// and not one 16-byte load/store, and the first store has to be before the
 	// second load, due to the overlap if offset is in the range [8, 16).
 	//
 	// if length > 16 || offset < 8 || len(dst)-d < 16 {
 	//   goto slowForwardCopy
 	// }
 	// copy 16 bytes
 	// d += length
 	CMP  $16, R4
 	BGT  slowForwardCopy
 	CMP  $8, R5
 	BLT  slowForwardCopy
 	CMP  $16, R14
 	BLT  slowForwardCopy
 	MOVD 0(R15), R2
 	MOVD R2, 0(R7)
 	MOVD 8(R15), R3
 	MOVD R3, 8(R7)
 	ADD  R4, R7, R7
 	B    loop
 slowForwardCopy:
 	// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
 	// can still try 8-byte load stores, provided we can overrun up to 10 extra
 	// bytes. As above, the overrun will be fixed up by subsequent iterations
 	// of the outermost loop.
 	//
 	// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
 	// commentary says:
 	//
 	// ----
 	//
 	// The main part of this loop is a simple copy of eight bytes at a time
 	// until we've copied (at least) the requested amount of bytes.  However,
 	// if d and d-offset are less than eight bytes apart (indicating a
 	// repeating pattern of length < 8), we first need to expand the pattern in
 	// order to get the correct results. For instance, if the buffer looks like
 	// this, with the eight-byte <d-offset> and <d> patterns marked as
 	// intervals:
 	//
 	//    abxxxxxxxxxxxx
 	//    [------]           d-offset
 	//      [------]         d
 	//
 	// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
 	// once, after which we can move <d> two bytes without moving <d-offset>:
 	//
 	//    ababxxxxxxxxxx
 	//    [------]           d-offset
 	//        [------]       d
 	//
 	// and repeat the exercise until the two no longer overlap.
 	//
 	// This allows us to do very well in the special case of one single byte
 	// repeated many times, without taking a big hit for more general cases.
 	//
 	// The worst case of extra writing past the end of the match occurs when
 	// offset == 1 and length == 1; the last copy will read from byte positions
 	// [0..7] and write to [4..11], whereas it was only supposed to write to
 	// position 1. Thus, ten excess bytes.
 	//
 	// ----
 	//
 	// That "10 byte overrun" worst case is confirmed by Go's
 	// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
 	// and finishSlowForwardCopy algorithm.
 	//
 	// if length > len(dst)-d-10 {
 	//   goto verySlowForwardCopy
 	// }
 	//
 	// From here on R14 holds len(dst)-d-10 rather than len(dst)-d.
 	SUB $10, R14, R14
 	CMP R14, R4
 	BGT verySlowForwardCopy
 makeOffsetAtLeast8:
 	// !!! As above, expand the pattern so that offset >= 8 and we can use
 	// 8-byte load/stores.
 	//
 	// for offset < 8 {
 	//   copy 8 bytes from dst[d-offset:] to dst[d:]
 	//   length -= offset
 	//   d      += offset
 	//   offset += offset
 	//   // The two previous lines together means that d-offset, and therefore
 	//   // R15, is unchanged.
 	// }
 	CMP  $8, R5
 	BGE  fixUpSlowForwardCopy
 	MOVD (R15), R3
 	MOVD R3, (R7)
 	SUB  R5, R4, R4
 	ADD  R5, R7, R7
 	ADD  R5, R5, R5
 	B    makeOffsetAtLeast8
 fixUpSlowForwardCopy:
 	// !!! Add length (which might be negative now) to d (implied by R7 being
 	// &dst[d]) so that d ends up at the right place when we jump back to the
 	// top of the loop. Before we do that, though, we save R7 to R2 so that, if
 	// length is positive, copying the remaining length bytes will write to the
 	// right place.
 	MOVD R7, R2
 	ADD  R4, R7, R7
 finishSlowForwardCopy:
 	// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
 	// length means that we overrun, but as above, that will be fixed up by
 	// subsequent iterations of the outermost loop.
 	//
 	// R2 is the write cursor and R15 the read cursor; R7 already holds the
 	// final &dst[d] and is not touched by this loop.
 	MOVD $0, R1
 	CMP  R1, R4
 	BLE  loop
 	MOVD (R15), R3
 	MOVD R3, (R2)
 	ADD  $8, R15, R15
 	ADD  $8, R2, R2
 	SUB  $8, R4, R4
 	B    finishSlowForwardCopy
 verySlowForwardCopy:
 	// verySlowForwardCopy is a simple implementation of forward copy. In C
 	// parlance, this is a do/while loop instead of a while loop, since we know
 	// that length > 0. In Go syntax:
 	//
 	// for {
 	//   dst[d] = dst[d - offset]
 	//   d++
 	//   length--
 	//   if length == 0 {
 	//     break
 	//   }
 	// }
 	MOVB (R15), R3
 	MOVB R3, (R7)
 	ADD  $1, R15, R15
 	ADD  $1, R7, R7
 	SUB  $1, R4, R4
 	CBNZ R4, verySlowForwardCopy
 	B    loop
 	// The code above handles copy tags.
 	// ----------------------------------------
 end:
 	// This is the end of the "for s < len(src)".
 	//
 	// if d != len(dst) { etc }
 	CMP R10, R7
 	BNE errCorrupt
 	// return 0
 	MOVD $0, ret+48(FP)
 	RET
 errCorrupt:
 	// return decodeErrCodeCorrupt
 	//
 	// The non-zero result is staged in R2 before being stored, unlike the
 	// zero result above. NOTE(review): presumably because only a zero can be
 	// stored without a source register on arm64; confirm.
 	MOVD $1, R2
 	MOVD R2, ret+48(FP)
 	RET
--- a/vendor/github.com/golang/snappy/decode_asm.go
+++ b/vendor/github.com/golang/snappy/decode_asm.go
@ -0,0 +1,15 @@
 // Copyright 2016 The Snappy-Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build !appengine
 // +build gc
 // +build !noasm
 // +build amd64 arm64
 package snappy
 // decode has the same semantics as in decode_other.go.
 //
 // This is a body-less declaration: under the build constraints at the top of
 // this file (gc, !appengine, !noasm, and amd64 or arm64) the implementation is
 // provided in assembly, e.g. the ·decode routine in decode_arm64.s.
 //
 //go:noescape
 func decode(dst, src []byte) int
--- a/vendor/github.com/golang/snappy/decode_other.go
+++ b/vendor/github.com/golang/snappy/decode_other.go
@ -0,0 +1,115 @@
 // Copyright 2016 The Snappy-Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build !amd64,!arm64 appengine !gc noasm
 package snappy
 // decode expands the Snappy block body held in src into dst. The caller is
 // expected to have already consumed the varint-encoded decompressed length and
 // to have sized dst to exactly that many bytes.
 //
 // The result is 0 when decoding succeeds, or one of the decodeErrCodeXxx
 // values when the input is rejected.
 func decode(dst, src []byte) int {
 	var d, s, offset, length int
 	for s < len(src) {
 		tag := src[s]
 		switch tag & 0x03 {
 		case tagLiteral:
 			x := uint32(tag >> 2)
 			if x < 60 {
 				// Short literal: length-1 is stored in the tag byte itself.
 				s++
 			} else {
 				// Long literal: x-59 (that is, 1 to 4) bytes follow the tag and
 				// hold length-1 in little-endian order.
 				extra := int(x) - 59
 				s += 1 + extra
 				if uint(s) > uint(len(src)) { // The uint conversions also catch overflow of s.
 					return decodeErrCodeCorrupt
 				}
 				x = 0
 				for i := 1; i <= extra; i++ {
 					x = x<<8 | uint32(src[s-i])
 				}
 			}
 			length = int(x) + 1
 			if length <= 0 {
 				return decodeErrCodeUnsupportedLiteralLength
 			}
 			if length > len(dst)-d {
 				return decodeErrCodeCorrupt
 			}
 			if length > len(src)-s {
 				return decodeErrCodeCorrupt
 			}
 			copy(dst[d:], src[s:s+length])
 			d += length
 			s += length
 			continue
 		case tagCopy1:
 			s += 2
 			if uint(s) > uint(len(src)) { // The uint conversions also catch overflow of s.
 				return decodeErrCodeCorrupt
 			}
 			b0, b1 := src[s-2], src[s-1]
 			length = 4 + ((int(b0) >> 2) & 0x7)
 			offset = int((uint32(b0)&0xe0)<<3 | uint32(b1))
 		case tagCopy2:
 			s += 3
 			if uint(s) > uint(len(src)) { // The uint conversions also catch overflow of s.
 				return decodeErrCodeCorrupt
 			}
 			length = 1 + (int(src[s-3]) >> 2)
 			lo, hi := uint32(src[s-2]), uint32(src[s-1])
 			offset = int(lo | hi<<8)
 		case tagCopy4:
 			s += 5
 			if uint(s) > uint(len(src)) { // The uint conversions also catch overflow of s.
 				return decodeErrCodeCorrupt
 			}
 			length = 1 + (int(src[s-5]) >> 2)
 			var off uint32
 			for i := 1; i <= 4; i++ {
 				off = off<<8 | uint32(src[s-i])
 			}
 			offset = int(off)
 		}
 		// Only copy tags reach this point; literals loop around via continue.
 		switch {
 		case offset <= 0, d < offset, length > len(dst)-d:
 			return decodeErrCodeCorrupt
 		}
 		end := d + length
 		if offset >= length {
 			// Source and destination do not overlap, so the built-in copy is safe.
 			copy(dst[d:end], dst[d-offset:])
 			d = end
 			continue
 		}
 		// The ranges overlap. The built-in copy behaves like memmove, which is
 		// not what the format needs: the bytes must be produced strictly front
 		// to back so that freshly written output is re-read as input. Both
 		// windows are given the same length so the compiler can drop the
 		// per-element bounds checks.
 		out := dst[d:end]
 		in := dst[d-offset:]
 		in = in[:len(out)]
 		for i := range out {
 			out[i] = in[i]
 		}
 		d = end
 	}
 	if d == len(dst) {
 		return 0
 	}
 	return decodeErrCodeCorrupt
 }
--- a/vendor/github.com/golang/snappy/encode.go
+++ b/vendor/github.com/golang/snappy/encode.go
@ -0,0 +1,289 @@
 // Copyright 2011 The Snappy-Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 package snappy
 import (
 	"encoding/binary"
 	"errors"
 	"io"
 )
 // Encode compresses src into the Snappy block format (not the stream format)
 // and returns the result.
 //
 // If dst is at least MaxEncodedLen(len(src)) bytes long, the output is written
 // into it and the returned slice aliases dst; otherwise a fresh buffer is
 // allocated. A nil dst is allowed. dst and src must not overlap.
 //
 // Encode panics with ErrTooLarge when src is too long to be encoded.
 func Encode(dst, src []byte) []byte {
 	need := MaxEncodedLen(len(src))
 	if need < 0 {
 		panic(ErrTooLarge)
 	}
 	if len(dst) < need {
 		dst = make([]byte, need)
 	}
 	// A block opens with the decompressed length as a varint.
 	d := binary.PutUvarint(dst, uint64(len(src)))
 	// The payload is encoded in pieces of at most maxBlockSize bytes.
 	for rest := src; len(rest) > 0; {
 		piece := rest
 		if len(piece) > maxBlockSize {
 			piece = piece[:maxBlockSize]
 		}
 		rest = rest[len(piece):]
 		if len(piece) >= minNonLiteralBlockSize {
 			d += encodeBlock(dst[d:], piece)
 		} else {
 			// Too short for encodeBlock: emit the piece as one literal.
 			d += emitLiteral(dst[d:], piece)
 		}
 	}
 	return dst[:d]
 }
 // inputMargin is the minimum number of extra input bytes to keep, inside
 // encodeBlock's inner loop. On some architectures, this margin lets us
 // implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
 // literals can be implemented as a single load to and store from a 16-byte
 // register. That literal's actual length can be as short as 1 byte, so this
 // can copy up to 15 bytes too much, but that's OK as subsequent iterations of
 // the encoding loop will fix up the copy overrun, and this inputMargin ensures
 // that we don't overrun the dst and src buffers.
 const inputMargin = 16 - 1 // 16-byte register copy minus the 1-byte shortest literal
 // minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
 // could be encoded with a copy tag. This is the minimum with respect to the
 // algorithm used by encodeBlock, not a minimum enforced by the file format.
 //
 // The encoded output must start with at least a 1 byte literal, as there are
 // no previous bytes to copy. A minimal (1 byte) copy after that, generated
 // from an emitCopy call in encodeBlock's main loop, would require at least
 // another inputMargin bytes, for the reason above: we want any emitLiteral
 // calls inside encodeBlock's main loop to use the fast path if possible, which
 // requires being able to overrun by inputMargin bytes. Thus,
 // minNonLiteralBlockSize equals 1 + 1 + inputMargin.
 //
 // The C++ code doesn't use this exact threshold, but it could, as discussed at
 // https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion
 // The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an
 // optimization. It should not affect the encoded form. This is tested by
 // TestSameEncodingAsCppShortCopies.
 //
 // Encode uses this threshold directly: pieces shorter than it are passed to
 // emitLiteral instead of encodeBlock.
 const minNonLiteralBlockSize = 1 + 1 + inputMargin // 1-byte literal + 1-byte copy + margin
 // MaxEncodedLen reports an upper bound on the size of the snappy block
 // produced from srcLen bytes of uncompressed input.
 //
 // A negative result means srcLen is too large to be encoded.
 func MaxEncodedLen(srcLen int) int {
 	// Both the input length and the bound must fit in 32 bits.
 	const limit = 0xffffffff
 	size := uint64(srcLen)
 	if size > limit {
 		return -1
 	}
 	// How the bound is derived. Compressed data has the shape
 	//
 	//    compressed := item* literal*
 	//    item       := literal* copy
 	//
 	// Trailing literals expand by at most 62/60: a 60-byte literal costs one
 	// tag byte plus one extra length byte.
 	//
 	// For items, consider a copy of 4 bytes. The encoder only emits such a
 	// copy when the offset is below 65536, so it costs 3 bytes and the item
 	// stays within the 62/60 literal blowup.
 	//
 	// A copy of 5 bytes with a large offset costs 5 bytes to encode. The
 	// worst case is therefore a 1-byte literal followed by that copy: 6 input
 	// bytes become 7 output bytes.
 	//
 	// That 7/6 factor dominates, hence n + n/6 plus a fixed 32-byte slack.
 	if size += 32 + size/6; size > limit {
 		return -1
 	}
 	return int(size)
 }
 // errClosed is the error that Close stores in a Writer that had not already
 // failed. Because write and Flush return any stored error before doing work,
 // later use of a closed Writer reports this error.
 var errClosed = errors.New("snappy: Writer is closed")
 // NewWriter creates a Writer that compresses to w without buffering incoming
 // data: every Write is compressed and forwarded immediately, so neither Flush
 // nor Close is required.
 //
 // Deprecated: the Writer returned is not suitable for many small writes, only
 // for few large writes. Use NewBufferedWriter instead, which is efficient
 // regardless of the frequency and shape of the writes, and remember to Close
 // that Writer when done.
 func NewWriter(w io.Writer) *Writer {
 	// ibuf is deliberately left nil; that is what selects unbuffered mode.
 	unbuffered := new(Writer)
 	unbuffered.w = w
 	unbuffered.obuf = make([]byte, obufLen)
 	return unbuffered
 }
 // NewBufferedWriter creates a Writer that compresses to w using the framing
 // format described at
 // https://github.com/google/snappy/blob/master/framing_format.txt
 //
 // Incoming bytes are buffered, so callers must call Close to be sure that
 // everything has reached the underlying io.Writer. Flush may be called any
 // number of times before Close.
 func NewBufferedWriter(w io.Writer) *Writer {
 	buffered := new(Writer)
 	buffered.w = w
 	// A non-nil ibuf with room for one full block selects buffered mode.
 	buffered.ibuf = make([]byte, 0, maxBlockSize)
 	buffered.obuf = make([]byte, obufLen)
 	return buffered
 }
 // Writer is an io.Writer that can write Snappy-compressed bytes.
 //
 // Writer handles the Snappy stream format, not the Snappy block format.
 type Writer struct {
 	// w is the underlying io.Writer that receives the compressed stream.
 	w   io.Writer
 	// err is the sticky error state: it holds the first error returned by w,
 	// or errClosed once Close has been called. While it is non-nil, write and
 	// Flush return it without doing any work.
 	err error
 	// ibuf is a buffer for the incoming (uncompressed) bytes.
 	//
 	// Its use is optional. For backwards compatibility, Writers created by the
 	// NewWriter function have ibuf == nil, do not buffer incoming bytes, and
 	// therefore do not need to be Flush'ed or Close'd.
 	ibuf []byte
 	// obuf is a buffer for the outgoing (compressed) bytes.
 	obuf []byte
 	// wroteStreamHeader is whether we have written the stream header.
 	wroteStreamHeader bool
 }
 // Reset drops all state held by w and redirects it to writer, so that an
 // existing Writer can be reused instead of allocating a new one. Buffered
 // input that has not been flushed is discarded, the error state is cleared,
 // and the stream header will be written again on the next write.
 func (w *Writer) Reset(writer io.Writer) {
 	w.w, w.err, w.wroteStreamHeader = writer, nil, false
 	if w.ibuf == nil {
 		// Unbuffered Writer: there is no input buffer to empty.
 		return
 	}
 	w.ibuf = w.ibuf[:0]
 }
 // Write implements io.Writer. It returns the number of bytes of p that were
 // accepted, together with the Writer's sticky error, if any.
 func (w *Writer) Write(p []byte) (nRet int, errRet error) {
 	if w.ibuf == nil {
 		// Unbuffered mode (Writers made by NewWriter): compress and forward
 		// straight away. This performs and compresses poorly for many small
 		// writes and is deprecated, but is kept for callers that never Flush
 		// or Close.
 		return w.write(p)
 	}
 	// Buffered mode. The structure follows bufio.Writer.Write from the
 	// standard library: while p does not fit in the free space of ibuf, drain.
 	for w.err == nil && len(p) > cap(w.ibuf)-len(w.ibuf) {
 		var consumed int
 		if used := len(w.ibuf); used > 0 {
 			// Top up the buffer and flush it; a failure is recorded in w.err.
 			consumed = copy(w.ibuf[used:cap(w.ibuf)], p)
 			w.ibuf = w.ibuf[:used+consumed]
 			w.Flush()
 		} else {
 			// Large write with an empty buffer: compress directly from p and
 			// skip the copy. A failure is recorded in w.err.
 			consumed, _ = w.write(p)
 		}
 		nRet += consumed
 		p = p[consumed:]
 	}
 	if w.err != nil {
 		return nRet, w.err
 	}
 	// Whatever is left fits in the buffer; keep it for a later Flush.
 	used := len(w.ibuf)
 	kept := copy(w.ibuf[used:cap(w.ibuf)], p)
 	w.ibuf = w.ibuf[:used+kept]
 	return nRet + kept, nil
 }
 // write splits p into pieces of at most maxBlockSize bytes, frames each piece
 // as one chunk and sends it to the underlying writer. The stream header is
 // sent ahead of the first chunk. It returns the number of bytes of p whose
 // chunks were written in full; the first failure is stored in w.err and
 // returned, and a Writer that has already failed returns that error at once.
 func (w *Writer) write(p []byte) (nRet int, errRet error) {
 	if w.err != nil {
 		return 0, w.err
 	}
 	for len(p) > 0 {
 		// The stream header occupies the front of obuf and is only included
 		// in the very first write.
 		obufStart := len(magicChunk)
 		if !w.wroteStreamHeader {
 			copy(w.obuf, magicChunk)
 			w.wroteStreamHeader = true
 			obufStart = 0
 		}
 		// Peel the next piece off the front of p.
 		uncompressed := p
 		if len(uncompressed) > maxBlockSize {
 			uncompressed = uncompressed[:maxBlockSize]
 		}
 		p = p[len(uncompressed):]
 		checksum := crc(uncompressed)
 		// Compress into obuf just past the header area. The compressed form
 		// is only kept if it saves at least 12.5%; otherwise the piece is
 		// sent uncompressed and obuf contributes the header alone.
 		compressed := Encode(w.obuf[obufHeaderLen:], uncompressed)
 		chunkType := uint8(chunkTypeCompressedData)
 		bodyLen := len(compressed)
 		obufEnd := obufHeaderLen + len(compressed)
 		if len(compressed) >= len(uncompressed)-len(uncompressed)/8 {
 			chunkType = chunkTypeUncompressedData
 			bodyLen = len(uncompressed)
 			obufEnd = obufHeaderLen
 		}
 		// The chunk length covers the 4 checksum bytes plus the body.
 		chunkLen := 4 + bodyLen
 		// Per-chunk header: type, 3-byte little-endian length, then the
 		// 4-byte little-endian checksum.
 		hdr := w.obuf[len(magicChunk):]
 		hdr[0] = chunkType
 		hdr[1] = uint8(chunkLen >> 0)
 		hdr[2] = uint8(chunkLen >> 8)
 		hdr[3] = uint8(chunkLen >> 16)
 		hdr[4] = uint8(checksum >> 0)
 		hdr[5] = uint8(checksum >> 8)
 		hdr[6] = uint8(checksum >> 16)
 		hdr[7] = uint8(checksum >> 24)
 		_, err := w.w.Write(w.obuf[obufStart:obufEnd])
 		if err == nil && chunkType == chunkTypeUncompressedData {
 			// The raw body is not in obuf; send it from the caller's slice.
 			_, err = w.w.Write(uncompressed)
 		}
 		if err != nil {
 			w.err = err
 			return nRet, err
 		}
 		nRet += len(uncompressed)
 	}
 	return nRet, nil
 }
 // Flush compresses any buffered input and forwards it to the underlying
 // io.Writer. It returns the Writer's sticky error, if any.
 func (w *Writer) Flush() error {
 	switch {
 	case w.err != nil:
 		return w.err
 	case len(w.ibuf) == 0:
 		// Nothing buffered (always the case for unbuffered Writers).
 		return nil
 	}
 	// The outcome of write is recorded in w.err, which is what gets returned.
 	w.write(w.ibuf)
 	w.ibuf = w.ibuf[:0]
 	return w.err
 }
 // Close flushes any buffered input and then marks the Writer as closed. It
 // returns the error left by the flush (or by an earlier failure); a Writer
 // that closes cleanly is left holding errClosed.
 func (w *Writer) Close() error {
 	w.Flush()
 	if w.err != nil {
 		return w.err
 	}
 	w.err = errClosed
 	return nil
 }
--- a/vendor/github.com/golang/snappy/encode_amd64.s
+++ b/vendor/github.com/golang/snappy/encode_amd64.s
@ -0,0 +1,730 @@
 // Copyright 2016 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build !appengine
 // +build gc
 // +build !noasm
 #include "textflag.h"
 // The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a
 // Go toolchain regression. See https://github.com/golang/go/issues/15426 and
 // https://github.com/golang/snappy/issues/29
 //
 // As a workaround, the package was built with a known good assembler, and
 // those instructions were disassembled by "objdump -d" to yield the
 //	4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15
 // style comments, in AT&T asm syntax. Note that rsp here is a physical
 // register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm).
 // The instructions were then encoded as "BYTE $0x.." sequences, which assemble
 // fine on Go 1.6.
 // The asm code generally follows the pure Go code in encode_other.go, except
 // where marked with a "!!!".
 // ----------------------------------------------------------------------------
 // func emitLiteral(dst, lit []byte) int
 //
 // Writes a literal chunk to dst: a 1, 2 or 3 byte header that encodes
 // n = len(lit)-1, followed by a copy of lit. The return value is the header
 // size plus len(lit).
 //
 // All local variables fit into registers. The register allocation:
 //	- AX	len(lit)
 //	- BX	n
 //	- DX	return value
 //	- DI	&dst[i]
 //	- R10	&lit[0]
 //
 // The 24 bytes of stack space is to call runtime·memmove.
 //
 // The unusual register allocation of local variables, such as R10 for the
 // source pointer, matches the allocation used at the call site in encodeBlock,
 // which makes it easier to manually inline this function.
 TEXT ·emitLiteral(SB), NOSPLIT, $24-56
 	MOVQ dst_base+0(FP), DI
 	MOVQ lit_base+24(FP), R10
 	MOVQ lit_len+32(FP), AX
 	// The return value starts out as len(lit); the header size is added below.
 	MOVQ AX, DX
 	// n := len(lit) - 1
 	MOVL AX, BX
 	SUBL $1, BX
 	// Pick the header size: n < 60 fits in the tag byte itself, n < 256 needs
 	// one extra length byte, anything else falls through to two length bytes.
 	CMPL BX, $60
 	JLT  oneByte
 	CMPL BX, $256
 	JLT  twoBytes
 threeBytes:
 	// Tag byte 0xf4 (61<<2), then n stored with a MOVW at dst[i+1].
 	MOVB $0xf4, 0(DI)
 	MOVW BX, 1(DI)
 	ADDQ $3, DI
 	ADDQ $3, DX
 	JMP  memmove
 twoBytes:
 	// Tag byte 0xf0 (60<<2), then n as a single byte.
 	MOVB $0xf0, 0(DI)
 	MOVB BX, 1(DI)
 	ADDQ $2, DI
 	ADDQ $2, DX
 	JMP  memmove
 oneByte:
 	// Tag byte is n<<2.
 	SHLB $2, BX
 	MOVB BX, 0(DI)
 	ADDQ $1, DI
 	ADDQ $1, DX
 memmove:
 	// Store the result before the call; DX is not preserved across it.
 	MOVQ DX, ret+48(FP)
 	// copy(dst[i:], lit)
 	//
 	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
 	// DI, R10 and AX as arguments.
 	MOVQ DI, 0(SP)
 	MOVQ R10, 8(SP)
 	MOVQ AX, 16(SP)
 	CALL runtime·memmove(SB)
 	RET
 // ----------------------------------------------------------------------------
 // func emitCopy(dst []byte, offset, length int) int
 //
 // Writes one or more copy chunks describing (offset, length) to dst and
 // returns the number of bytes written. Long lengths are first broken into
 // 3-byte chunks of length 64 (and at most one of length 60); the remainder is
 // emitted as either a 2-byte or a 3-byte chunk.
 //
 // All local variables fit into registers. The register allocation:
 //	- AX	length
 //	- SI	&dst[0]
 //	- DI	&dst[i]
 //	- R11	offset
 //
 // The unusual register allocation of local variables, such as R11 for the
 // offset, matches the allocation used at the call site in encodeBlock, which
 // makes it easier to manually inline this function.
 TEXT ·emitCopy(SB), NOSPLIT, $0-48
 	MOVQ dst_base+0(FP), DI
 	// SI keeps &dst[0] so the byte count can be computed as DI - SI at the end.
 	MOVQ DI, SI
 	MOVQ offset+24(FP), R11
 	MOVQ length+32(FP), AX
 loop0:
 	// for length >= 68 { etc }
 	CMPL AX, $68
 	JLT  step1
 	// Emit a length 64 copy, encoded as 3 bytes.
 	// The tag byte 0xfe is (64-1)<<2 | 2.
 	MOVB $0xfe, 0(DI)
 	MOVW R11, 1(DI)
 	ADDQ $3, DI
 	SUBL $64, AX
 	JMP  loop0
 step1:
 	// if length > 64 { etc }
 	CMPL AX, $64
 	JLE  step2
 	// Emit a length 60 copy, encoded as 3 bytes.
 	// The tag byte 0xee is (60-1)<<2 | 2.
 	MOVB $0xee, 0(DI)
 	MOVW R11, 1(DI)
 	ADDQ $3, DI
 	SUBL $60, AX
 step2:
 	// if length >= 12 || offset >= 2048 { goto step3 }
 	CMPL AX, $12
 	JGE  step3
 	CMPL R11, $2048
 	JGE  step3
 	// Emit the remaining copy, encoded as 2 bytes.
 	//
 	// dst[i+1] = uint8(offset)
 	// dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | 1
 	MOVB R11, 1(DI)
 	SHRL $8, R11
 	SHLB $5, R11
 	SUBB $4, AX
 	SHLB $2, AX
 	ORB  AX, R11
 	ORB  $1, R11
 	MOVB R11, 0(DI)
 	ADDQ $2, DI
 	// Return the number of bytes written.
 	SUBQ SI, DI
 	MOVQ DI, ret+40(FP)
 	RET
 step3:
 	// Emit the remaining copy, encoded as 3 bytes.
 	//
 	// dst[i+0] = uint8(length-1)<<2 | 2, followed by offset stored with a MOVW.
 	SUBL $1, AX
 	SHLB $2, AX
 	ORB  $2, AX
 	MOVB AX, 0(DI)
 	MOVW R11, 1(DI)
 	ADDQ $3, DI
 	// Return the number of bytes written.
 	SUBQ SI, DI
 	MOVQ DI, ret+40(FP)
 	RET
 // ----------------------------------------------------------------------------
 // func extendMatch(src []byte, i, j int) int
 //
 // Advances i and j in lock step for as long as src[i] == src[j] and
 // j < len(src), and returns the final value of j.
 //
 // All local variables fit into registers. The register allocation:
 //	- DX	&src[0]
 //	- SI	&src[j]
 //	- R13	&src[len(src) - 8]
 //	- R14	&src[len(src)]
 //	- R15	&src[i]
 //
 // The unusual register allocation of local variables, such as R15 for a source
 // pointer, matches the allocation used at the call site in encodeBlock, which
 // makes it easier to manually inline this function.
 TEXT ·extendMatch(SB), NOSPLIT, $0-48
 	MOVQ src_base+0(FP), DX
 	MOVQ src_len+8(FP), R14
 	MOVQ i+24(FP), R15
 	MOVQ j+32(FP), SI
 	// Turn the length and the two indexes into absolute pointers.
 	ADDQ DX, R14
 	ADDQ DX, R15
 	ADDQ DX, SI
 	// R13 is the last address from which an 8-byte load stays inside src.
 	MOVQ R14, R13
 	SUBQ $8, R13
 cmp8:
 	// As long as we are 8 or more bytes before the end of src, we can load and
 	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
 	CMPQ SI, R13
 	JA   cmp1
 	MOVQ (R15), AX
 	MOVQ (SI), BX
 	CMPQ AX, BX
 	JNE  bsf
 	ADDQ $8, R15
 	ADDQ $8, SI
 	JMP  cmp8
 bsf:
 	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
 	// the index of the first byte that differs. The BSF instruction finds the
 	// least significant 1 bit, the amd64 architecture is little-endian, and
 	// the shift by 3 converts a bit index to a byte index.
 	XORQ AX, BX
 	BSFQ BX, BX
 	SHRQ $3, BX
 	ADDQ BX, SI
 	// Convert from &src[ret] to ret.
 	SUBQ DX, SI
 	MOVQ SI, ret+40(FP)
 	RET
 cmp1:
 	// In src's tail, compare 1 byte at a time.
 	CMPQ SI, R14
 	JAE  extendMatchEnd
 	MOVB (R15), AX
 	MOVB (SI), BX
 	CMPB AX, BX
 	JNE  extendMatchEnd
 	ADDQ $1, R15
 	ADDQ $1, SI
 	JMP  cmp1
 extendMatchEnd:
 	// Convert from &src[ret] to ret.
 	SUBQ DX, SI
 	MOVQ SI, ret+40(FP)
 	RET
 // ----------------------------------------------------------------------------
 // func encodeBlock(dst, src []byte) (d int)
 //
 // Encodes src into dst as a sequence of literal and copy chunks and returns
 // d, the number of bytes written to dst. It contains manually inlined copies
 // of emitLiteral, extendMatch and emitCopy; the final literal (emitRemainder)
 // is emitted through a real call to emitLiteral.
 //
 // All local variables fit into registers, other than "var table". The register
 // allocation:
 //	- AX	.	.
 //	- BX	.	.
 //	- CX	56	shift (note that amd64 shifts by non-immediates must use CX).
 //	- DX	64	&src[0], tableSize
 //	- SI	72	&src[s]
 //	- DI	80	&dst[d]
 //	- R9	88	sLimit
 //	- R10	.	&src[nextEmit]
 //	- R11	96	prevHash, currHash, nextHash, offset
 //	- R12	104	&src[base], skip
 //	- R13	.	&src[nextS], &src[len(src) - 8]
 //	- R14	.	len(src), bytesBetweenHashLookups, &src[len(src)], x
 //	- R15	112	candidate
 //
 // The second column (56, 64, etc) is the stack offset to spill the registers
 // when calling other functions. We could pack this slightly tighter, but it's
 // simpler to have a dedicated spill map independent of the function called.
 //
 // "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
 // extra 56 bytes, to call other functions, and an extra 64 bytes, to spill
 // local variables (registers) during calls gives 32768 + 56 + 64 = 32888.
 //
 // The hand-encoded BYTE sequences below address the table as 0x78(%rsp), i.e.
 // at offset 120 = 56 + 64, directly above the call and spill areas.
 TEXT ·encodeBlock(SB), 0, $32888-56
 	MOVQ dst_base+0(FP), DI
 	MOVQ src_base+24(FP), SI
 	MOVQ src_len+32(FP), R14
 	// shift, tableSize := uint32(32-8), 1<<8
 	MOVQ $24, CX
 	MOVQ $256, DX
 calcShift:
 	// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
 	//	shift--
 	// }
 	CMPQ DX, $16384
 	JGE  varTable
 	CMPQ DX, R14
 	JGE  varTable
 	SUBQ $1, CX
 	SHLQ $1, DX
 	JMP  calcShift
 varTable:
 	// var table [maxTableSize]uint16
 	//
 	// In the asm code, unlike the Go code, we can zero-initialize only the
 	// first tableSize elements. Each uint16 element is 2 bytes and each MOVOU
 	// writes 16 bytes, so we can do only tableSize/8 writes instead of the
 	// 2048 writes that would zero-initialize all of table's 32768 bytes.
 	SHRQ $3, DX
 	LEAQ table-32768(SP), BX
 	PXOR X0, X0
 memclr:
 	MOVOU X0, 0(BX)
 	ADDQ  $16, BX
 	SUBQ  $1, DX
 	JNZ   memclr
 	// !!! DX = &src[0]
 	MOVQ SI, DX
 	// sLimit := len(src) - inputMargin
 	MOVQ R14, R9
 	SUBQ $15, R9
 	// !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't
 	// change for the rest of the function.
 	MOVQ CX, 56(SP)
 	MOVQ DX, 64(SP)
 	MOVQ R9, 88(SP)
 	// nextEmit := 0
 	MOVQ DX, R10
 	// s := 1
 	ADDQ $1, SI
 	// nextHash := hash(load32(src, s), shift)
 	MOVL  0(SI), R11
 	IMULL $0x1e35a7bd, R11
 	SHRL  CX, R11
 outer:
 	// for { etc }
 	// skip := 32
 	MOVQ $32, R12
 	// nextS := s
 	MOVQ SI, R13
 	// candidate := 0
 	MOVQ $0, R15
 inner0:
 	// for { etc }
 	// s := nextS
 	MOVQ R13, SI
 	// bytesBetweenHashLookups := skip >> 5
 	MOVQ R12, R14
 	SHRQ $5, R14
 	// nextS = s + bytesBetweenHashLookups
 	ADDQ R14, R13
 	// skip += bytesBetweenHashLookups
 	ADDQ R14, R12
 	// if nextS > sLimit { goto emitRemainder }
 	MOVQ R13, AX
 	SUBQ DX, AX
 	CMPQ AX, R9
 	JA   emitRemainder
 	// candidate = int(table[nextHash])
 	// XXX: MOVWQZX table-32768(SP)(R11*2), R15
 	// XXX: 4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15
 	BYTE $0x4e
 	BYTE $0x0f
 	BYTE $0xb7
 	BYTE $0x7c
 	BYTE $0x5c
 	BYTE $0x78
 	// table[nextHash] = uint16(s)
 	MOVQ SI, AX
 	SUBQ DX, AX
 	// XXX: MOVW AX, table-32768(SP)(R11*2)
 	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2)
 	BYTE $0x66
 	BYTE $0x42
 	BYTE $0x89
 	BYTE $0x44
 	BYTE $0x5c
 	BYTE $0x78
 	// nextHash = hash(load32(src, nextS), shift)
 	MOVL  0(R13), R11
 	IMULL $0x1e35a7bd, R11
 	SHRL  CX, R11
 	// if load32(src, s) != load32(src, candidate) { continue } break
 	MOVL 0(SI), AX
 	MOVL (DX)(R15*1), BX
 	CMPL AX, BX
 	JNE  inner0
 fourByteMatch:
 	// As per the encode_other.go code:
 	//
 	// A 4-byte match has been found. We'll later see etc.
 	// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
 	// on inputMargin in encode.go.
 	MOVQ SI, AX
 	SUBQ R10, AX
 	CMPQ AX, $16
 	JLE  emitLiteralFastPath
 	// ----------------------------------------
 	// Begin inline of the emitLiteral call.
 	//
 	// d += emitLiteral(dst[d:], src[nextEmit:s])
 	MOVL AX, BX
 	SUBL $1, BX
 	CMPL BX, $60
 	JLT  inlineEmitLiteralOneByte
 	CMPL BX, $256
 	JLT  inlineEmitLiteralTwoBytes
 inlineEmitLiteralThreeBytes:
 	MOVB $0xf4, 0(DI)
 	MOVW BX, 1(DI)
 	ADDQ $3, DI
 	JMP  inlineEmitLiteralMemmove
 inlineEmitLiteralTwoBytes:
 	MOVB $0xf0, 0(DI)
 	MOVB BX, 1(DI)
 	ADDQ $2, DI
 	JMP  inlineEmitLiteralMemmove
 inlineEmitLiteralOneByte:
 	SHLB $2, BX
 	MOVB BX, 0(DI)
 	ADDQ $1, DI
 inlineEmitLiteralMemmove:
 	// Spill local variables (registers) onto the stack; call; unspill.
 	//
 	// copy(dst[i:], lit)
 	//
 	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
 	// DI, R10 and AX as arguments.
 	MOVQ DI, 0(SP)
 	MOVQ R10, 8(SP)
 	MOVQ AX, 16(SP)
 	ADDQ AX, DI              // Finish the "d +=" part of "d += emitLiteral(etc)".
 	MOVQ SI, 72(SP)
 	MOVQ DI, 80(SP)
 	MOVQ R15, 112(SP)
 	CALL runtime·memmove(SB)
 	// CX, DX and R9 were spilled once at the top of the function; SI, DI and
 	// R15 were spilled just above.
 	MOVQ 56(SP), CX
 	MOVQ 64(SP), DX
 	MOVQ 72(SP), SI
 	MOVQ 80(SP), DI
 	MOVQ 88(SP), R9
 	MOVQ 112(SP), R15
 	JMP  inner1
 inlineEmitLiteralEnd:
 	// End inline of the emitLiteral call.
 	// ----------------------------------------
 emitLiteralFastPath:
 	// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
 	MOVB AX, BX
 	SUBB $1, BX
 	SHLB $2, BX
 	MOVB BX, (DI)
 	ADDQ $1, DI
 	// !!! Implement the copy from lit to dst as a 16-byte load and store.
 	// (Encode's documentation says that dst and src must not overlap.)
 	//
 	// This always copies 16 bytes, instead of only len(lit) bytes, but that's
 	// OK. Subsequent iterations will fix up the overrun.
 	//
 	// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
 	// 16-byte loads and stores. This technique probably wouldn't be as
 	// effective on architectures that are fussier about alignment.
 	MOVOU 0(R10), X0
 	MOVOU X0, 0(DI)
 	ADDQ  AX, DI
 inner1:
 	// for { etc }
 	// base := s
 	MOVQ SI, R12
 	// !!! offset := base - candidate
 	//
 	// R12 holds a pointer (&src[base]) while R15 holds an index (candidate), so
 	// &src[0] is subtracted as well to yield a plain distance.
 	MOVQ R12, R11
 	SUBQ R15, R11
 	SUBQ DX, R11
 	// ----------------------------------------
 	// Begin inline of the extendMatch call.
 	//
 	// s = extendMatch(src, candidate+4, s+4)
 	// !!! R14 = &src[len(src)]
 	MOVQ src_len+32(FP), R14
 	ADDQ DX, R14
 	// !!! R13 = &src[len(src) - 8]
 	MOVQ R14, R13
 	SUBQ $8, R13
 	// !!! R15 = &src[candidate + 4]
 	ADDQ $4, R15
 	ADDQ DX, R15
 	// !!! s += 4
 	ADDQ $4, SI
 inlineExtendMatchCmp8:
 	// As long as we are 8 or more bytes before the end of src, we can load and
 	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
 	CMPQ SI, R13
 	JA   inlineExtendMatchCmp1
 	MOVQ (R15), AX
 	MOVQ (SI), BX
 	CMPQ AX, BX
 	JNE  inlineExtendMatchBSF
 	ADDQ $8, R15
 	ADDQ $8, SI
 	JMP  inlineExtendMatchCmp8
 inlineExtendMatchBSF:
 	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
 	// the index of the first byte that differs. The BSF instruction finds the
 	// least significant 1 bit, the amd64 architecture is little-endian, and
 	// the shift by 3 converts a bit index to a byte index.
 	XORQ AX, BX
 	BSFQ BX, BX
 	SHRQ $3, BX
 	ADDQ BX, SI
 	JMP  inlineExtendMatchEnd
 inlineExtendMatchCmp1:
 	// In src's tail, compare 1 byte at a time.
 	CMPQ SI, R14
 	JAE  inlineExtendMatchEnd
 	MOVB (R15), AX
 	MOVB (SI), BX
 	CMPB AX, BX
 	JNE  inlineExtendMatchEnd
 	ADDQ $1, R15
 	ADDQ $1, SI
 	JMP  inlineExtendMatchCmp1
 inlineExtendMatchEnd:
 	// End inline of the extendMatch call.
 	// ----------------------------------------
 	// ----------------------------------------
 	// Begin inline of the emitCopy call.
 	//
 	// d += emitCopy(dst[d:], base-candidate, s-base)
 	// !!! length := s - base
 	MOVQ SI, AX
 	SUBQ R12, AX
 inlineEmitCopyLoop0:
 	// for length >= 68 { etc }
 	CMPL AX, $68
 	JLT  inlineEmitCopyStep1
 	// Emit a length 64 copy, encoded as 3 bytes.
 	MOVB $0xfe, 0(DI)
 	MOVW R11, 1(DI)
 	ADDQ $3, DI
 	SUBL $64, AX
 	JMP  inlineEmitCopyLoop0
 inlineEmitCopyStep1:
 	// if length > 64 { etc }
 	CMPL AX, $64
 	JLE  inlineEmitCopyStep2
 	// Emit a length 60 copy, encoded as 3 bytes.
 	MOVB $0xee, 0(DI)
 	MOVW R11, 1(DI)
 	ADDQ $3, DI
 	SUBL $60, AX
 inlineEmitCopyStep2:
 	// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
 	CMPL AX, $12
 	JGE  inlineEmitCopyStep3
 	CMPL R11, $2048
 	JGE  inlineEmitCopyStep3
 	// Emit the remaining copy, encoded as 2 bytes.
 	MOVB R11, 1(DI)
 	SHRL $8, R11
 	SHLB $5, R11
 	SUBB $4, AX
 	SHLB $2, AX
 	ORB  AX, R11
 	ORB  $1, R11
 	MOVB R11, 0(DI)
 	ADDQ $2, DI
 	JMP  inlineEmitCopyEnd
 inlineEmitCopyStep3:
 	// Emit the remaining copy, encoded as 3 bytes.
 	SUBL $1, AX
 	SHLB $2, AX
 	ORB  $2, AX
 	MOVB AX, 0(DI)
 	MOVW R11, 1(DI)
 	ADDQ $3, DI
 inlineEmitCopyEnd:
 	// End inline of the emitCopy call.
 	// ----------------------------------------
 	// nextEmit = s
 	MOVQ SI, R10
 	// if s >= sLimit { goto emitRemainder }
 	MOVQ SI, AX
 	SUBQ DX, AX
 	CMPQ AX, R9
 	JAE  emitRemainder
 	// As per the encode_other.go code:
 	//
 	// We could immediately etc.
 	// x := load64(src, s-1)
 	MOVQ -1(SI), R14
 	// prevHash := hash(uint32(x>>0), shift)
 	MOVL  R14, R11
 	IMULL $0x1e35a7bd, R11
 	SHRL  CX, R11
 	// table[prevHash] = uint16(s-1)
 	MOVQ SI, AX
 	SUBQ DX, AX
 	SUBQ $1, AX
 	// XXX: MOVW AX, table-32768(SP)(R11*2)
 	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2)
 	BYTE $0x66
 	BYTE $0x42
 	BYTE $0x89
 	BYTE $0x44
 	BYTE $0x5c
 	BYTE $0x78
 	// currHash := hash(uint32(x>>8), shift)
 	SHRQ  $8, R14
 	MOVL  R14, R11
 	IMULL $0x1e35a7bd, R11
 	SHRL  CX, R11
 	// candidate = int(table[currHash])
 	// XXX: MOVWQZX table-32768(SP)(R11*2), R15
 	// XXX: 4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15
 	BYTE $0x4e
 	BYTE $0x0f
 	BYTE $0xb7
 	BYTE $0x7c
 	BYTE $0x5c
 	BYTE $0x78
 	// table[currHash] = uint16(s)
 	ADDQ $1, AX
 	// XXX: MOVW AX, table-32768(SP)(R11*2)
 	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2)
 	BYTE $0x66
 	BYTE $0x42
 	BYTE $0x89
 	BYTE $0x44
 	BYTE $0x5c
 	BYTE $0x78
 	// if uint32(x>>8) == load32(src, candidate) { continue }
 	MOVL (DX)(R15*1), BX
 	CMPL R14, BX
 	JEQ  inner1
 	// nextHash = hash(uint32(x>>16), shift)
 	SHRQ  $8, R14
 	MOVL  R14, R11
 	IMULL $0x1e35a7bd, R11
 	SHRL  CX, R11
 	// s++
 	ADDQ $1, SI
 	// break out of the inner1 for loop, i.e. continue the outer loop.
 	JMP outer
 emitRemainder:
 	// if nextEmit < len(src) { etc }
 	MOVQ src_len+32(FP), AX
 	ADDQ DX, AX
 	CMPQ R10, AX
 	JEQ  encodeBlockEnd
 	// d += emitLiteral(dst[d:], src[nextEmit:])
 	//
 	// Push args.
 	MOVQ DI, 0(SP)
 	MOVQ $0, 8(SP)   // Unnecessary, as the callee ignores it, but conservative.
 	MOVQ $0, 16(SP)  // Unnecessary, as the callee ignores it, but conservative.
 	MOVQ R10, 24(SP)
 	SUBQ R10, AX
 	MOVQ AX, 32(SP)
 	MOVQ AX, 40(SP)  // Unnecessary, as the callee ignores it, but conservative.
 	// Spill local variables (registers) onto the stack; call; unspill.
 	MOVQ DI, 80(SP)
 	CALL ·emitLiteral(SB)
 	MOVQ 80(SP), DI
 	// Finish the "d +=" part of "d += emitLiteral(etc)".
 	// 48(SP) is emitLiteral's result slot, just past its six argument words.
 	ADDQ 48(SP), DI
 encodeBlockEnd:
 	// d = &dst[d] - &dst[0]
 	MOVQ dst_base+0(FP), AX
 	SUBQ AX, DI
 	MOVQ DI, d+48(FP)
 	RET
--- a/vendor/github.com/golang/snappy/encode_arm64.s
+++ b/vendor/github.com/golang/snappy/encode_arm64.s
@ -0,0 +1,722 @@
 // Copyright 2020 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build !appengine
 // +build gc
 // +build !noasm
 #include "textflag.h"
 // The asm code generally follows the pure Go code in encode_other.go, except
 // where marked with a "!!!".
 // ----------------------------------------------------------------------------
 // func emitLiteral(dst, lit []byte) int
 //
 // Writes a literal chunk to dst: a 1, 2 or 3 byte header that encodes
 // n = len(lit)-1, followed by a copy of lit. The return value is the header
 // size plus len(lit).
 //
 // All local variables fit into registers. The register allocation:
 //	- R3	len(lit)
 //	- R4	n
 //	- R6	return value
 //	- R8	&dst[i]
 //	- R10	&lit[0]
 //
 // The 32 bytes of stack space is to call runtime·memmove.
 //
 // The unusual register allocation of local variables, such as R10 for the
 // source pointer, matches the allocation used at the call site in encodeBlock,
 // which makes it easier to manually inline this function.
 TEXT ·emitLiteral(SB), NOSPLIT, $32-56
 	MOVD dst_base+0(FP), R8
 	MOVD lit_base+24(FP), R10
 	MOVD lit_len+32(FP), R3
 	// The return value starts out as len(lit); the header size is added below.
 	MOVD R3, R6
 	// n := len(lit) - 1
 	MOVW R3, R4
 	SUBW $1, R4, R4
 	// Pick the header size: n < 60 fits in the tag byte itself, n < 256 needs
 	// one extra length byte, anything else falls through to two length bytes.
 	CMPW $60, R4
 	BLT  oneByte
 	CMPW $256, R4
 	BLT  twoBytes
 threeBytes:
 	// Tag byte 0xf4 (61<<2), then n stored with a MOVW at dst[i+1].
 	MOVD $0xf4, R2
 	MOVB R2, 0(R8)
 	MOVW R4, 1(R8)
 	ADD  $3, R8, R8
 	ADD  $3, R6, R6
 	B    memmove
 twoBytes:
 	// Tag byte 0xf0 (60<<2), then n as a single byte.
 	MOVD $0xf0, R2
 	MOVB R2, 0(R8)
 	MOVB R4, 1(R8)
 	ADD  $2, R8, R8
 	ADD  $2, R6, R6
 	B    memmove
 oneByte:
 	// Tag byte is n<<2.
 	LSLW $2, R4, R4
 	MOVB R4, 0(R8)
 	ADD  $1, R8, R8
 	ADD  $1, R6, R6
 memmove:
 	// Store the result before the call.
 	MOVD R6, ret+48(FP)
 	// copy(dst[i:], lit)
 	//
 	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
 	// R8, R10 and R3 as arguments.
 	MOVD R8, 8(RSP)
 	MOVD R10, 16(RSP)
 	MOVD R3, 24(RSP)
 	CALL runtime·memmove(SB)
 	RET
 // ----------------------------------------------------------------------------
 // func emitCopy(dst []byte, offset, length int) int
 //
 // Writes one or more copy chunks describing (offset, length) to dst and
 // returns the number of bytes written. Long lengths are first broken into
 // 3-byte chunks of length 64 (and at most one of length 60); the remainder is
 // emitted as either a 2-byte or a 3-byte chunk.
 //
 // All local variables fit into registers. The register allocation:
 //	- R3	length
 //	- R7	&dst[0]
 //	- R8	&dst[i]
 //	- R11	offset
 //
 // The unusual register allocation of local variables, such as R11 for the
 // offset, matches the allocation used at the call site in encodeBlock, which
 // makes it easier to manually inline this function.
 TEXT ·emitCopy(SB), NOSPLIT, $0-48
 	MOVD dst_base+0(FP), R8
 	// R7 keeps &dst[0] so the byte count can be computed as R8 - R7 at the end.
 	MOVD R8, R7
 	MOVD offset+24(FP), R11
 	MOVD length+32(FP), R3
 loop0:
 	// for length >= 68 { etc }
 	CMPW $68, R3
 	BLT  step1
 	// Emit a length 64 copy, encoded as 3 bytes.
 	// The tag byte 0xfe is (64-1)<<2 | 2.
 	MOVD $0xfe, R2
 	MOVB R2, 0(R8)
 	MOVW R11, 1(R8)
 	ADD  $3, R8, R8
 	SUB  $64, R3, R3
 	B    loop0
 step1:
 	// if length > 64 { etc }
 	CMP $64, R3
 	BLE step2
 	// Emit a length 60 copy, encoded as 3 bytes.
 	// The tag byte 0xee is (60-1)<<2 | 2.
 	MOVD $0xee, R2
 	MOVB R2, 0(R8)
 	MOVW R11, 1(R8)
 	ADD  $3, R8, R8
 	SUB  $60, R3, R3
 step2:
 	// if length >= 12 || offset >= 2048 { goto step3 }
 	CMP  $12, R3
 	BGE  step3
 	CMPW $2048, R11
 	BGE  step3
 	// Emit the remaining copy, encoded as 2 bytes.
 	//
 	// dst[i+1] = uint8(offset)
 	// dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | 1
 	//
 	// The offset's high bits are moved into place with a shift right by 3 and
 	// a mask of 0xe0, which keeps bits 8-10 of the offset in bits 5-7.
 	MOVB R11, 1(R8)
 	LSRW $3, R11, R11
 	AND  $0xe0, R11, R11
 	SUB  $4, R3, R3
 	LSLW $2, R3
 	AND  $0xff, R3, R3
 	ORRW R3, R11, R11
 	ORRW $1, R11, R11
 	MOVB R11, 0(R8)
 	ADD  $2, R8, R8
 	// Return the number of bytes written.
 	SUB  R7, R8, R8
 	MOVD R8, ret+40(FP)
 	RET
 step3:
 	// Emit the remaining copy, encoded as 3 bytes.
 	//
 	// dst[i+0] = uint8(length-1)<<2 | 2, followed by offset stored with a MOVW.
 	SUB  $1, R3, R3
 	AND  $0xff, R3, R3
 	LSLW $2, R3, R3
 	ORRW $2, R3, R3
 	MOVB R3, 0(R8)
 	MOVW R11, 1(R8)
 	ADD  $3, R8, R8
 	// Return the number of bytes written.
 	SUB  R7, R8, R8
 	MOVD R8, ret+40(FP)
 	RET
 // ----------------------------------------------------------------------------
 // func extendMatch(src []byte, i, j int) int
 //
 // Advances i and j in lock step for as long as src[i] == src[j] and
 // j < len(src), and returns the final value of j.
 //
 // All local variables fit into registers. The register allocation:
 //	- R6	&src[0]
 //	- R7	&src[j]
 //	- R13	&src[len(src) - 8]
 //	- R14	&src[len(src)]
 //	- R15	&src[i]
 //
 // The unusual register allocation of local variables, such as R15 for a source
 // pointer, matches the allocation used at the call site in encodeBlock, which
 // makes it easier to manually inline this function.
 TEXT ·extendMatch(SB), NOSPLIT, $0-48
 	MOVD src_base+0(FP), R6
 	MOVD src_len+8(FP), R14
 	MOVD i+24(FP), R15
 	MOVD j+32(FP), R7
 	// Turn the length and the two indexes into absolute pointers.
 	ADD  R6, R14, R14
 	ADD  R6, R15, R15
 	ADD  R6, R7, R7
 	// R13 is the last address from which an 8-byte load stays inside src.
 	MOVD R14, R13
 	SUB  $8, R13, R13
 cmp8:
 	// As long as we are 8 or more bytes before the end of src, we can load and
 	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
 	CMP  R13, R7
 	BHI  cmp1
 	MOVD (R15), R3
 	MOVD (R7), R4
 	CMP  R4, R3
 	BNE  bsf
 	ADD  $8, R15, R15
 	ADD  $8, R7, R7
 	B    cmp8
 bsf:
 	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
 	// the index of the first byte that differs.
 	// RBIT reverses the bit order, then CLZ counts the leading zeros, the
 	// combination of which finds the least significant bit which is set.
 	// The arm64 architecture is little-endian, and the shift by 3 converts
 	// a bit index to a byte index.
 	EOR  R3, R4, R4
 	RBIT R4, R4
 	CLZ  R4, R4
 	ADD  R4>>3, R7, R7
 	// Convert from &src[ret] to ret.
 	SUB  R6, R7, R7
 	MOVD R7, ret+40(FP)
 	RET
 cmp1:
 	// In src's tail, compare 1 byte at a time.
 	CMP  R7, R14
 	BLS  extendMatchEnd
 	MOVB (R15), R3
 	MOVB (R7), R4
 	CMP  R4, R3
 	BNE  extendMatchEnd
 	ADD  $1, R15, R15
 	ADD  $1, R7, R7
 	B    cmp1
 extendMatchEnd:
 	// Convert from &src[ret] to ret.
 	SUB  R6, R7, R7
 	MOVD R7, ret+40(FP)
 	RET
 // ----------------------------------------------------------------------------
 // func encodeBlock(dst, src []byte) (d int)
 //
 // Encodes src into dst as a sequence of literal and copy chunks and returns
 // d, the number of bytes written to dst. It contains manually inlined copies
 // of emitLiteral, extendMatch and emitCopy; the final literal (emitRemainder)
 // is emitted through a real call to emitLiteral.
 //
 // All local variables fit into registers, other than "var table". The register
 // allocation:
 //	- R3	.	.
 //	- R4	.	.
 //	- R5	64	shift
 //	- R6	72	&src[0], tableSize
 //	- R7	80	&src[s]
 //	- R8	88	&dst[d]
 //	- R9	96	sLimit
 //	- R10	.	&src[nextEmit]
 //	- R11	104	prevHash, currHash, nextHash, offset
 //	- R12	112	&src[base], skip
 //	- R13	.	&src[nextS], &src[len(src) - 8]
 //	- R14	.	len(src), bytesBetweenHashLookups, &src[len(src)], x
 //	- R15	120	candidate
 //	- R16	.	hash constant, 0x1e35a7bd
 //	- R17	.	&table
 //	- .  	128	table
 //
 // The second column (64, 72, etc) is the stack offset to spill the registers
 // when calling other functions. We could pack this slightly tighter, but it's
 // simpler to have a dedicated spill map independent of the function called.
 //
 // R16 and R17 have no spill slot; they are recomputed after the memmove call
 // instead.
 //
 // "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
 // extra 64 bytes, to call other functions, and an extra 64 bytes, to spill
 // local variables (registers) during calls gives 32768 + 64 + 64 = 32896.
 TEXT ·encodeBlock(SB), 0, $32896-56
 	MOVD dst_base+0(FP), R8
 	MOVD src_base+24(FP), R7
 	MOVD src_len+32(FP), R14
 	// shift, tableSize := uint32(32-8), 1<<8
 	MOVD  $24, R5
 	MOVD  $256, R6
 	// R16 = 0x1e35a7bd, built from its low and high 16-bit halves.
 	MOVW  $0xa7bd, R16
 	MOVKW $(0x1e35<<16), R16
 calcShift:
 	// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
 	//	shift--
 	// }
 	MOVD $16384, R2
 	CMP  R2, R6
 	BGE  varTable
 	CMP  R14, R6
 	BGE  varTable
 	SUB  $1, R5, R5
 	LSL  $1, R6, R6
 	B    calcShift
 varTable:
 	// var table [maxTableSize]uint16
 	//
 	// In the asm code, unlike the Go code, we can zero-initialize only the
 	// first tableSize elements. Each uint16 element is 2 bytes and each
 	// iteration writes 64 bytes, so we can do only tableSize/32 writes
 	// instead of the 2048 writes that would zero-initialize all of table's
 	// 32768 bytes. This clear could overrun the first tableSize elements, but
 	// it won't overrun the allocated stack size.
 	ADD  $128, RSP, R17
 	MOVD R17, R4
 	// !!! R6 = &table[tableSize]
 	ADD R6<<1, R17, R6
 memclr:
 	STP.P (ZR, ZR), 64(R4)
 	STP   (ZR, ZR), -48(R4)
 	STP   (ZR, ZR), -32(R4)
 	STP   (ZR, ZR), -16(R4)
 	CMP   R4, R6
 	BHI   memclr
 	// !!! R6 = &src[0]
 	MOVD R7, R6
 	// sLimit := len(src) - inputMargin
 	MOVD R14, R9
 	SUB  $15, R9, R9
 	// !!! Pre-emptively spill R5, R6 and R9 to the stack. Their values don't
 	// change for the rest of the function.
 	MOVD R5, 64(RSP)
 	MOVD R6, 72(RSP)
 	MOVD R9, 96(RSP)
 	// nextEmit := 0
 	MOVD R6, R10
 	// s := 1
 	ADD $1, R7, R7
 	// nextHash := hash(load32(src, s), shift)
 	MOVW 0(R7), R11
 	MULW R16, R11, R11
 	LSRW R5, R11, R11
 outer:
 	// for { etc }
 	// skip := 32
 	MOVD $32, R12
 	// nextS := s
 	MOVD R7, R13
 	// candidate := 0
 	MOVD $0, R15
 inner0:
 	// for { etc }
 	// s := nextS
 	MOVD R13, R7
 	// bytesBetweenHashLookups := skip >> 5
 	MOVD R12, R14
 	LSR  $5, R14, R14
 	// nextS = s + bytesBetweenHashLookups
 	ADD R14, R13, R13
 	// skip += bytesBetweenHashLookups
 	ADD R14, R12, R12
 	// if nextS > sLimit { goto emitRemainder }
 	MOVD R13, R3
 	SUB  R6, R3, R3
 	CMP  R9, R3
 	BHI  emitRemainder
 	// candidate = int(table[nextHash])
 	MOVHU 0(R17)(R11<<1), R15
 	// table[nextHash] = uint16(s)
 	MOVD R7, R3
 	SUB  R6, R3, R3
 	MOVH R3, 0(R17)(R11<<1)
 	// nextHash = hash(load32(src, nextS), shift)
 	MOVW 0(R13), R11
 	MULW R16, R11
 	LSRW R5, R11, R11
 	// if load32(src, s) != load32(src, candidate) { continue } break
 	MOVW 0(R7), R3
 	MOVW (R6)(R15), R4
 	CMPW R4, R3
 	BNE  inner0
 fourByteMatch:
 	// As per the encode_other.go code:
 	//
 	// A 4-byte match has been found. We'll later see etc.
 	// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
 	// on inputMargin in encode.go.
 	MOVD R7, R3
 	SUB  R10, R3, R3
 	CMP  $16, R3
 	BLE  emitLiteralFastPath
 	// ----------------------------------------
 	// Begin inline of the emitLiteral call.
 	//
 	// d += emitLiteral(dst[d:], src[nextEmit:s])
 	MOVW R3, R4
 	SUBW $1, R4, R4
 	MOVW $60, R2
 	CMPW R2, R4
 	BLT  inlineEmitLiteralOneByte
 	MOVW $256, R2
 	CMPW R2, R4
 	BLT  inlineEmitLiteralTwoBytes
 inlineEmitLiteralThreeBytes:
 	MOVD $0xf4, R1
 	MOVB R1, 0(R8)
 	MOVW R4, 1(R8)
 	ADD  $3, R8, R8
 	B    inlineEmitLiteralMemmove
 inlineEmitLiteralTwoBytes:
 	MOVD $0xf0, R1
 	MOVB R1, 0(R8)
 	MOVB R4, 1(R8)
 	ADD  $2, R8, R8
 	B    inlineEmitLiteralMemmove
 inlineEmitLiteralOneByte:
 	LSLW $2, R4, R4
 	MOVB R4, 0(R8)
 	ADD  $1, R8, R8
 inlineEmitLiteralMemmove:
 	// Spill local variables (registers) onto the stack; call; unspill.
 	//
 	// copy(dst[i:], lit)
 	//
 	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
 	// R8, R10 and R3 as arguments.
 	MOVD R8, 8(RSP)
 	MOVD R10, 16(RSP)
 	MOVD R3, 24(RSP)
 	// Finish the "d +=" part of "d += emitLiteral(etc)".
 	ADD   R3, R8, R8
 	MOVD  R7, 80(RSP)
 	MOVD  R8, 88(RSP)
 	MOVD  R15, 120(RSP)
 	CALL  runtime·memmove(SB)
 	// R5, R6 and R9 were spilled once at the top of the function; R7, R8 and
 	// R15 were spilled just above. R17 (&table) and R16 (the hash constant)
 	// are rebuilt from scratch.
 	MOVD  64(RSP), R5
 	MOVD  72(RSP), R6
 	MOVD  80(RSP), R7
 	MOVD  88(RSP), R8
 	MOVD  96(RSP), R9
 	MOVD  120(RSP), R15
 	ADD   $128, RSP, R17
 	MOVW  $0xa7bd, R16
 	MOVKW $(0x1e35<<16), R16
 	B     inner1
 inlineEmitLiteralEnd:
 	// End inline of the emitLiteral call.
 	// ----------------------------------------
 emitLiteralFastPath:
 	// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
 	MOVB R3, R4
 	SUBW $1, R4, R4
 	AND  $0xff, R4, R4
 	LSLW $2, R4, R4
 	MOVB R4, (R8)
 	ADD  $1, R8, R8
 	// !!! Implement the copy from lit to dst as a 16-byte load and store.
 	// (Encode's documentation says that dst and src must not overlap.)
 	//
 	// This always copies 16 bytes, instead of only len(lit) bytes, but that's
 	// OK. Subsequent iterations will fix up the overrun.
 	//
 	// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
 	// 16-byte loads and stores. This technique probably wouldn't be as
 	// effective on architectures that are fussier about alignment.
 	LDP 0(R10), (R0, R1)
 	STP (R0, R1), 0(R8)
 	ADD R3, R8, R8
 inner1:
 	// for { etc }
 	// base := s
 	MOVD R7, R12
 	// !!! offset := base - candidate
 	//
 	// R12 holds a pointer (&src[base]) while R15 holds an index (candidate), so
 	// &src[0] is subtracted as well to yield a plain distance.
 	MOVD R12, R11
 	SUB  R15, R11, R11
 	SUB  R6, R11, R11
 	// ----------------------------------------
 	// Begin inline of the extendMatch call.
 	//
 	// s = extendMatch(src, candidate+4, s+4)
 	// !!! R14 = &src[len(src)]
 	MOVD src_len+32(FP), R14
 	ADD  R6, R14, R14
 	// !!! R13 = &src[len(src) - 8]
 	MOVD R14, R13
 	SUB  $8, R13, R13
 	// !!! R15 = &src[candidate + 4]
 	ADD $4, R15, R15
 	ADD R6, R15, R15
 	// !!! s += 4
 	ADD $4, R7, R7
 inlineExtendMatchCmp8:
 	// As long as we are 8 or more bytes before the end of src, we can load and
 	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
 	CMP  R13, R7
 	BHI  inlineExtendMatchCmp1
 	MOVD (R15), R3
 	MOVD (R7), R4
 	CMP  R4, R3
 	BNE  inlineExtendMatchBSF
 	ADD  $8, R15, R15
 	ADD  $8, R7, R7
 	B    inlineExtendMatchCmp8
 inlineExtendMatchBSF:
 	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
 	// the index of the first byte that differs.
 	// RBIT reverses the bit order, then CLZ counts the leading zeros, the
 	// combination of which finds the least significant bit which is set.
 	// The arm64 architecture is little-endian, and the shift by 3 converts
 	// a bit index to a byte index.
 	EOR  R3, R4, R4
 	RBIT R4, R4
 	CLZ  R4, R4
 	ADD  R4>>3, R7, R7
 	B    inlineExtendMatchEnd
 inlineExtendMatchCmp1:
 	// In src's tail, compare 1 byte at a time.
 	CMP  R7, R14
 	BLS  inlineExtendMatchEnd
 	MOVB (R15), R3
 	MOVB (R7), R4
 	CMP  R4, R3
 	BNE  inlineExtendMatchEnd
 	ADD  $1, R15, R15
 	ADD  $1, R7, R7
 	B    inlineExtendMatchCmp1
 inlineExtendMatchEnd:
 	// End inline of the extendMatch call.
 	// ----------------------------------------
 	// ----------------------------------------
 	// Begin inline of the emitCopy call.
 	//
 	// d += emitCopy(dst[d:], base-candidate, s-base)
 	// !!! length := s - base
 	MOVD R7, R3
 	SUB  R12, R3, R3
 inlineEmitCopyLoop0:
 	// for length >= 68 { etc }
 	MOVW $68, R2
 	CMPW R2, R3
 	BLT  inlineEmitCopyStep1
 	// Emit a length 64 copy, encoded as 3 bytes.
 	MOVD $0xfe, R1
 	MOVB R1, 0(R8)
 	MOVW R11, 1(R8)
 	ADD  $3, R8, R8
 	SUBW $64, R3, R3
 	B    inlineEmitCopyLoop0
 inlineEmitCopyStep1:
 	// if length > 64 { etc }
 	MOVW $64, R2
 	CMPW R2, R3
 	BLE  inlineEmitCopyStep2
 	// Emit a length 60 copy, encoded as 3 bytes.
 	MOVD $0xee, R1
 	MOVB R1, 0(R8)
 	MOVW R11, 1(R8)
 	ADD  $3, R8, R8
 	SUBW $60, R3, R3
 inlineEmitCopyStep2:
 	// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
 	MOVW $12, R2
 	CMPW R2, R3
 	BGE  inlineEmitCopyStep3
 	MOVW $2048, R2
 	CMPW R2, R11
 	BGE  inlineEmitCopyStep3
 	// Emit the remaining copy, encoded as 2 bytes.
 	MOVB R11, 1(R8)
 	LSRW $8, R11, R11
 	LSLW $5, R11, R11
 	SUBW $4, R3, R3
 	AND  $0xff, R3, R3
 	LSLW $2, R3, R3
 	ORRW R3, R11, R11
 	ORRW $1, R11, R11
 	MOVB R11, 0(R8)
 	ADD  $2, R8, R8
 	B    inlineEmitCopyEnd
 inlineEmitCopyStep3:
 	// Emit the remaining copy, encoded as 3 bytes.
 	SUBW $1, R3, R3
 	LSLW $2, R3, R3
 	ORRW $2, R3, R3
 	MOVB R3, 0(R8)
 	MOVW R11, 1(R8)
 	ADD  $3, R8, R8
 inlineEmitCopyEnd:
 	// End inline of the emitCopy call.
 	// ----------------------------------------
 	// nextEmit = s
 	MOVD R7, R10
 	// if s >= sLimit { goto emitRemainder }
 	MOVD R7, R3
 	SUB  R6, R3, R3
 	CMP  R3, R9
 	BLS  emitRemainder
 	// As per the encode_other.go code:
 	//
 	// We could immediately etc.
 	// x := load64(src, s-1)
 	MOVD -1(R7), R14
 	// prevHash := hash(uint32(x>>0), shift)
 	MOVW R14, R11
 	MULW R16, R11, R11
 	LSRW R5, R11, R11
 	// table[prevHash] = uint16(s-1)
 	MOVD R7, R3
 	SUB  R6, R3, R3
 	SUB  $1, R3, R3
 	MOVHU R3, 0(R17)(R11<<1)
 	// currHash := hash(uint32(x>>8), shift)
 	LSR  $8, R14, R14
 	MOVW R14, R11
 	MULW R16, R11, R11
 	LSRW R5, R11, R11
 	// candidate = int(table[currHash])
 	MOVHU 0(R17)(R11<<1), R15
 	// table[currHash] = uint16(s)
 	ADD   $1, R3, R3
 	MOVHU R3, 0(R17)(R11<<1)
 	// if uint32(x>>8) == load32(src, candidate) { continue }
 	MOVW (R6)(R15), R4
 	CMPW R4, R14
 	BEQ  inner1
 	// nextHash = hash(uint32(x>>16), shift)
 	LSR  $8, R14, R14
 	MOVW R14, R11
 	MULW R16, R11, R11
 	LSRW R5, R11, R11
 	// s++
 	ADD $1, R7, R7
 	// break out of the inner1 for loop, i.e. continue the outer loop.
 	B outer
 emitRemainder:
 	// if nextEmit < len(src) { etc }
 	MOVD src_len+32(FP), R3
 	ADD  R6, R3, R3
 	CMP  R3, R10
 	BEQ  encodeBlockEnd
 	// d += emitLiteral(dst[d:], src[nextEmit:])
 	//
 	// Push args.
 	MOVD R8, 8(RSP)
 	MOVD $0, 16(RSP)  // Unnecessary, as the callee ignores it, but conservative.
 	MOVD $0, 24(RSP)  // Unnecessary, as the callee ignores it, but conservative.
 	MOVD R10, 32(RSP)
 	SUB  R10, R3, R3
 	MOVD R3, 40(RSP)
 	MOVD R3, 48(RSP)  // Unnecessary, as the callee ignores it, but conservative.
 	// Spill local variables (registers) onto the stack; call; unspill.
 	MOVD R8, 88(RSP)
 	CALL ·emitLiteral(SB)
 	MOVD 88(RSP), R8
 	// Finish the "d +=" part of "d += emitLiteral(etc)".
 	// 56(RSP) is emitLiteral's result slot, just past its six argument words.
 	MOVD 56(RSP), R1
 	ADD  R1, R8, R8
 encodeBlockEnd:
 	// d = &dst[d] - &dst[0]
 	MOVD dst_base+0(FP), R3
 	SUB  R3, R8, R8
 	MOVD R8, d+48(FP)
 	RET
--- a/vendor/github.com/golang/snappy/encode_asm.go
+++ b/vendor/github.com/golang/snappy/encode_asm.go
@ -0,0 +1,30 @@
 // Copyright 2016 The Snappy-Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build !appengine
 // +build gc
 // +build !noasm
 // +build amd64 arm64
 package snappy
 // emitLiteral has the same semantics as in encode_other.go: it writes a
 // literal chunk holding lit to dst and returns the number of bytes written.
 // The declaration has no Go body; the implementation is supplied outside
 // this file for builds that match this file's build constraints.
 //
 //go:noescape
 func emitLiteral(dst, lit []byte) int
 // emitCopy has the same semantics as in encode_other.go: it writes the copy
 // chunk(s) for the given offset and length to dst and returns the number of
 // bytes written. The declaration has no Go body.
 //
 //go:noescape
 func emitCopy(dst []byte, offset, length int) int
 // extendMatch has the same semantics as in encode_other.go: it returns the
 // largest k such that k <= len(src) and src[i:i+k-j] and src[j:k] have the
 // same contents. The declaration has no Go body.
 //
 //go:noescape
 func extendMatch(src []byte, i, j int) int
 // encodeBlock has the same semantics as in encode_other.go: it encodes a
 // non-empty src into dst and returns d, the number of bytes written. The
 // declaration has no Go body.
 //
 //go:noescape
 func encodeBlock(dst, src []byte) (d int)
--- a/vendor/github.com/golang/snappy/encode_other.go
+++ b/vendor/github.com/golang/snappy/encode_other.go
@ -0,0 +1,238 @@
 // Copyright 2016 The Snappy-Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // +build !amd64,!arm64 appengine !gc noasm
 package snappy
 // load32 returns the four bytes b[i:i+4] combined into a uint32 in
 // little-endian order, i.e. b[i] ends up in the least significant byte.
 // It panics if b does not hold four bytes starting at index i.
 func load32(b []byte, i int) uint32 {
 	// Narrowing to a four-byte window first helps the compiler eliminate the
 	// bounds checks on the individual byte reads below.
 	w := b[i : i+4 : len(b)]
 	lo := uint32(w[0]) | uint32(w[1])<<8
 	hi := uint32(w[2]) | uint32(w[3])<<8
 	return lo | hi<<16
 }
 // load64 returns the eight bytes b[i:i+8] combined into a uint64 in
 // little-endian order, i.e. b[i] ends up in the least significant byte.
 // It panics if b does not hold eight bytes starting at index i.
 func load64(b []byte, i int) uint64 {
 	// Narrowing to an eight-byte window first helps the compiler eliminate
 	// the bounds checks on the individual byte reads below.
 	w := b[i : i+8 : len(b)]
 	lo := uint64(w[0]) | uint64(w[1])<<8 | uint64(w[2])<<16 | uint64(w[3])<<24
 	hi := uint64(w[4]) | uint64(w[5])<<8 | uint64(w[6])<<16 | uint64(w[7])<<24
 	return lo | hi<<32
 }
 // emitLiteral encodes lit as a single literal chunk at the start of dst and
 // returns how many bytes of dst were used (header plus literal bytes).
 //
 // The caller must guarantee that:
 //	dst is long enough to hold the encoded bytes
 //	1 <= len(lit) && len(lit) <= 65536
 func emitLiteral(dst, lit []byte) int {
 	// The header stores len(lit)-1, either inside the tag byte itself or in
 	// one or two extra little-endian bytes following it.
 	n := uint(len(lit) - 1)
 	var hdr int
 	if n < 60 {
 		dst[0] = uint8(n)<<2 | tagLiteral
 		hdr = 1
 	} else if n < 1<<8 {
 		dst[0] = 60<<2 | tagLiteral
 		dst[1] = uint8(n)
 		hdr = 2
 	} else {
 		dst[0] = 61<<2 | tagLiteral
 		dst[1] = uint8(n)
 		dst[2] = uint8(n >> 8)
 		hdr = 3
 	}
 	return hdr + copy(dst[hdr:], lit)
 }
 // emitCopy encodes a back-reference of the given length at the given offset
 // as one or more copy chunks at the start of dst, and returns the number of
 // bytes written.
 //
 // The caller must guarantee that:
 //	dst is long enough to hold the encoded bytes
 //	1 <= offset && offset <= 65535
 //	4 <= length && length <= 65535
 func emitCopy(dst []byte, offset, length int) int {
 	// Both offset bytes are needed by every 3-byte chunk emitted below.
 	lo, hi := uint8(offset), uint8(offset>>8)
 	n := 0
 	// A single tagCopy1 or tagCopy2 op covers at most 64 bytes. The loop
 	// threshold sits a little higher (68 = 64 + 4) and the chunk emitted
 	// right after the loop is a little shorter (60 = 64 - 4), because a
 	// length 67 copy is cheaper as a length 60 tagCopy2 plus a length 7
 	// tagCopy1 (3+2 bytes) than as a length 64 tagCopy2 plus a length 3
 	// tagCopy2 (3+3 bytes). The 4 in 64±4 is the minimum tagCopy1 length:
 	// a length 3 copy cannot use the 2-byte tagCopy1 form and must be a
 	// 3-byte tagCopy2.
 	for ; length >= 68; length -= 64 {
 		// Length 64 copy, 3 bytes.
 		dst[n+0] = 63<<2 | tagCopy2
 		dst[n+1] = lo
 		dst[n+2] = hi
 		n += 3
 	}
 	if length > 64 {
 		// Length 60 copy, 3 bytes.
 		dst[n+0] = 59<<2 | tagCopy2
 		dst[n+1] = lo
 		dst[n+2] = hi
 		n += 3
 		length -= 60
 	}
 	if length < 12 && offset < 2048 {
 		// Short and near enough for the 2-byte form.
 		dst[n+0] = hi<<5 | uint8(length-4)<<2 | tagCopy1
 		dst[n+1] = lo
 		return n + 2
 	}
 	// Whatever is left goes out as one more 3-byte chunk.
 	dst[n+0] = uint8(length-1)<<2 | tagCopy2
 	dst[n+1] = lo
 	dst[n+2] = hi
 	return n + 3
 }
 // extendMatch walks i and j forward in lock step while src[i] == src[j] and
 // j is still inside src, and returns the final j. Put differently, it
 // returns the largest k such that k <= len(src) and that src[i:i+k-j] and
 // src[j:k] have the same contents.
 //
 // The caller must guarantee that:
 //	0 <= i && i < j && j <= len(src)
 func extendMatch(src []byte, i, j int) int {
 	for j < len(src) {
 		if src[i] != src[j] {
 			break
 		}
 		i++
 		j++
 	}
 	return j
 }
 // hash multiplies u by a fixed 32-bit constant (wrapping on overflow) and
 // keeps the top 32-shift bits of the product by shifting right by shift.
 func hash(u, shift uint32) uint32 {
 	const multiplier = 0x1e35a7bd
 	product := u * multiplier
 	return product >> shift
 }
 // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
 // assumes that the varint-encoded length of the decompressed bytes has already
 // been written.
 //
 // The result d is the number of bytes written to dst. The output is a
 // sequence of literal chunks (emitLiteral) and copy chunks (emitCopy); any
 // input left over once the match search stops is emitted as a final literal.
 //
 // It also assumes that:
 //	len(dst) >= MaxEncodedLen(len(src)) &&
 //	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
 func encodeBlock(dst, src []byte) (d int) {
 	// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
 	// The table element type is uint16, as s < sLimit and sLimit < len(src)
 	// and len(src) <= maxBlockSize and maxBlockSize == 65536.
 	const (
 		maxTableSize = 1 << 14
 		// tableMask is redundant, but helps the compiler eliminate bounds
 		// checks.
 		tableMask = maxTableSize - 1
 	)
 	// shift starts at 32-8 (an 8-bit hash) and drops by one for every
 	// doubling of tableSize, so hash values stay below the chosen table size.
 	shift := uint32(32 - 8)
 	for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
 		shift--
 	}
 	// In Go, all array elements are zero-initialized, so there is no advantage
 	// to a smaller tableSize per se. However, it matches the C++ algorithm,
 	// and in the asm versions of this code, we can get away with zeroing only
 	// the first tableSize elements.
 	var table [maxTableSize]uint16
 	// sLimit is when to stop looking for offset/length copies. The inputMargin
 	// lets us use a fast path for emitLiteral in the main loop, while we are
 	// looking for copies.
 	sLimit := len(src) - inputMargin
 	// nextEmit is where in src the next emitLiteral should start from.
 	nextEmit := 0
 	// The encoded form must start with a literal, as there are no previous
 	// bytes to copy, so we start looking for hash matches at s == 1.
 	s := 1
 	nextHash := hash(load32(src, s), shift)
 	for {
 		// Copied from the C++ snappy implementation:
 		//
 		// Heuristic match skipping: If 32 bytes are scanned with no matches
 		// found, start looking only at every other byte. If 32 more bytes are
 		// scanned (or skipped), look at every third byte, etc.. When a match
 		// is found, immediately go back to looking at every byte. This is a
 		// small loss (~5% performance, ~0.1% density) for compressible data
 		// due to more bookkeeping, but for non-compressible data (such as
 		// JPEG) it's a huge win since the compressor quickly "realizes" the
 		// data is incompressible and doesn't bother looking for matches
 		// everywhere.
 		//
 		// The "skip" variable keeps track of how many bytes there are since
 		// the last match; dividing it by 32 (ie. right-shifting by five) gives
 		// the number of bytes to move ahead for each iteration.
 		skip := 32
 		nextS := s
 		candidate := 0
 		for {
 			s = nextS
 			bytesBetweenHashLookups := skip >> 5
 			nextS = s + bytesBetweenHashLookups
 			skip += bytesBetweenHashLookups
 			if nextS > sLimit {
 				goto emitRemainder
 			}
 			// Read the previous position stored for this hash, then record
 			// s in its place before hashing the next probe position.
 			candidate = int(table[nextHash&tableMask])
 			table[nextHash&tableMask] = uint16(s)
 			nextHash = hash(load32(src, nextS), shift)
 			// The table only stores positions, so a hit is confirmed by
 			// comparing the actual four bytes at both positions.
 			if load32(src, s) == load32(src, candidate) {
 				break
 			}
 		}
 		// A 4-byte match has been found. We'll later see if more than 4 bytes
 		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
 		// them as literal bytes.
 		d += emitLiteral(dst[d:], src[nextEmit:s])
 		// Call emitCopy, and then see if another emitCopy could be our next
 		// move. Repeat until we find no match for the input immediately after
 		// what was consumed by the last emitCopy call.
 		//
 		// If we exit this loop normally then we need to call emitLiteral next,
 		// though we don't yet know how big the literal will be. We handle that
 		// by proceeding to the next iteration of the main loop. We also can
 		// exit this loop via goto if we get close to exhausting the input.
 		for {
 			// Invariant: we have a 4-byte match at s, and no need to emit any
 			// literal bytes prior to s.
 			base := s
 			// Extend the 4-byte match as long as possible.
 			//
 			// This is an inlined version of:
 			//	s = extendMatch(src, candidate+4, s+4)
 			s += 4
 			for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 {
 			}
 			// base-candidate is the copy offset, s-base the matched length.
 			d += emitCopy(dst[d:], base-candidate, s-base)
 			nextEmit = s
 			if s >= sLimit {
 				goto emitRemainder
 			}
 			// We could immediately start working at s now, but to improve
 			// compression we first update the hash table at s-1 and at s. If
 			// another emitCopy is not our next move, also calculate nextHash
 			// at s+1. At least on GOARCH=amd64, these three hash calculations
 			// are faster as one load64 call (with some shifts) instead of
 			// three load32 calls.
 			x := load64(src, s-1)
 			prevHash := hash(uint32(x>>0), shift)
 			table[prevHash&tableMask] = uint16(s - 1)
 			currHash := hash(uint32(x>>8), shift)
 			candidate = int(table[currHash&tableMask])
 			table[currHash&tableMask] = uint16(s)
 			if uint32(x>>8) != load32(src, candidate) {
 				nextHash = hash(uint32(x>>16), shift)
 				s++
 				break
 			}
 		}
 	}
 emitRemainder:
 	// Flush whatever follows the last emitted chunk as one final literal.
 	if nextEmit < len(src) {
 		d += emitLiteral(dst[d:], src[nextEmit:])
 	}
 	return d
 }
--- a/vendor/github.com/golang/snappy/snappy.go
+++ b/vendor/github.com/golang/snappy/snappy.go
@ -0,0 +1,98 @@
 // Copyright 2011 The Snappy-Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // Package snappy implements the Snappy compression format. It aims for very
 // high speeds and reasonable compression.
 //
 // There are actually two Snappy formats: block and stream. They are related,
 // but different: trying to decompress block-compressed data as a Snappy stream
 // will fail, and vice versa. The block format is the Decode and Encode
 // functions and the stream format is the Reader and Writer types.
 //
 // The block format, the more common case, is used when the complete size (the
 // number of bytes) of the original data is known upfront, at the time
 // compression starts. The stream format, also known as the framing format, is
 // for when that isn't always true.
 //
 // The canonical, C++ implementation is at https://github.com/google/snappy and
 // it only implements the block format.
 package snappy // import "github.com/golang/snappy"
 import (
 	"hash/crc32"
 )
 /*
 Each encoded block begins with the varint-encoded length of the decoded data,
 followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
 first byte of each chunk is broken into its 2 least and 6 most significant bits
 called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
 Zero means a literal tag. All other values mean a copy tag.
 For literal tags:
  - If m < 60, the next 1 + m bytes are literal bytes.
  - Otherwise, let n be the little-endian unsigned integer denoted by the next
    m - 59 bytes. The next 1 + n bytes after that are literal bytes.
 For copy tags, length bytes are copied from offset bytes ago, in the style of
 Lempel-Ziv compression algorithms. In particular:
  - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
    The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
    of the offset. The next byte is bits 0-7 of the offset.
  - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
    The length is 1 + m. The offset is the little-endian unsigned integer
    denoted by the next 2 bytes.
  - For l == 3, this tag is a legacy format that is no longer issued by most
    encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in
    [1, 65). The length is 1 + m. The offset is the little-endian unsigned
    integer denoted by the next 4 bytes.
 */
 // Chunk tags: the value of l, the 2 least significant bits of the first byte
 // of each chunk.
 const (
 	tagLiteral = 0x00 // literal bytes follow
 	tagCopy1   = 0x01 // copy, length in [4, 12), offset below 1<<11
 	tagCopy2   = 0x02 // copy, length in [1, 65), 2-byte little-endian offset
 	tagCopy4   = 0x03 // legacy copy, length in [1, 65), 4-byte little-endian offset
 )
 const (
 	// checksumSize and chunkHeaderSize are byte counts that contribute to
 	// obufHeaderLen below.
 	checksumSize    = 4
 	chunkHeaderSize = 4
 	// magicChunk is four header bytes followed by magicBody. Its first byte,
 	// 0xff, equals chunkTypeStreamIdentifier.
 	// NOTE(review): the following 0x06 0x00 0x00 look like the little-endian
 	// length of magicBody (6 bytes) -- confirm against framing_format.txt.
 	magicChunk      = "\xff\x06\x00\x00" + magicBody
 	magicBody       = "sNaPpY"
 	// maxBlockSize is the maximum size of the input to encodeBlock. It is not
 	// part of the wire format per se, but some parts of the encoder assume
 	// that an offset fits into a uint16.
 	//
 	// Also, for the framing format (Writer type instead of Encode function),
 	// https://github.com/google/snappy/blob/master/framing_format.txt says
 	// that "the uncompressed data in a chunk must be no longer than 65536
 	// bytes".
 	maxBlockSize = 65536
 	// maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is
 	// hard coded to be a const instead of a variable, so that obufLen can also
 	// be a const. Their equivalence is confirmed by
 	// TestMaxEncodedLenOfMaxBlockSize.
 	maxEncodedLenOfMaxBlockSize = 76490
 	// obufHeaderLen is 10 + 4 + 4 = 18 bytes; obufLen adds the worst-case
 	// encoded size of one maximum-size block on top, giving 76508 bytes.
 	obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize
 	obufLen       = obufHeaderLen + maxEncodedLenOfMaxBlockSize
 )
 // Chunk type identifiers. chunkTypeStreamIdentifier (0xff) is the value of
 // the first byte of magicChunk.
 // NOTE(review): the remaining values presumably follow the chunk types of
 // the framing format specification -- confirm against framing_format.txt.
 const (
 	chunkTypeCompressedData   = 0x00
 	chunkTypeUncompressedData = 0x01
 	chunkTypePadding          = 0xfe
 	chunkTypeStreamIdentifier = 0xff
 )
 // crcTable is the CRC-32 table for the Castagnoli polynomial, built once and
 // shared by every crc call.
 var crcTable = crc32.MakeTable(crc32.Castagnoli)
 // crc returns the masked checksum described in section 3 of
 // https://github.com/google/snappy/blob/master/framing_format.txt:
 // the CRC-32 (Castagnoli) of b, rotated right by 15 bits, plus the constant
 // 0xa282ead8 (with 32-bit wrap-around).
 func crc(b []byte) uint32 {
 	sum := crc32.Checksum(b, crcTable)
 	rotated := sum<<17 | sum>>15
 	return rotated + 0xa282ead8
 }
--- a/vendor/github.com/pierrec/lz4/v4/.gitignore
+++ b/vendor/github.com/pierrec/lz4/v4/.gitignore
@ -0,0 +1,36 @@
 # Created by https://www.gitignore.io/api/macos
 ### macOS ###
 *.DS_Store
 .AppleDouble
 .LSOverride
 # Icon must end with two \r
 Icon
 # Thumbnails
 ._*
 # Files that might appear in the root of a volume
 .DocumentRevisions-V100
 .fseventsd
 .Spotlight-V100
 .TemporaryItems
 .Trashes
 .VolumeIcon.icns
 .com.apple.timemachine.donotpresent
 # Directories potentially created on remote AFP share
 .AppleDB
 .AppleDesktop
 Network Trash Folder
 Temporary Items
 .apdisk
 # End of https://www.gitignore.io/api/macos
 cmd/*/*exe
 .idea
 fuzz/*.zip
--- a/vendor/github.com/pierrec/lz4/v4/LICENSE
+++ b/vendor/github.com/pierrec/lz4/v4/LICENSE
@ -0,0 +1,28 @@
 Copyright (c) 2015, Pierre Curto
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 * Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.
 * Neither the name of xxHash nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/vendor/github.com/pierrec/lz4/v4/README.md
+++ b/vendor/github.com/pierrec/lz4/v4/README.md
@ -0,0 +1,92 @@
 # lz4 : LZ4 compression in pure Go
 [![Go Reference](https://pkg.go.dev/badge/github.com/pierrec/lz4/v4.svg)](https://pkg.go.dev/github.com/pierrec/lz4/v4)
 [![CI](https://github.com/pierrec/lz4/workflows/ci/badge.svg)](https://github.com/pierrec/lz4/actions)
 [![Go Report Card](https://goreportcard.com/badge/github.com/pierrec/lz4)](https://goreportcard.com/report/github.com/pierrec/lz4)
 [![GitHub tag (latest SemVer)](https://img.shields.io/github/tag/pierrec/lz4.svg?style=social)](https://github.com/pierrec/lz4/tags)
 ## Overview
 This package provides a streaming interface to [LZ4 data streams](http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html) as well as low level compress and uncompress functions for LZ4 data blocks.
 The implementation is based on the [reference C implementation](https://github.com/lz4/lz4).
 ## Install
 Assuming you have the go toolchain installed:
 ```
 go get github.com/pierrec/lz4/v4
 ```
 There is a command line interface tool to compress and decompress LZ4 files.
 ```
 go install github.com/pierrec/lz4/v4/cmd/lz4c
 ```
 Usage
 ```
 Usage of lz4c:
  -version
        print the program version
 Subcommands:
 Compress the given files or from stdin to stdout.
 compress [arguments] [<file name> ...]
  -bc
        enable block checksum
  -l int
        compression level (0=fastest)
  -sc
        disable stream checksum
  -size string
        block max size [64K,256K,1M,4M] (default "4M")
 Uncompress the given files or from stdin to stdout.
 uncompress [arguments] [<file name> ...]
 ```
 ## Example
 ```
 // Compress and uncompress an input string.
 s := "hello world"
 r := strings.NewReader(s)
 // The pipe will uncompress the data from the writer.
 pr, pw := io.Pipe()
 zw := lz4.NewWriter(pw)
 zr := lz4.NewReader(pr)
 go func() {
 	// Compress the input string.
 	_, _ = io.Copy(zw, r)
 	_ = zw.Close() // Make sure the writer is closed
 	_ = pw.Close() // Terminate the pipe
 }()
 _, _ = io.Copy(os.Stdout, zr)
 // Output:
 // hello world
 ```
 ## Contributing
 Contributions are very welcome, including bug fixes and performance improvements!
 - Open an issue with a proper description
 - Send a pull request with appropriate test case(s)
 ## Contributors
 Thanks to all [contributors](https://github.com/pierrec/lz4/graphs/contributors) so far!
 Special thanks to [@Zariel](https://github.com/Zariel) for his asm implementation of the decoder.
 Special thanks to [@greatroar](https://github.com/greatroar) for his work on the asm implementations of the decoder for amd64 and arm64.
 Special thanks to [@klauspost](https://github.com/klauspost) for his work on optimizing the code.
--- a/vendor/github.com/pierrec/lz4/v4/internal/lz4block/block.go
+++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/block.go
@ -0,0 +1,481 @@
 package lz4block
 import (
 	"encoding/binary"
 	"math/bits"
 	"sync"
 	"github.com/pierrec/lz4/v4/internal/lz4errors"
 )
 const (
 	// The following constants are used to set up the compression algorithm.
 	minMatch   = 4  // the minimum size of the match sequence size (4 bytes)
 	winSizeLog = 16 // LZ4 64Kb window size limit
 	winSize    = 1 << winSizeLog // 65536
 	winMask    = winSize - 1 // 64Kb window of previous data for dependent blocks
 	// hashLog determines the size of the hash table used to quickly find a previous match position.
 	// Its value influences the compression speed and memory usage, the lower the faster,
 	// but at the expense of the compression ratio.
 	// 16 seems to be the best compromise for fast compression.
 	hashLog = 16
 	htSize  = 1 << hashLog // number of hash table entries (65536)
 	mfLimit = 10 + minMatch // The last match cannot start within the last 14 bytes.
 )
 // recoverBlock is meant to be deferred. If the surrounding function is
 // panicking it stops the panic and, unless *e already holds an error, stores
 // lz4errors.ErrInvalidSourceShortBuffer in *e.
 func recoverBlock(e *error) {
 	r := recover()
 	if r == nil || *e != nil {
 		return
 	}
 	*e = lz4errors.ErrInvalidSourceShortBuffer
 }
 // blockHash hashes the lower 6 bytes of x into a value < htSize. The upper
 // two bytes are shifted out before the multiplication, so they never affect
 // the result.
 func blockHash(x uint64) uint32 {
 	const prime6bytes = 227718039650203
 	low6 := x << (64 - 48)
 	mixed := low6 * prime6bytes
 	return uint32(mixed >> (64 - hashLog))
 }
 // CompressBlockBound returns n + n/255 + 16. The block compressors compare
 // the destination buffer length against this bound for a source of n bytes.
 func CompressBlockBound(n int) int {
 	overhead := n/255 + 16
 	return n + overhead
 }
 // UncompressBlock decodes the block in src into dst, passing dict through to
 // decodeBlock, and returns the value decodeBlock reports. An empty src
 // yields (0, nil) without calling the decoder. A negative decodeBlock result
 // is reported as lz4errors.ErrInvalidSourceShortBuffer.
 func UncompressBlock(src, dst, dict []byte) (int, error) {
 	if len(src) == 0 {
 		return 0, nil
 	}
 	di := decodeBlock(dst, src, dict)
 	if di < 0 {
 		return 0, lz4errors.ErrInvalidSourceShortBuffer
 	}
 	return di, nil
 }
 // Compressor holds the hash table state used by its CompressBlock method.
 // The state is cleared (via reset) at the start of every CompressBlock call,
 // so a Compressor can be reused across blocks.
 type Compressor struct {
 	// Offsets are at most 64kiB, so we can store only the lower 16 bits of
 	// match positions: effectively, an offset from some 64kiB block boundary.
 	//
 	// When we retrieve such an offset, we interpret it as relative to the last
 	// block boundary si &^ 0xffff, or the one before, (si &^ 0xffff) - 0x10000,
 	// depending on which of these is inside the current window. If a table
 	// entry was generated more than 64kiB back in the input, we find out by
 	// inspecting the input stream.
 	table [htSize]uint16
 	// Bitmap indicating which positions in the table are in use.
 	// This allows us to quickly reset the table for reuse,
 	// without having to zero everything.
 	// Entry h of table is tracked by bit h%32 of word h/32.
 	inUse [htSize / 32]uint32
 }
 // get returns the position of a presumptive match for the hash h, as seen
 // from the current search position si.
 // The match may be a false positive due to a hash collision or an old entry.
 // If si < winSize, the return value may be negative.
 func (c *Compressor) get(h uint32, si int) int {
 	h &= htSize - 1
 	word, bit := h/32, uint32(1)<<(h%32)
 	// Start from the 64kiB boundary at or below si and add the stored low
 	// 16 bits, provided the slot has been written since the last reset.
 	pos := si &^ winMask
 	if c.inUse[word]&bit != 0 {
 		pos += int(c.table[h])
 	}
 	if pos >= si {
 		// Try previous 64kiB block (negative when in first block).
 		pos -= winSize
 	}
 	return pos
 }
 // put records position si for the hash h: the low 16 bits of si go into the
 // table slot and the slot is flagged as in use.
 func (c *Compressor) put(h uint32, si int) {
 	slot := h & (htSize - 1)
 	c.inUse[slot/32] |= 1 << (slot % 32)
 	c.table[slot] = uint16(si)
 }
 // reset marks every table slot as unused by clearing the inUse bitmap; the
 // table contents themselves are left untouched.
 func (c *Compressor) reset() {
 	var cleared [htSize / 32]uint32
 	c.inUse = cleared
 }
 // compressorPool hands out reusable Compressor values, allocating a new one
 // when the pool is empty.
 var compressorPool = sync.Pool{New: func() interface{} { return new(Compressor) }}
 // CompressBlock compresses src into dst using a Compressor borrowed from
 // compressorPool and returns that Compressor's result unchanged. The
 // Compressor is handed back to the pool once compression has returned.
 func CompressBlock(src, dst []byte) (int, error) {
 	compressor := compressorPool.Get().(*Compressor)
 	written, err := compressor.CompressBlock(src, dst)
 	compressorPool.Put(compressor)
 	return written, err
 }
 // CompressBlock compresses src into dst using the receiver's hash table and
 // returns the number of bytes written to dst.
 //
 // It returns (0, nil) when dst is smaller than CompressBlockBound(len(src))
 // and either no match was emitted or the output position has caught up with
 // the start of the trailing literals. It returns
 // (0, lz4errors.ErrInvalidSourceShortBuffer) when one of the explicit bounds
 // checks finds that dst has run out of room.
 func (c *Compressor) CompressBlock(src, dst []byte) (int, error) {
 	// Zero out reused table to avoid non-deterministic output (issue #65).
 	c.reset()
 	// Return 0, nil only if the destination buffer size is < CompressBlockBound.
 	isNotCompressible := len(dst) < CompressBlockBound(len(src))
 	// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
 	// This significantly speeds up incompressible data and usually has very small impact on compression.
 	// bytes to skip =  1 + (bytes since last match >> adaptSkipLog)
 	const adaptSkipLog = 7
 	// si: Current position of the search.
 	// di: Current write position in dst.
 	// anchor: Position of the current literals.
 	var si, di, anchor int
 	// sn is the last position at which a match search may start.
 	sn := len(src) - mfLimit
 	if sn <= 0 {
 		// Input too short to search for matches: emit it all as literals.
 		goto lastLiterals
 	}
 	// Fast scan strategy: the hash table only stores the last 4 bytes sequences.
 	for si < sn {
 		// Hash the next 6 bytes (sequence)...
 		match := binary.LittleEndian.Uint64(src[si:])
 		h := blockHash(match)
 		h2 := blockHash(match >> 8)
 		// We check a match at s, s+1 and s+2 and pick the first one we get.
 		// Checking 3 only requires us to load the source once.
 		ref := c.get(h, si)
 		ref2 := c.get(h2, si+1)
 		c.put(h, si)
 		c.put(h2, si+1)
 		offset := si - ref
 		if offset <= 0 || offset >= winSize || uint32(match) != binary.LittleEndian.Uint32(src[ref:]) {
 			// No match. Start calculating another hash.
 			// The processor can usually do this out-of-order.
 			h = blockHash(match >> 16)
 			ref3 := c.get(h, si+2)
 			// Check the second match at si+1
 			si += 1
 			offset = si - ref2
 			if offset <= 0 || offset >= winSize || uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) {
 				// No match. Check the third match at si+2
 				si += 1
 				offset = si - ref3
 				c.put(h, si)
 				if offset <= 0 || offset >= winSize || uint32(match>>16) != binary.LittleEndian.Uint32(src[ref3:]) {
 					// Skip one extra byte (at si+3) before we check 3 matches again.
 					si += 2 + (si-anchor)>>adaptSkipLog
 					continue
 				}
 			}
 		}
 		// Match found.
 		lLen := si - anchor // Literal length.
 		// We already matched 4 bytes.
 		mLen := 4
 		// Extend backwards if we can, reducing literals.
 		tOff := si - offset - 1
 		for lLen > 0 && tOff >= 0 && src[si-1] == src[tOff] {
 			si--
 			tOff--
 			lLen--
 			mLen++
 		}
 		// Add the match length, so we continue search at the end.
 		// Use mLen to store the offset base.
 		si, mLen = si+mLen, si+minMatch
 		// Find the longest match by looking by batches of 8 bytes.
 		for si+8 <= sn {
 			x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:])
 			if x == 0 {
 				si += 8
 			} else {
 				// Stop at the first non-zero byte.
 				si += bits.TrailingZeros64(x) >> 3
 				break
 			}
 		}
 		// mLen now becomes the match length beyond the minMatch bytes.
 		mLen = si - mLen
 		if di >= len(dst) {
 			return 0, lz4errors.ErrInvalidSourceShortBuffer
 		}
 		// Token byte: low nibble is the match length, high nibble the literal
 		// length; 0xF in either nibble means extra length bytes follow.
 		if mLen < 0xF {
 			dst[di] = byte(mLen)
 		} else {
 			dst[di] = 0xF
 		}
 		// Encode literals length.
 		if lLen < 0xF {
 			dst[di] |= byte(lLen << 4)
 		} else {
 			dst[di] |= 0xF0
 			di++
 			l := lLen - 0xF
 			for ; l >= 0xFF && di < len(dst); l -= 0xFF {
 				dst[di] = 0xFF
 				di++
 			}
 			if di >= len(dst) {
 				return 0, lz4errors.ErrInvalidSourceShortBuffer
 			}
 			dst[di] = byte(l)
 		}
 		di++
 		// Literals.
 		if di+lLen > len(dst) {
 			return 0, lz4errors.ErrInvalidSourceShortBuffer
 		}
 		copy(dst[di:di+lLen], src[anchor:anchor+lLen])
 		// Advance past the literals and reserve the two offset bytes.
 		di += lLen + 2
 		anchor = si
 		// Encode offset.
 		if di > len(dst) {
 			return 0, lz4errors.ErrInvalidSourceShortBuffer
 		}
 		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
 		// Encode match length part 2.
 		if mLen >= 0xF {
 			for mLen -= 0xF; mLen >= 0xFF && di < len(dst); mLen -= 0xFF {
 				dst[di] = 0xFF
 				di++
 			}
 			if di >= len(dst) {
 				return 0, lz4errors.ErrInvalidSourceShortBuffer
 			}
 			dst[di] = byte(mLen)
 			di++
 		}
 		// Check if we can load next values.
 		if si >= sn {
 			break
 		}
 		// Hash match end-2
 		h = blockHash(binary.LittleEndian.Uint64(src[si-2:]))
 		c.put(h, si-2)
 	}
 lastLiterals:
 	if isNotCompressible && anchor == 0 {
 		// Incompressible.
 		return 0, nil
 	}
 	// Last literals.
 	if di >= len(dst) {
 		return 0, lz4errors.ErrInvalidSourceShortBuffer
 	}
 	lLen := len(src) - anchor
 	if lLen < 0xF {
 		dst[di] = byte(lLen << 4)
 	} else {
 		dst[di] = 0xF0
 		di++
 		for lLen -= 0xF; lLen >= 0xFF && di < len(dst); lLen -= 0xFF {
 			dst[di] = 0xFF
 			di++
 		}
 		if di >= len(dst) {
 			return 0, lz4errors.ErrInvalidSourceShortBuffer
 		}
 		dst[di] = byte(lLen)
 	}
 	di++
 	// Write the last literals.
 	if isNotCompressible && di >= anchor {
 		// Incompressible.
 		return 0, nil
 	}
 	if di+len(src)-anchor > len(dst) {
 		return 0, lz4errors.ErrInvalidSourceShortBuffer
 	}
 	di += copy(dst[di:di+len(src)-anchor], src[anchor:])
 	return di, nil
 }
 // blockHashHC hashes 4 bytes into a value < winSize by multiplying x with a
 // fixed constant (wrapping on overflow) and keeping the top winSizeLog bits.
 func blockHashHC(x uint32) uint32 {
 	const hasher uint32 = 2654435761 // Knuth multiplicative hash.
 	product := x * hasher
 	return product >> (32 - winSizeLog)
 }
 // CompressorHC holds the hash and chain tables used by its CompressBlock
 // method, which searches chains of earlier positions for the longest match.
 type CompressorHC struct {
 	// hashTable: stores the last position found for a given hash
 	// chainTable: stores previous positions for a given hash
 	hashTable, chainTable [htSize]int
 	// needsReset is set by the first CompressBlock call; once set, every
 	// later call zeroes both tables before compressing.
 	needsReset            bool
 }
 // compressorHCPool hands out reusable CompressorHC values, allocating a new
 // one when the pool is empty.
 var compressorHCPool = sync.Pool{New: func() interface{} { return new(CompressorHC) }}
 // CompressBlockHC compresses src into dst with the given depth using a
 // CompressorHC borrowed from compressorHCPool and returns that compressor's
 // result unchanged. The compressor is handed back to the pool once
 // compression has returned.
 func CompressBlockHC(src, dst []byte, depth CompressionLevel) (int, error) {
 	compressor := compressorHCPool.Get().(*CompressorHC)
 	written, err := compressor.CompressBlock(src, dst, depth)
 	compressorHCPool.Put(compressor)
 	return written, err
 }
 // CompressBlock compresses src into dst, following the chain of earlier
 // positions with the same hash for at most depth candidates per position and
 // keeping the longest match found. A depth of 0 is replaced by winSize.
 //
 // It returns the number of bytes written to dst. It returns (0, nil) when
 // dst is smaller than CompressBlockBound(len(src)) and either no match was
 // emitted or the output position has caught up with the start of the
 // trailing literals. Writes to dst are not bounds-checked explicitly: a
 // panic raised while compressing is turned into
 // lz4errors.ErrInvalidSourceShortBuffer by the deferred recoverBlock.
 func (c *CompressorHC) CompressBlock(src, dst []byte, depth CompressionLevel) (_ int, err error) {
 	if c.needsReset {
 		// Zero out reused table to avoid non-deterministic output (issue #65).
 		c.hashTable = [htSize]int{}
 		c.chainTable = [htSize]int{}
 	}
 	c.needsReset = true // Only false on first call.
 	defer recoverBlock(&err)
 	// Return 0, nil only if the destination buffer size is < CompressBlockBound.
 	isNotCompressible := len(dst) < CompressBlockBound(len(src))
 	// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
 	// This significantly speeds up incompressible data and usually has very small impact on compression.
 	// bytes to skip =  1 + (bytes since last match >> adaptSkipLog)
 	const adaptSkipLog = 7
 	// si: current search position in src; di: current write position in dst;
 	// anchor: start of the literals not yet emitted.
 	var si, di, anchor int
 	sn := len(src) - mfLimit
 	if sn <= 0 {
 		// Input too short to search for matches: emit it all as literals.
 		goto lastLiterals
 	}
 	if depth == 0 {
 		depth = winSize
 	}
 	for si < sn {
 		// Hash the next 4 bytes (sequence).
 		match := binary.LittleEndian.Uint32(src[si:])
 		h := blockHashHC(match)
 		// Follow the chain until out of window and give the longest match.
 		mLen := 0
 		offset := 0
 		for next, try := c.hashTable[h], depth; try > 0 && next > 0 && si-next < winSize; next, try = c.chainTable[next&winMask], try-1 {
 			// The first (mLen==0) or next byte (mLen>=minMatch) at current match length
 			// must match to improve on the match length.
 			if src[next+mLen] != src[si+mLen] {
 				continue
 			}
 			ml := 0
 			// Compare the current position with a previous with the same hash.
 			for ml < sn-si {
 				x := binary.LittleEndian.Uint64(src[next+ml:]) ^ binary.LittleEndian.Uint64(src[si+ml:])
 				if x == 0 {
 					ml += 8
 				} else {
 					// Stop at the first non-zero byte.
 					ml += bits.TrailingZeros64(x) >> 3
 					break
 				}
 			}
 			if ml < minMatch || ml <= mLen {
 				// Match too small (<minMatch) or smaller than the current match.
 				continue
 			}
 			// Found a longer match, keep its position and length.
 			mLen = ml
 			offset = si - next
 			// Try another previous position with the same hash.
 		}
 		// Link the current position into the chain for its hash.
 		c.chainTable[si&winMask] = c.hashTable[h]
 		c.hashTable[h] = si
 		// No match found.
 		if mLen == 0 {
 			si += 1 + (si-anchor)>>adaptSkipLog
 			continue
 		}
 		// Match found.
 		// Update hash/chain tables with overlapping bytes:
 		// si already hashed, add everything from si+1 up to the match length.
 		winStart := si + 1
 		if ws := si + mLen - winSize; ws > winStart {
 			winStart = ws
 		}
 		// Note: si and ml below shadow the outer variables for this loop only.
 		for si, ml := winStart, si+mLen; si < ml; {
 			match >>= 8
 			match |= uint32(src[si+3]) << 24
 			h := blockHashHC(match)
 			c.chainTable[si&winMask] = c.hashTable[h]
 			c.hashTable[h] = si
 			si++
 		}
 		lLen := si - anchor
 		si += mLen
 		mLen -= minMatch // Match length does not include minMatch.
 		// Token byte: low nibble is the match length, high nibble the literal
 		// length; 0xF in either nibble means extra length bytes follow.
 		if mLen < 0xF {
 			dst[di] = byte(mLen)
 		} else {
 			dst[di] = 0xF
 		}
 		// Encode literals length.
 		if lLen < 0xF {
 			dst[di] |= byte(lLen << 4)
 		} else {
 			dst[di] |= 0xF0
 			di++
 			l := lLen - 0xF
 			for ; l >= 0xFF; l -= 0xFF {
 				dst[di] = 0xFF
 				di++
 			}
 			dst[di] = byte(l)
 		}
 		di++
 		// Literals.
 		copy(dst[di:di+lLen], src[anchor:anchor+lLen])
 		di += lLen
 		anchor = si
 		// Encode offset.
 		di += 2
 		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
 		// Encode match length part 2.
 		if mLen >= 0xF {
 			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
 				dst[di] = 0xFF
 				di++
 			}
 			dst[di] = byte(mLen)
 			di++
 		}
 	}
 	if isNotCompressible && anchor == 0 {
 		// Incompressible.
 		return 0, nil
 	}
 	// Last literals.
 lastLiterals:
 	lLen := len(src) - anchor
 	if lLen < 0xF {
 		dst[di] = byte(lLen << 4)
 	} else {
 		dst[di] = 0xF0
 		di++
 		lLen -= 0xF
 		for ; lLen >= 0xFF; lLen -= 0xFF {
 			dst[di] = 0xFF
 			di++
 		}
 		dst[di] = byte(lLen)
 	}
 	di++
 	// Write the last literals.
 	if isNotCompressible && di >= anchor {
 		// Incompressible.
 		return 0, nil
 	}
 	di += copy(dst[di:di+len(src)-anchor], src[anchor:])
 	return di, nil
 }
--- a/vendor/github.com/pierrec/lz4/v4/internal/lz4block/blocks.go
+++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/blocks.go
@ -0,0 +1,90 @@
 // Package lz4block provides LZ4 BlockSize types and pools of buffers.
 package lz4block
 import "sync"
 // Block sizes in bytes. Each constant is four times the previous one:
 // 1<<16, 1<<18, 1<<20 and 1<<22.
 const (
 	Block64Kb uint32 = 1 << (16 + iota*2)
 	Block256Kb
 	Block1Mb
 	Block4Mb
 )
 // Block8Mb is the buffer size used for legacy mode blocks.
 // In legacy mode all blocks are compressed regardless
 // of the compressed size: use the bound size, i.e. the value returned by
 // CompressBlockBound for an 8<<20 byte input, not 8<<20 itself.
 var Block8Mb = uint32(CompressBlockBound(8 << 20))
 // Buffer pools, one per block size. Every pool creates byte slices whose
 // length is the matching block size; BlockSizeIndex.Get and Put are the
 // accessors defined in this file.
 var (
 	BlockPool64K  = sync.Pool{New: func() interface{} { return make([]byte, Block64Kb) }}
 	BlockPool256K = sync.Pool{New: func() interface{} { return make([]byte, Block256Kb) }}
 	BlockPool1M   = sync.Pool{New: func() interface{} { return make([]byte, Block1Mb) }}
 	BlockPool4M   = sync.Pool{New: func() interface{} { return make([]byte, Block4Mb) }}
 	BlockPool8M   = sync.Pool{New: func() interface{} { return make([]byte, Block8Mb) }}
 )
 // Index translates a block size in bytes into its BlockSizeIndex.
 // Sizes that are not one of the known block sizes map to 0.
 func Index(b uint32) BlockSizeIndex {
 	switch {
 	case b == Block64Kb:
 		return 4
 	case b == Block256Kb:
 		return 5
 	case b == Block1Mb:
 		return 6
 	case b == Block4Mb:
 		return 7
 	case b == Block8Mb: // only valid in legacy mode
 		return 3
 	default:
 		return 0
 	}
 }
 // IsValid reports whether b is a block size known to Index,
 // the legacy size included.
 func IsValid(b uint32) bool {
 	idx := Index(b)
 	return idx != 0
 }
 // BlockSizeIndex is the compact identifier of a block size.
 type BlockSizeIndex uint8
 // IsValid reports whether b is one of the indexes 4, 5, 6 or 7.
 func (b BlockSizeIndex) IsValid() bool {
 	return b >= 4 && b <= 7
 }
 // Get fetches a buffer from the pool associated with b.
 // An index without an associated pool leaves the fetched value nil, so the
 // final type assertion panics.
 func (b BlockSizeIndex) Get() []byte {
 	var fetched interface{}
 	switch {
 	case b == 3:
 		fetched = BlockPool8M.Get()
 	case b == 4:
 		fetched = BlockPool64K.Get()
 	case b == 5:
 		fetched = BlockPool256K.Get()
 	case b == 6:
 		fetched = BlockPool1M.Get()
 	case b == 7:
 		fetched = BlockPool4M.Get()
 	}
 	return fetched.([]byte)
 }
 // Put hands buf back to the pool matching its capacity, resliced to that
 // full capacity.
 func Put(buf []byte) {
 	// Safeguard: do not allow invalid buffers. A capacity that is not one
 	// of the known block sizes is simply not pooled.
 	n := cap(buf)
 	whole := buf[:n]
 	switch size := uint32(n); {
 	case size == Block64Kb:
 		BlockPool64K.Put(whole)
 	case size == Block256Kb:
 		BlockPool256K.Put(whole)
 	case size == Block1Mb:
 		BlockPool1M.Put(whole)
 	case size == Block4Mb:
 		BlockPool4M.Put(whole)
 	case size == Block8Mb:
 		BlockPool8M.Put(whole)
 	}
 }
 // CompressionLevel selects the block compression level.
 type CompressionLevel uint32
 // Fast is the zero CompressionLevel.
 const Fast CompressionLevel = 0
--- a/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_amd64.s
+++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_amd64.s
@ -0,0 +1,448 @@
 // +build !appengine
 // +build gc
 // +build !noasm
 #include "go_asm.h"
 #include "textflag.h"
 // AX scratch
 // BX scratch
 // CX literal and match lengths
 // DX token, match offset
 //
 // DI &dst
 // SI &src
 // R8 &dst + len(dst)
 // R9 &src + len(src)
 // R11 &dst
 // R12 short output end
 // R13 short input end
 // R14 &dict
 // R15 len(dict)
 // func decodeBlock(dst, src, dict []byte) int
 //
 // decodeBlock decodes the LZ4 block held in src into dst. Matches that start
 // before the beginning of dst are resolved against dict.
 // On success the number of bytes written to dst is returned. On failure a
 // negative code is returned:
 //   -1 (err_corrupt):    empty src, zero match offset, or trailing match length,
 //   -2 (err_short_buf):  src or dst is too short for the encoded sequence,
 //   -3 (err_short_dict): a match starts before the beginning of dict.
 // The 48 byte frame holds the runtime·memmove arguments (0..16(SP)) and the
 // registers spilled around those calls (24..40(SP)).
 TEXT ·decodeBlock(SB), NOSPLIT, $48-80
 	MOVQ dst_base+0(FP), DI
 	MOVQ DI, R11
 	MOVQ dst_len+8(FP), R8
 	ADDQ DI, R8
 	MOVQ src_base+24(FP), SI
 	MOVQ src_len+32(FP), R9
 	// An empty source is reported as corrupt.
 	CMPQ R9, $0
 	JE   err_corrupt
 	ADDQ SI, R9
 	MOVQ dict_base+48(FP), R14
 	MOVQ dict_len+56(FP), R15
 	// shortcut ends
 	// short output end
 	MOVQ R8, R12
 	SUBQ $32, R12
 	// short input end
 	MOVQ R9, R13
 	SUBQ $16, R13
 	XORL CX, CX
 loop:
 	// token := uint32(src[si])
 	MOVBLZX (SI), DX
 	INCQ SI
 	// lit_len = token >> 4
 	// if lit_len > 0
 	// CX = lit_len
 	MOVL DX, CX
 	SHRL $4, CX
 	// if lit_len == 0xF the length continues in the following bytes;
 	// otherwise try the shortcut when far enough from both buffer ends.
 	CMPL CX, $0xF
 	JEQ  lit_len_loop
 	CMPQ DI, R12
 	JAE  copy_literal
 	CMPQ SI, R13
 	JAE  copy_literal
 	// copy shortcut
 	// A two-stage shortcut for the most common case:
 	// 1) If the literal length is 0..14, and there is enough space,
 	// enter the shortcut and copy 16 bytes on behalf of the literals
 	// (in the fast mode, only 8 bytes can be safely copied this way).
 	// 2) Further if the match length is 4..18, copy 18 bytes in a similar
 	// manner; but we ensure that there's enough space in the output for
 	// those 18 bytes earlier, upon entering the shortcut (in other words,
 	// there is a combined check for both stages).
 	// copy literal
 	MOVOU (SI), X0
 	MOVOU X0, (DI)
 	ADDQ CX, DI
 	ADDQ CX, SI
 	MOVL DX, CX
 	ANDL $0xF, CX
 	// The second stage: prepare for match copying, decode full info.
 	// If it doesn't work out, the info won't be wasted.
 	// offset := uint16(data[:2])
 	MOVWLZX (SI), DX
 	TESTL DX, DX
 	JE err_corrupt
 	ADDQ $2, SI
 	JC err_short_buf
 	MOVQ DI, AX
 	SUBQ DX, AX
 	JC err_corrupt
 	CMPQ AX, DI
 	JA err_short_buf
 	// if we can't do the second stage then jump straight to read the
 	// match length, we already have the offset.
 	CMPL CX, $0xF
 	JEQ match_len_loop_pre
 	CMPL DX, $8
 	JLT match_len_loop_pre
 	CMPQ AX, R11
 	JB match_len_loop_pre
 	// memcpy(op + 0, match + 0, 8);
 	MOVQ (AX), BX
 	MOVQ BX, (DI)
 	// memcpy(op + 8, match + 8, 8);
 	MOVQ 8(AX), BX
 	MOVQ BX, 8(DI)
 	// memcpy(op +16, match +16, 2);
 	MOVW 16(AX), BX
 	MOVW BX, 16(DI)
 	LEAQ const_minMatch(DI)(CX*1), DI
 	// shortcut complete, load next token
 	JMP loopcheck
 	// Read the rest of the literal length:
 	// do { BX = src[si++]; lit_len += BX } while (BX == 0xFF).
 lit_len_loop:
 	CMPQ SI, R9
 	JAE err_short_buf
 	MOVBLZX (SI), BX
 	INCQ SI
 	ADDQ BX, CX
 	CMPB BX, $0xFF
 	JE lit_len_loop
 copy_literal:
 	// bounds check src and dst
 	MOVQ SI, AX
 	ADDQ CX, AX
 	JC err_short_buf
 	CMPQ AX, R9
 	JA err_short_buf
 	MOVQ DI, BX
 	ADDQ CX, BX
 	JC err_short_buf
 	CMPQ BX, R8
 	JA err_short_buf
 	// Copy literals of <=48 bytes through the XMM registers.
 	CMPQ CX, $48
 	JGT memmove_lit
 	// if len(dst[di:]) < 48
 	MOVQ R8, AX
 	SUBQ DI, AX
 	CMPQ AX, $48
 	JLT memmove_lit
 	// if len(src[si:]) < 48
 	MOVQ R9, BX
 	SUBQ SI, BX
 	CMPQ BX, $48
 	JLT memmove_lit
 	MOVOU (SI), X0
 	MOVOU 16(SI), X1
 	MOVOU 32(SI), X2
 	MOVOU X0, (DI)
 	MOVOU X1, 16(DI)
 	MOVOU X2, 32(DI)
 	ADDQ CX, SI
 	ADDQ CX, DI
 	JMP finish_lit_copy
 memmove_lit:
 	// memmove(to, from, len)
 	MOVQ DI, 0(SP)
 	MOVQ SI, 8(SP)
 	MOVQ CX, 16(SP)
 	// Spill registers. Increment SI, DI now so we don't need to save CX.
 	ADDQ CX, DI
 	ADDQ CX, SI
 	MOVQ DI, 24(SP)
 	MOVQ SI, 32(SP)
 	MOVL DX, 40(SP)
 	CALL runtime·memmove(SB)
 	// restore registers
 	MOVQ 24(SP), DI
 	MOVQ 32(SP), SI
 	MOVL 40(SP), DX
 	// recalc initial values
 	MOVQ dst_base+0(FP), R8
 	MOVQ R8, R11
 	ADDQ dst_len+8(FP), R8
 	MOVQ src_base+24(FP), R9
 	ADDQ src_len+32(FP), R9
 	MOVQ dict_base+48(FP), R14
 	MOVQ dict_len+56(FP), R15
 	MOVQ R8, R12
 	SUBQ $32, R12
 	MOVQ R9, R13
 	SUBQ $16, R13
 finish_lit_copy:
 	// CX := mLen
 	// free up DX to use for offset
 	MOVL DX, CX
 	ANDL $0xF, CX
 	// The source ending right after the literals is the end of the block.
 	CMPQ SI, R9
 	JAE end
 	// offset
 	// si += 2
 	// DX := int(src[si-2]) | int(src[si-1])<<8
 	ADDQ $2, SI
 	JC err_short_buf
 	CMPQ SI, R9
 	JA err_short_buf
 	MOVWQZX -2(SI), DX
 	// 0 offset is invalid
 	TESTL DX, DX
 	JEQ   err_corrupt
 match_len_loop_pre:
 	// if mlen != 0xF
 	CMPB CX, $0xF
 	JNE copy_match
 	// do { BX = src[si++]; mlen += BX } while (BX == 0xFF).
 match_len_loop:
 	CMPQ SI, R9
 	JAE err_short_buf
 	MOVBLZX (SI), BX
 	INCQ SI
 	ADDQ BX, CX
 	CMPB BX, $0xFF
 	JE match_len_loop
 copy_match:
 	ADDQ $const_minMatch, CX
 	// check we have match_len bytes left in dst
 	// di+match_len < len(dst)
 	MOVQ DI, AX
 	ADDQ CX, AX
 	JC err_short_buf
 	CMPQ AX, R8
 	JA err_short_buf
 	// DX = offset
 	// CX = match_len
 	// BX = &dst + (di - offset)
 	MOVQ DI, BX
 	SUBQ DX, BX
 	// check BX is within dst
 	// if BX <= &dst, go through the dictionary path
 	JC copy_match_from_dict
 	CMPQ BX, R11
 	JBE copy_match_from_dict
 	// if match_start + match_len < di, source and destination do not
 	// overlap and the match can be copied in bulk
 	LEAQ (BX)(CX*1), AX
 	CMPQ DI, AX
 	JA copy_interior_match
 	// AX := len(dst[:di])
 	// MOVQ DI, AX
 	// SUBQ R11, AX
 	// copy 16 bytes at a time
 	// if di-offset < 16 copy 16-(di-offset) bytes to di
 	// then do the remaining
 copy_match_loop:
 	// for match_len >= 0
 	// dst[di] = dst[i]
 	// di++
 	// i++
 	MOVB (BX), AX
 	MOVB AX, (DI)
 	INCQ DI
 	INCQ BX
 	DECQ CX
 	JNZ copy_match_loop
 	JMP loopcheck
 copy_interior_match:
 	CMPQ CX, $16
 	JGT memmove_match
 	// if len(dst[di:]) < 16
 	MOVQ R8, AX
 	SUBQ DI, AX
 	CMPQ AX, $16
 	JLT memmove_match
 	MOVOU (BX), X0
 	MOVOU X0, (DI)
 	ADDQ CX, DI
 	XORL CX, CX
 	JMP  loopcheck
 copy_match_from_dict:
 	// CX = match_len
 	// BX = &dst + (di - offset)
 	// AX = offset - di = dict_bytes_available => count of bytes potentially covered by the dictionary
 	MOVQ R11, AX
 	SUBQ BX, AX
 	// BX = len(dict) - dict_bytes_available
 	MOVQ R15, BX
 	SUBQ AX, BX
 	JS err_short_dict
 	ADDQ R14, BX
 	// if match_len < dict_bytes_available, match fits entirely within external dictionary : just copy
 	CMPQ CX, AX
 	JLT memmove_match
 	// The match stretches over the dictionary and our block
 	// 1) copy what comes from the dictionary
 	// AX = dict_bytes_available = copy_size
 	// BX = &dict_end - copy_size
 	// CX = match_len
 	// memmove(to, from, len)
 	MOVQ DI, 0(SP)
 	MOVQ BX, 8(SP)
 	MOVQ AX, 16(SP)
 	// store extra stuff we want to recover
 	// spill
 	MOVQ DI, 24(SP)
 	MOVQ SI, 32(SP)
 	MOVQ CX, 40(SP)
 	CALL runtime·memmove(SB)
 	// restore registers
 	MOVQ 16(SP), AX // copy_size
 	MOVQ 24(SP), DI
 	MOVQ 32(SP), SI
 	MOVQ 40(SP), CX // match_len
 	// recalc initial values
 	MOVQ dst_base+0(FP), R8
 	MOVQ R8, R11 // TODO: make these sensible numbers
 	ADDQ dst_len+8(FP), R8
 	MOVQ src_base+24(FP), R9
 	ADDQ src_len+32(FP), R9
 	MOVQ dict_base+48(FP), R14
 	MOVQ dict_len+56(FP), R15
 	MOVQ R8, R12
 	SUBQ $32, R12
 	MOVQ R9, R13
 	SUBQ $16, R13
 	// di+=copy_size
 	ADDQ AX, DI
 	// 2) copy the rest from the current block
 	// CX = match_len - copy_size = rest_size
 	SUBQ AX, CX
 	MOVQ R11, BX
 	// check if we have a copy overlap
 	// AX = &dst + rest_size
 	MOVQ CX, AX
 	ADDQ BX, AX
 	// if &dst + rest_size > di, copy byte by byte
 	CMPQ AX, DI
 	JA copy_match_loop
 memmove_match:
 	// memmove(to, from, len)
 	MOVQ DI, 0(SP)
 	MOVQ BX, 8(SP)
 	MOVQ CX, 16(SP)
 	// Spill registers. Increment DI now so we don't need to save CX.
 	ADDQ CX, DI
 	MOVQ DI, 24(SP)
 	MOVQ SI, 32(SP)
 	CALL runtime·memmove(SB)
 	// restore registers
 	MOVQ 24(SP), DI
 	MOVQ 32(SP), SI
 	// recalc initial values
 	MOVQ dst_base+0(FP), R8
 	MOVQ R8, R11 // TODO: make these sensible numbers
 	ADDQ dst_len+8(FP), R8
 	MOVQ src_base+24(FP), R9
 	ADDQ src_len+32(FP), R9
 	MOVQ R8, R12
 	SUBQ $32, R12
 	MOVQ R9, R13
 	SUBQ $16, R13
 	MOVQ dict_base+48(FP), R14
 	MOVQ dict_len+56(FP), R15
 	// The whole match has been copied: clear the length for the check at end.
 	XORL CX, CX
 loopcheck:
 	// for si < len(src)
 	CMPQ SI, R9
 	JB   loop
 end:
 	// Remaining length must be zero.
 	TESTQ CX, CX
 	JNE   err_corrupt
 	// Return the number of bytes written: di - &dst.
 	SUBQ R11, DI
 	MOVQ DI, ret+72(FP)
 	RET
 err_corrupt:
 	MOVQ $-1, ret+72(FP)
 	RET
 err_short_buf:
 	MOVQ $-2, ret+72(FP)
 	RET
 err_short_dict:
 	MOVQ $-3, ret+72(FP)
 	RET
--- a/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_arm.s
+++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_arm.s
@ -0,0 +1,231 @@
 // +build gc
 // +build !noasm
 #include "go_asm.h"
 #include "textflag.h"
 // Register allocation.
 #define dst	R0
 #define dstorig	R1
 #define src	R2
 #define dstend	R3
 #define srcend	R4
 #define match	R5	// Match address.
 #define dictend	R6
 #define token	R7
 #define len	R8	// Literal and match lengths.
 #define offset	R7	// Match offset; overlaps with token.
 #define tmp1	R9
 #define tmp2	R11
 #define tmp3	R12
 // func decodeBlock(dst, src, dict []byte) int
 //
 // decodeBlock decodes the LZ4 block held in src into dst. Matches that start
 // before the beginning of dst are resolved against dict.
 // On success the number of bytes written to dst is returned. Every error
 // condition (empty or short src, short dst, short dict, zero offset,
 // trailing match length) shares a single exit that returns -1.
 TEXT ·decodeBlock(SB), NOFRAME+NOSPLIT, $-4-40
 	MOVW dst_base  +0(FP), dst
 	MOVW dst_len   +4(FP), dstend
 	MOVW src_base +12(FP), src
 	MOVW src_len  +16(FP), srcend
 	// An empty source is an error.
 	CMP $0, srcend
 	BEQ shortSrc
 	ADD dst, dstend
 	ADD src, srcend
 	MOVW dst, dstorig
 loop:
 	// Read token. Extract literal length.
 	MOVBU.P 1(src), token
 	MOVW    token >> 4, len
 	CMP     $15, len
 	BNE     readLitlenDone
 	// A literal length of 15 continues in the following bytes, each added
 	// to len, until a byte other than 255 is read.
 readLitlenLoop:
 	CMP     src, srcend
 	BEQ     shortSrc
 	MOVBU.P 1(src), tmp1
 	ADD.S   tmp1, len
 	BVS     shortDst
 	CMP     $255, tmp1
 	BEQ     readLitlenLoop
 readLitlenDone:
 	CMP $0, len
 	BEQ copyLiteralDone
 	// Bounds check dst+len and src+len.
 	ADD.S    dst, len, tmp1
 	ADD.CC.S src, len, tmp2
 	BCS      shortSrc
 	CMP      dstend, tmp1
 	//BHI    shortDst // Uncomment for distinct error codes.
 	CMP.LS   srcend, tmp2
 	BHI      shortSrc
 	// Copy literal.
 	CMP $4, len
 	BLO copyLiteralFinish
 	// Copy 0-3 bytes until src is aligned.
 	TST        $1, src
 	MOVBU.NE.P 1(src), tmp1
 	MOVB.NE.P  tmp1, 1(dst)
 	SUB.NE     $1, len
 	TST        $2, src
 	MOVHU.NE.P 2(src), tmp2
 	MOVB.NE.P  tmp2, 1(dst)
 	MOVW.NE    tmp2 >> 8, tmp1
 	MOVB.NE.P  tmp1, 1(dst)
 	SUB.NE     $2, len
 	B copyLiteralLoopCond
 copyLiteralLoop:
 	// Aligned load, unaligned write.
 	MOVW.P 4(src), tmp1
 	MOVW   tmp1 >>  8, tmp2
 	MOVB   tmp2, 1(dst)
 	MOVW   tmp1 >> 16, tmp3
 	MOVB   tmp3, 2(dst)
 	MOVW   tmp1 >> 24, tmp2
 	MOVB   tmp2, 3(dst)
 	MOVB.P tmp1, 4(dst)
 copyLiteralLoopCond:
 	// Loop until len-4 < 0.
 	SUB.S  $4, len
 	BPL    copyLiteralLoop
 copyLiteralFinish:
 	// Copy remaining 0-3 bytes.
 	// At this point, len may be < 0, but len&3 is still accurate.
 	TST       $1, len
 	MOVB.NE.P 1(src), tmp3
 	MOVB.NE.P tmp3, 1(dst)
 	TST       $2, len
 	MOVB.NE.P 2(src), tmp1
 	MOVB.NE.P tmp1, 2(dst)
 	MOVB.NE   -1(src), tmp2
 	MOVB.NE   tmp2, -1(dst)
 copyLiteralDone:
 	// Initial part of match length.
 	// This frees up the token register for reuse as offset.
 	AND $15, token, len
 	// The source ending right after the literals is the end of the block.
 	CMP src, srcend
 	BEQ end
 	// Read offset.
 	ADD.S $2, src
 	BCS   shortSrc
 	CMP   srcend, src
 	BHI   shortSrc
 	MOVBU -2(src), offset
 	MOVBU -1(src), tmp1
 	ORR.S tmp1 << 8, offset
 	// A zero offset is invalid.
 	BEQ   corrupt
 	// Read rest of match length.
 	CMP $15, len
 	BNE readMatchlenDone
 readMatchlenLoop:
 	CMP     src, srcend
 	BEQ     shortSrc
 	MOVBU.P 1(src), tmp1
 	ADD.S   tmp1, len
 	BVS     shortDst
 	CMP     $255, tmp1
 	BEQ     readMatchlenLoop
 readMatchlenDone:
 	// Bounds check dst+len+minMatch.
 	ADD.S    dst, len, tmp1
 	ADD.CC.S $const_minMatch, tmp1
 	BCS      shortDst
 	CMP      dstend, tmp1
 	BHI      shortDst
 	// match = dst - offset.
 	RSB dst, offset, match
 	CMP dstorig, match
 	BGE copyMatch4
 	// match < dstorig means the match starts in the dictionary,
 	// at len(dict) - offset + (dst - dstorig).
 	MOVW dict_base+24(FP), match
 	MOVW dict_len +28(FP), dictend
 	ADD $const_minMatch, len
 	RSB   dst, dstorig, tmp1
 	RSB   dictend, offset, tmp2
 	ADD.S tmp2, tmp1
 	BMI   shortDict
 	ADD   match, dictend
 	ADD   tmp1, match
 	// Copy byte by byte until either len reaches zero or match reaches
 	// the end of the dictionary.
 copyDict:
 	MOVBU.P 1(match), tmp1
 	MOVB.P  tmp1, 1(dst)
 	SUB.S   $1, len
 	CMP.NE  match, dictend
 	BNE     copyDict
 	// If the match extends beyond the dictionary, the rest is at dstorig.
 	CMP  $0, len
 	BEQ  copyMatchDone
 	MOVW dstorig, match
 	B    copyMatch
 	// Copy a regular match.
 	// Since len+minMatch is at least four, we can do a 4× unrolled
 	// byte copy loop. Using MOVW instead of four byte loads is faster,
 	// but to remain portable we'd have to align match first, which is
 	// too expensive. By alternating loads and stores, we also handle
 	// the case offset < 4.
 copyMatch4:
 	SUB.S   $4, len
 	MOVBU.P 4(match), tmp1
 	MOVB.P  tmp1, 4(dst)
 	MOVBU   -3(match), tmp2
 	MOVB    tmp2, -3(dst)
 	MOVBU   -2(match), tmp3
 	MOVB    tmp3, -2(dst)
 	MOVBU   -1(match), tmp1
 	MOVB    tmp1, -1(dst)
 	BPL     copyMatch4
 	// Restore len, which is now negative.
 	ADD.S $4, len
 	BEQ   copyMatchDone
 copyMatch:
 	// Finish with a byte-at-a-time copy.
 	SUB.S   $1, len
 	MOVBU.P 1(match), tmp2
 	MOVB.P  tmp2, 1(dst)
 	BNE     copyMatch
 copyMatchDone:
 	CMP src, srcend
 	BNE loop
 end:
 	// A non-zero match length with no match data left is corrupt input.
 	CMP  $0, len
 	BNE  corrupt
 	// Return the number of bytes written: dst - dstorig.
 	SUB  dstorig, dst, tmp1
 	MOVW tmp1, ret+36(FP)
 	RET
 	// The error cases have distinct labels so we can put different
 	// return codes here when debugging, or if the error returns need to
 	// be changed.
 shortDict:
 shortDst:
 shortSrc:
 corrupt:
 	MOVW $-1, tmp1
 	MOVW tmp1, ret+36(FP)
 	RET
--- a/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_arm64.s
+++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_arm64.s
@ -0,0 +1,241 @@
 // +build gc
 // +build !noasm
 // This implementation assumes that strict alignment checking is turned off.
 // The Go compiler makes the same assumption.
 #include "go_asm.h"
 #include "textflag.h"
 // Register allocation.
 #define dst		R0
 #define dstorig		R1
 #define src		R2
 #define dstend		R3
 #define dstend16	R4	// dstend - 16
 #define srcend		R5
 #define srcend16	R6	// srcend - 16
 #define match		R7	// Match address.
 #define dict		R8
 #define dictlen		R9
 #define dictend		R10
 #define token		R11
 #define len		R12	// Literal and match lengths.
 #define lenRem		R13
 #define offset		R14	// Match offset.
 #define tmp1		R15
 #define tmp2		R16
 #define tmp3		R17
 #define tmp4		R19
 // func decodeBlock(dst, src, dict []byte) int
 //
 // decodeBlock decodes the LZ4 block held in src into dst. Matches that start
 // before the beginning of dst are resolved against dict.
 // On success the number of bytes written to dst is returned. Every error
 // condition (empty or short src, short dst, short dict, zero offset,
 // trailing match length) shares a single exit that returns -1.
 TEXT ·decodeBlock(SB), NOFRAME+NOSPLIT, $0-80
 	LDP  dst_base+0(FP), (dst, dstend)
 	ADD  dst, dstend
 	MOVD dst, dstorig
 	LDP src_base+24(FP), (src, srcend)
 	// An empty source is an error.
 	CBZ srcend, shortSrc
 	ADD src, srcend
 	// dstend16 = max(dstend-16, 0) and similarly for srcend16.
 	SUBS $16, dstend, dstend16
 	CSEL LO, ZR, dstend16, dstend16
 	SUBS $16, srcend, srcend16
 	CSEL LO, ZR, srcend16, srcend16
 	LDP dict_base+48(FP), (dict, dictlen)
 	ADD dict, dictlen, dictend
 loop:
 	// Read token. Extract literal length.
 	MOVBU.P 1(src), token
 	LSR     $4, token, len
 	CMP     $15, len
 	BNE     readLitlenDone
 	// A literal length of 15 continues in the following bytes, each added
 	// to len, until a byte other than 255 is read.
 readLitlenLoop:
 	CMP     src, srcend
 	BEQ     shortSrc
 	MOVBU.P 1(src), tmp1
 	ADDS    tmp1, len
 	BVS     shortDst
 	CMP     $255, tmp1
 	BEQ     readLitlenLoop
 readLitlenDone:
 	CBZ len, copyLiteralDone
 	// Bounds check dst+len and src+len.
 	ADDS dst, len, tmp1
 	BCS  shortSrc
 	ADDS src, len, tmp2
 	BCS  shortSrc
 	CMP  dstend, tmp1
 	BHI  shortDst
 	CMP  srcend, tmp2
 	BHI  shortSrc
 	// Copy literal.
 	SUBS $16, len
 	BLO  copyLiteralShort
 copyLiteralLoop:
 	LDP.P 16(src), (tmp1, tmp2)
 	STP.P (tmp1, tmp2), 16(dst)
 	SUBS  $16, len
 	BPL   copyLiteralLoop
 	// Copy (final part of) literal of length 0-15.
 	// If we have >=16 bytes left in src and dst, just copy 16 bytes.
 copyLiteralShort:
 	CMP  dstend16, dst
 	CCMP LO, src, srcend16, $0b0010 // 0010 = preserve carry (LO).
 	BHS  copyLiteralShortEnd
 	AND $15, len
 	LDP (src), (tmp1, tmp2)
 	ADD len, src
 	STP (tmp1, tmp2), (dst)
 	ADD len, dst
 	B copyLiteralDone
 	// Safe but slow copy near the end of src, dst.
 	// Each TBZ skips the following load/store pair when the tested bit of
 	// len is clear, so 8, 4, 2 and 1 bytes are copied as needed.
 copyLiteralShortEnd:
 	TBZ     $3, len, 3(PC)
 	MOVD.P  8(src), tmp1
 	MOVD.P  tmp1, 8(dst)
 	TBZ     $2, len, 3(PC)
 	MOVW.P  4(src), tmp2
 	MOVW.P  tmp2, 4(dst)
 	TBZ     $1, len, 3(PC)
 	MOVH.P  2(src), tmp3
 	MOVH.P  tmp3, 2(dst)
 	TBZ     $0, len, 3(PC)
 	MOVBU.P 1(src), tmp4
 	MOVB.P  tmp4, 1(dst)
 copyLiteralDone:
 	// Initial part of match length.
 	AND $15, token, len
 	// The source ending right after the literals is the end of the block.
 	CMP src, srcend
 	BEQ end
 	// Read offset.
 	ADDS  $2, src
 	BCS   shortSrc
 	CMP   srcend, src
 	BHI   shortSrc
 	MOVHU -2(src), offset
 	// A zero offset is invalid.
 	CBZ   offset, corrupt
 	// Read rest of match length.
 	CMP $15, len
 	BNE readMatchlenDone
 readMatchlenLoop:
 	CMP     src, srcend
 	BEQ     shortSrc
 	MOVBU.P 1(src), tmp1
 	ADDS    tmp1, len
 	BVS     shortDst
 	CMP     $255, tmp1
 	BEQ     readMatchlenLoop
 readMatchlenDone:
 	ADD $const_minMatch, len
 	// Bounds check dst+len.
 	ADDS dst, len, tmp2
 	BCS  shortDst
 	CMP  dstend, tmp2
 	BHI  shortDst
 	// match = dst - offset.
 	SUB offset, dst, match
 	CMP dstorig, match
 	BHS copyMatchTry8
 	// match < dstorig means the match starts in the dictionary,
 	// at len(dict) - offset + (dst - dstorig).
 	SUB  dstorig, dst, tmp1
 	SUB  offset, dictlen, tmp2
 	ADDS tmp2, tmp1
 	BMI  shortDict
 	ADD  dict, tmp1, match
 	// Copy byte by byte until either len reaches zero or match reaches
 	// the end of the dictionary.
 copyDict:
 	MOVBU.P 1(match), tmp3
 	MOVB.P  tmp3, 1(dst)
 	SUBS    $1, len
 	CCMP    NE, dictend, match, $0b0100 // 0100 sets the Z (EQ) flag.
 	BNE     copyDict
 	CBZ len, copyMatchDone
 	// If the match extends beyond the dictionary, the rest is at dstorig.
 	// Recompute the offset for the next check.
 	MOVD dstorig, match
 	SUB  dstorig, dst, offset
 copyMatchTry8:
 	// Copy doublewords if both len and offset are at least eight.
 	// A 16-at-a-time loop doesn't provide a further speedup.
 	CMP  $8, len
 	CCMP HS, offset, $8, $0
 	BLO  copyMatchTry4
 	AND    $7, len, lenRem
 	SUB    $8, len
 copyMatchLoop8:
 	MOVD.P 8(match), tmp1
 	MOVD.P tmp1, 8(dst)
 	SUBS   $8, len
 	BPL    copyMatchLoop8
 	MOVD (match)(len), tmp2 // match+len == match+lenRem-8.
 	ADD  lenRem, dst
 	MOVD $0, len
 	MOVD tmp2, -8(dst)
 	B    copyMatchDone
 copyMatchTry4:
 	// Copy words if both len and offset are at least four.
 	CMP  $4, len
 	CCMP HS, offset, $4, $0
 	BLO  copyMatchLoop1
 	MOVWU.P 4(match), tmp2
 	MOVWU.P tmp2, 4(dst)
 	SUBS    $4, len
 	BEQ     copyMatchDone
 copyMatchLoop1:
 	// Byte-at-a-time copy for small offsets <= 3.
 	MOVBU.P 1(match), tmp2
 	MOVB.P  tmp2, 1(dst)
 	SUBS    $1, len
 	BNE     copyMatchLoop1
 copyMatchDone:
 	CMP src, srcend
 	BNE loop
 end:
 	// A non-zero match length with no match data left is corrupt input.
 	CBNZ len, corrupt
 	// Return the number of bytes written: dst - dstorig.
 	SUB  dstorig, dst, tmp1
 	MOVD tmp1, ret+72(FP)
 	RET
 	// The error cases have distinct labels so we can put different
 	// return codes here when debugging, or if the error returns need to
 	// be changed.
 shortDict:
 shortDst:
 shortSrc:
 corrupt:
 	MOVD $-1, tmp1
 	MOVD tmp1, ret+72(FP)
 	RET
--- a/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_asm.go
+++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_asm.go
@ -0,0 +1,10 @@
 //go:build (amd64 || arm || arm64) && !appengine && gc && !noasm
 // +build amd64 arm arm64
 // +build !appengine
 // +build gc
 // +build !noasm
 package lz4block
 // decodeBlock is implemented in assembly for the architectures selected by
 // the build constraints of this file. It decodes the LZ4 block src into dst,
 // using dict for matches that start before dst, and returns the number of
 // bytes written to dst or a negative value on error.
 //
 //go:noescape
 func decodeBlock(dst, src, dict []byte) int
--- a/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_other.go
+++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_other.go
@ -0,0 +1,139 @@
 //go:build (!amd64 && !arm && !arm64) || appengine || !gc || noasm
 // +build !amd64,!arm,!arm64 appengine !gc noasm
 package lz4block
 import (
 	"encoding/binary"
 )
 // decodeBlock is the pure Go LZ4 block decoder. It decodes src into dst,
 // resolving matches that start before the beginning of dst against dict.
 //
 // It returns the number of bytes written to dst, or hasError (-2) when src is
 // empty, a match offset is zero, a length overflows, or any slice access goes
 // out of bounds: such panics are recovered and converted into hasError.
 func decodeBlock(dst, src, dict []byte) (ret int) {
 	// Restrict capacities so we don't read or write out of bounds.
 	dst = dst[:len(dst):len(dst)]
 	src = src[:len(src):len(src)]
 	const hasError = -2
 	if len(src) == 0 {
 		return hasError
 	}
 	// Out-of-range indexing below panics instead of being checked explicitly;
 	// the panic is turned into the error return value here.
 	defer func() {
 		if recover() != nil {
 			ret = hasError
 		}
 	}()
 	// si and di are the read position in src and the write position in dst.
 	var si, di uint
 	for si < uint(len(src)) {
 		// Literals and match lengths (token).
 		b := uint(src[si])
 		si++
 		// Literals.
 		if lLen := b >> 4; lLen > 0 {
 			switch {
 			case lLen < 0xF && si+16 < uint(len(src)):
 				// Shortcut 1
 				// if we have enough room in src and dst, and the literals length
 				// is small enough (0..14) then copy all 16 bytes, even if not all
 				// are part of the literals.
 				copy(dst[di:], src[si:si+16])
 				si += lLen
 				di += lLen
 				if mLen := b & 0xF; mLen < 0xF {
 					// Shortcut 2
 					// if the match length (4..18) fits within the literals, then copy
 					// all 18 bytes, even if not all are part of the literals.
 					mLen += 4
 					if offset := u16(src[si:]); mLen <= offset && offset < di {
 						i := di - offset
 						// The remaining buffer may not hold 18 bytes.
 						// See https://github.com/pierrec/lz4/issues/51.
 						if end := i + 18; end <= uint(len(dst)) {
 							copy(dst[di:], dst[i:end])
 							si += 2
 							di += mLen
 							continue
 						}
 					}
 				}
 			case lLen == 0xF:
 				// Extended literal length: add the following bytes until
 				// one differs from 0xFF.
 				for {
 					x := uint(src[si])
 					if lLen += x; int(lLen) < 0 {
 						return hasError
 					}
 					si++
 					if x != 0xFF {
 						break
 					}
 				}
 				fallthrough
 			default:
 				copy(dst[di:di+lLen], src[si:si+lLen])
 				si += lLen
 				di += lLen
 			}
 		}
 		mLen := b & 0xF
 		// The block ends after the literals of its last token, which must
 		// not announce a match.
 		if si == uint(len(src)) && mLen == 0 {
 			break
 		} else if si >= uint(len(src)) {
 			return hasError
 		}
 		offset := u16(src[si:])
 		if offset == 0 {
 			return hasError
 		}
 		si += 2
 		// Match.
 		mLen += minMatch
 		if mLen == minMatch+0xF {
 			// Extended match length, encoded like the literal length.
 			for {
 				x := uint(src[si])
 				if mLen += x; int(mLen) < 0 {
 					return hasError
 				}
 				si++
 				if x != 0xFF {
 					break
 				}
 			}
 		}
 		// Copy the match.
 		if di < offset {
 			// The match is beyond our block, meaning the first part
 			// is in the dictionary.
 			fromDict := dict[uint(len(dict))+di-offset:]
 			n := uint(copy(dst[di:di+mLen], fromDict))
 			di += n
 			if mLen -= n; mLen == 0 {
 				continue
 			}
 			// We copied n = offset-di bytes from the dictionary,
 			// then set di = di+n = offset, so the following code
 			// copies from dst[di-offset:] = dst[0:].
 		}
 		expanded := dst[di-offset:]
 		if mLen > offset {
 			// Efficiently copy the match dst[di-offset:di] into the dst slice.
 			// The match overlaps its own output, so the already written
 			// pattern is doubled on every iteration.
 			bytesToCopy := offset * (mLen / offset)
 			for n := offset; n <= bytesToCopy+offset; n *= 2 {
 				copy(expanded[n:], expanded[:n])
 			}
 			di += bytesToCopy
 			mLen -= bytesToCopy
 		}
 		di += uint(copy(dst[di:di+mLen], expanded[:mLen]))
 	}
 	return int(di)
 }
 // u16 decodes the first two bytes of p as a little-endian 16-bit value.
 // It panics when p holds fewer than two bytes.
 func u16(p []byte) uint {
 	v := binary.LittleEndian.Uint16(p)
 	return uint(v)
 }
--- a/vendor/github.com/pierrec/lz4/v4/internal/lz4errors/errors.go
+++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4errors/errors.go
@ -0,0 +1,19 @@
 package lz4errors
 // Error is a string-based error type, which allows the package errors to be
 // declared as constants.
 type Error string
 // Error implements the error interface by returning the underlying string.
 func (e Error) Error() string {
 	return string(e)
 }
 // Errors reported by the lz4 packages. They are constants of type Error, so
 // each value is fully described by its message.
 const (
 	ErrInvalidSourceShortBuffer      Error = "lz4: invalid source or destination buffer too short"
 	ErrInvalidFrame                  Error = "lz4: bad magic number"
 	ErrInternalUnhandledState        Error = "lz4: unhandled state"
 	ErrInvalidHeaderChecksum         Error = "lz4: invalid header checksum"
 	ErrInvalidBlockChecksum          Error = "lz4: invalid block checksum"
 	ErrInvalidFrameChecksum          Error = "lz4: invalid frame checksum"
 	ErrOptionInvalidCompressionLevel Error = "lz4: invalid compression level"
 	ErrOptionClosedOrError           Error = "lz4: cannot apply options on closed or in error object"
 	ErrOptionInvalidBlockSize        Error = "lz4: invalid block size"
 	ErrOptionNotApplicable           Error = "lz4: option not applicable"
 	ErrWriterNotClosed               Error = "lz4: writer not closed"
 )
--- a/vendor/github.com/pierrec/lz4/v4/internal/lz4stream/block.go
+++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4stream/block.go
@ -0,0 +1,350 @@
 package lz4stream
 import (
 	"encoding/binary"
 	"fmt"
 	"io"
 	"sync"
 	"github.com/pierrec/lz4/v4/internal/lz4block"
 	"github.com/pierrec/lz4/v4/internal/lz4errors"
 	"github.com/pierrec/lz4/v4/internal/xxh32"
 )
 // Blocks holds the block state of a frame, for either the sequential mode
 // (a single reusable Block) or the concurrent mode (the Blocks channel).
 type Blocks struct {
 	// Block is the block used when the concurrency is 1; nil otherwise.
 	Block  *FrameDataBlock
 	// Blocks carries, in submission order, one result channel per block being
 	// compressed concurrently; nil when the concurrency is 1.
 	Blocks chan chan *FrameDataBlock
 	// mu guards err in ErrorR and closeR.
 	mu     sync.Mutex
 	// err is the first error recorded while processing blocks.
 	err    error
 }
 // initW prepares b for writing blocks of frame f to dst.
 //
 // With num == 1 a single block is allocated and no goroutine is started.
 // Otherwise a goroutine is started that receives one result channel per
 // block from b.Blocks, waits for the compressed block on it and writes the
 // blocks to dst in the order their channels were queued. A nil block is the
 // sentinel that terminates that goroutine.
 func (b *Blocks) initW(f *Frame, dst io.Writer, num int) {
 	if num == 1 {
 		b.Blocks = nil
 		b.Block = NewFrameDataBlock(f)
 		return
 	}
 	b.Block = nil
 	// Reuse the existing channel when its capacity already matches num.
 	if cap(b.Blocks) != num {
 		b.Blocks = make(chan chan *FrameDataBlock, num)
 	}
 	// goroutine managing concurrent block compression goroutines.
 	go func() {
 		// Process next block compression item.
 		for c := range b.Blocks {
 			// Read the next compressed block result.
 			// Waiting here ensures that the blocks are output in the order they were sent.
 			// The incoming channel is always closed as it indicates to the caller that
 			// the block has been processed.
 			block := <-c
 			if block == nil {
 				// Notify the block compression routine that we are done with its result.
 				// This is used when a sentinel block is sent to terminate the compression.
 				close(c)
 				return
 			}
 			// Do not attempt to write the block upon any previous failure.
 			if b.err == nil {
 				// Write the block.
 				if err := block.Write(f, dst); err != nil {
 					// Keep the first error.
 					b.err = err
 					// All pending compression goroutines need to shut down, so we need to keep going.
 				}
 			}
 			close(c)
 		}
 	}()
 }
 // close terminates block processing and returns the recorded error, which is
 // cleared from b.
 //
 // With num == 1 the single block, if any, is closed. Otherwise, when the
 // writer goroutine channel exists, a sentinel nil block is queued and close
 // waits until that goroutine has acknowledged it.
 func (b *Blocks) close(f *Frame, num int) error {
 	switch {
 	case num == 1:
 		if b.Block != nil {
 			b.Block.Close(f)
 		}
 	case b.Blocks != nil:
 		sentinel := make(chan *FrameDataBlock)
 		b.Blocks <- sentinel
 		sentinel <- nil
 		// The writer goroutine closes the channel once it is done.
 		<-sentinel
 	}
 	pending := b.err
 	b.err = nil
 	return pending
 }
 // ErrorR reports the error, if any, recorded while uncompressing a stream.
 // The read is done under b.mu.
 func (b *Blocks) ErrorR() error {
 	b.mu.Lock()
 	recorded := b.err
 	b.mu.Unlock()
 	return recorded
 }
 // initR returns a channel that streams the uncompressed blocks if in concurrent
 // mode and no error. When the channel is closed, check for any error with b.ErrorR.
 //
 // If not in concurrent mode, the uncompressed block is b.Block and the returned error
 // needs to be checked.
 //
 // num == 1 selects the non concurrent mode, in which the returned channel is
 // nil. Otherwise two goroutines are started: one reads blocks sequentially
 // from src and starts one uncompressing goroutine per block, the other
 // collects the results in order and forwards them on the returned channel.
 func (b *Blocks) initR(f *Frame, num int, src io.Reader) (chan []byte, error) {
 	size := f.Descriptor.Flags.BlockSizeIndex()
 	if num == 1 {
 		b.Blocks = nil
 		b.Block = NewFrameDataBlock(f)
 		return nil, nil
 	}
 	b.Block = nil
 	// blocks carries one result channel per block, in read order.
 	blocks := make(chan chan []byte, num)
 	// data receives the uncompressed blocks.
 	data := make(chan []byte)
 	// Read blocks from the source sequentially
 	// and uncompress them concurrently.
 	// In legacy mode, accrue the uncompress sizes in cum.
 	var cum uint32
 	go func() {
 		// cumx is the last value returned by block.Read.
 		var cumx uint32
 		var err error
 		for b.ErrorR() == nil {
 			block := NewFrameDataBlock(f)
 			cumx, err = block.Read(f, src, 0)
 			if err != nil {
 				block.Close(f)
 				break
 			}
 			// Recheck for an error as reading may be slow and uncompressing is expensive.
 			if b.ErrorR() != nil {
 				block.Close(f)
 				break
 			}
 			c := make(chan []byte)
 			blocks <- c
 			go func() {
 				defer block.Close(f)
 				data, err := block.Uncompress(f, size.Get(), nil, false)
 				if err != nil {
 					b.closeR(err)
 					// Close the block channel to indicate an error.
 					close(c)
 				} else {
 					c <- data
 				}
 			}()
 		}
 		// End the collection loop and the data channel.
 		c := make(chan []byte)
 		blocks <- c
 		c <- nil // signal the collection loop that we are done
 		<-c      // wait for the collect loop to complete
 		// In legacy mode, a last read value equal to the accrued uncompressed
 		// size is treated as the end of the stream.
 		if f.isLegacy() && cum == cumx {
 			err = io.EOF
 		}
 		b.closeR(err)
 		close(data)
 	}()
 	// Collect the uncompressed blocks and make them available
 	// on the returned channel.
 	go func(leg bool) {
 		defer close(blocks)
 		skipBlocks := false
 		for c := range blocks {
 			buf, ok := <-c
 			if !ok {
 				// A closed channel indicates an error.
 				// All remaining channels should be discarded.
 				skipBlocks = true
 				continue
 			}
 			if buf == nil {
 				// Signal to end the loop.
 				close(c)
 				return
 			}
 			if skipBlocks {
 				// A previous error has occurred, skipping remaining channels.
 				continue
 			}
 			// Perform checksum now as the blocks are received in order.
 			if f.Descriptor.Flags.ContentChecksum() {
 				_, _ = f.checksum.Write(buf)
 			}
 			if leg {
 				cum += uint32(len(buf))
 			}
 			data <- buf
 			close(c)
 		}
 	}(f.isLegacy())
 	return data, nil
 }
 // closeR records err on b under b.mu, unless an error is already recorded:
 // only the first error is kept.
 func (b *Blocks) closeR(err error) {
 	b.mu.Lock()
 	defer b.mu.Unlock()
 	if b.err != nil {
 		return
 	}
 	b.err = err
 }
 // NewFrameDataBlock returns a block backed by a pooled buffer of the block
 // size configured in the descriptor flags of f. Data and the internal
 // compressed-data buffer both start out as that buffer.
 func NewFrameDataBlock(f *Frame) *FrameDataBlock {
 	block := new(FrameDataBlock)
 	block.data = f.Descriptor.Flags.BlockSizeIndex().Get()
 	block.Data = block.data
 	return block
 }
 // FrameDataBlock is one data block of a frame together with the buffers
 // used to compress or uncompress it.
 type FrameDataBlock struct {
 	// Size is the block size word as stored in the stream.
 	Size     DataBlockSize
 	Data     []byte // compressed or uncompressed data (.data or .src)
 	// Checksum is the block checksum, set when the frame uses block checksums.
 	Checksum uint32
 	data     []byte // buffer for compressed data
 	src      []byte // uncompressed data
 	err      error  // used in concurrent mode
 }
 // Close resets the block and hands its buffer back to the buffer pool.
 // Calling it on an already closed block only resets the scalar fields.
 func (b *FrameDataBlock) Close(f *Frame) {
 	b.Size, b.Checksum, b.err = 0, 0, nil
 	if b.data == nil {
 		// Block was already closed.
 		return
 	}
 	lz4block.Put(b.data)
 	b.Data, b.data, b.src = nil, nil, nil
 }
 // Compress compresses src into the block at the given level and returns b.
 // When the compressor reports zero bytes written, the block is stored
 // uncompressed and Data aliases src.
 // Block compression errors are ignored since the buffer is sized appropriately.
 func (b *FrameDataBlock) Compress(f *Frame, src []byte, level lz4block.CompressionLevel) *FrameDataBlock {
 	out := b.data
 	if f.isLegacy() {
 		// In legacy mode, the buffer is sized according to CompressBlockBound,
 		// but only 8Mb is buffered for compression.
 		src = src[:8<<20]
 	} else {
 		out = out[:len(src)] // trigger the incompressible flag in CompressBlock
 	}
 	var written int
 	if level == lz4block.Fast {
 		written, _ = lz4block.CompressBlock(src, out)
 	} else {
 		written, _ = lz4block.CompressBlockHC(src, out, level)
 	}
 	stored := written == 0
 	b.Size.UncompressedSet(stored)
 	if stored {
 		b.Data = src
 	} else {
 		b.Data = out[:written]
 	}
 	b.Size.sizeSet(len(b.Data))
 	b.src = src // keep track of the source for content checksum
 	if f.Descriptor.Flags.BlockChecksum() {
 		b.Checksum = xxh32.ChecksumZero(src)
 	}
 	return b
 }
 // Write emits the block to dst: the 4-byte little-endian size word, the block
 // data and, when the frame has block checksums enabled, the 4-byte
 // little-endian block checksum. It also feeds the uncompressed source of the
 // block into the frame content checksum when that option is enabled.
 //
 // The first error returned by dst is returned as is.
 func (b *FrameDataBlock) Write(f *Frame, dst io.Writer) error {
 	// Write is called in the same order as blocks are compressed,
 	// so content checksum must be done here.
 	if f.Descriptor.Flags.ContentChecksum() {
 		_, _ = f.checksum.Write(b.src)
 	}
 	buf := f.buf[:]
 	binary.LittleEndian.PutUint32(buf, uint32(b.Size))
 	if _, err := dst.Write(buf[:4]); err != nil {
 		return err
 	}
 	if _, err := dst.Write(b.Data); err != nil {
 		return err
 	}
 	// The presence of the checksum is dictated by the frame flag, not by its
 	// value: Read expects four checksum bytes after every block whenever the
 	// flag is set, so a checksum that happens to be zero must still be written.
 	if !f.Descriptor.Flags.BlockChecksum() {
 		return nil
 	}
 	binary.LittleEndian.PutUint32(buf, b.Checksum)
 	_, err := dst.Write(buf[:4])
 	return err
 }
 // Read loads the next block from src into b: its size word, its data and,
 // when the frame uses block checksums, its checksum.
 //
 // It returns the size word read. io.EOF is returned at the end of the
 // stream: a zero size word for regular frames, or a word equal to cum for
 // legacy frames. A legacy magic number read in place of a size word starts
 // a concatenated legacy frame and is skipped.
 func (b *FrameDataBlock) Read(f *Frame, src io.Reader, cum uint32) (uint32, error) {
 	x, err := f.readUint32(src)
 	if err != nil {
 		return 0, err
 	}
 	legacy := f.isLegacy()
 	switch {
 	case legacy && x == frameMagicLegacy:
 		// Concatenated legacy frame.
 		return b.Read(f, src, cum)
 	case legacy && x == cum:
 		// Only works in non concurrent mode, for concurrent mode
 		// it is handled separately.
 		// Linux kernel format appends the total uncompressed size at the end.
 		return 0, io.EOF
 	case !legacy && x == 0:
 		// Marker for end of stream.
 		return 0, io.EOF
 	}
 	b.Size = DataBlockSize(x)
 	want := b.Size.size()
 	if want > cap(b.data) {
 		return x, lz4errors.ErrOptionInvalidBlockSize
 	}
 	b.data = b.data[:want]
 	if _, err := io.ReadFull(src, b.data); err != nil {
 		return x, err
 	}
 	if !f.Descriptor.Flags.BlockChecksum() {
 		return x, nil
 	}
 	sum, err := f.readUint32(src)
 	if err != nil {
 		return 0, err
 	}
 	b.Checksum = sum
 	return x, nil
 }
 // Uncompress writes the content of the block into dst and returns dst
 // resliced to the number of bytes produced. A block flagged as uncompressed
 // is copied verbatim; otherwise it is decoded, using dict as dictionary.
 //
 // When the frame uses block checksums, the result is verified against the
 // block checksum. When sum is true and the frame uses a content checksum,
 // the result is also fed into the frame content checksum.
 func (b *FrameDataBlock) Uncompress(f *Frame, dst, dict []byte, sum bool) ([]byte, error) {
 	var produced int
 	if b.Size.Uncompressed() {
 		produced = copy(dst, b.data)
 	} else {
 		n, err := lz4block.UncompressBlock(b.data, dst, dict)
 		if err != nil {
 			return nil, err
 		}
 		produced = n
 	}
 	dst = dst[:produced]
 	if f.Descriptor.Flags.BlockChecksum() {
 		got := xxh32.ChecksumZero(dst)
 		if got != b.Checksum {
 			return nil, fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidBlockChecksum, got, b.Checksum)
 		}
 	}
 	if sum && f.Descriptor.Flags.ContentChecksum() {
 		_, _ = f.checksum.Write(dst)
 	}
 	return dst, nil
 }
 // readUint32 reads four bytes from r into the scratch buffer of f and
 // decodes them as a little-endian uint32. On a read error the value is 0.
 func (f *Frame) readUint32(r io.Reader) (x uint32, err error) {
 	word := f.buf[:4]
 	if _, err = io.ReadFull(r, word); err != nil {
 		return 0, err
 	}
 	return binary.LittleEndian.Uint32(word), nil
 }
--- a/vendor/github.com/pierrec/lz4/v4/internal/lz4stream/frame.go
+++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4stream/frame.go
@ -0,0 +1,204 @@
 // Package lz4stream provides the types that support reading and writing LZ4 data streams.
 package lz4stream
 import (
 	"encoding/binary"
 	"fmt"
 	"io"
 	"io/ioutil"
 	"github.com/pierrec/lz4/v4/internal/lz4block"
 	"github.com/pierrec/lz4/v4/internal/lz4errors"
 	"github.com/pierrec/lz4/v4/internal/xxh32"
 )
 //go:generate go run gen.go
 // Magic numbers expected at the start of a frame (see Frame.ParseHeaders).
 const (
 	frameMagic       uint32 = 0x184D2204 // regular frame
 	frameSkipMagic   uint32 = 0x184D2A50 // first of the skippable frame magic numbers
 	frameMagicLegacy uint32 = 0x184C2102 // legacy frame
 )
 // NewFrame returns a pointer to a zero-valued Frame.
 func NewFrame() *Frame {
 	return new(Frame)
 }
 // Frame holds the state needed to read or write one LZ4 frame.
 type Frame struct {
 	buf        [15]byte // scratch space; the header needs at most 4(magic)+2(flags)+8(content size)+1(checksum)=15 bytes
 	Magic      uint32   // magic number of the frame; zero until a header has been read or InitW has run
 	Descriptor FrameDescriptor
 	Blocks     Blocks
 	Checksum   uint32        // content checksum read from the stream by CloseR
 	checksum   xxh32.XXHZero // running content checksum
 }
 // Reset allows reusing the Frame.
 // The magic number, the descriptor checksum and content size and the frame
 // checksum are zeroed; Descriptor.Flags is not modified.
 // num is forwarded to Blocks.close.
 func (f *Frame) Reset(num int) {
 	f.Magic = 0
 	f.Descriptor.Checksum = 0
 	f.Descriptor.ContentSize = 0
 	// The error from closing the blocks is deliberately discarded here.
 	_ = f.Blocks.close(f, num)
 	f.Checksum = 0
 }
 // InitW prepares f for writing to dst.
 //
 // A legacy frame gets the legacy magic number and the 8Mb block size index;
 // any other frame gets the regular magic number and the default descriptor
 // flags. The blocks are then initialised with num and the running checksum
 // is reset.
 func (f *Frame) InitW(dst io.Writer, num int, legacy bool) {
 	switch {
 	case legacy:
 		f.Magic = frameMagicLegacy
 		f.Descriptor.Flags.BlockSizeIndexSet(lz4block.Index(lz4block.Block8Mb))
 	default:
 		f.Magic = frameMagic
 		f.Descriptor.initW()
 	}
 	f.Blocks.initW(f, dst, num)
 	f.checksum.Reset()
 }
 // CloseW closes the blocks of f and, for non legacy frames, writes the frame
 // trailer to dst: the four zero bytes of the end mark followed by the content
 // checksum when the descriptor enables it.
 func (f *Frame) CloseW(dst io.Writer, num int) error {
 	err := f.Blocks.close(f, num)
 	if err != nil || f.isLegacy() {
 		// Legacy frames have no trailer; err is nil in that case.
 		return err
 	}
 	// End mark (data block size of uint32(0)).
 	trailer := append(f.buf[:0], 0, 0, 0, 0)
 	if f.Descriptor.Flags.ContentChecksum() {
 		trailer = f.checksum.Sum(trailer)
 	}
 	_, err = dst.Write(trailer)
 	return err
 }
 // isLegacy reports whether the magic number of f is the legacy frame one.
 func (f *Frame) isLegacy() bool {
 	return f.Magic == frameMagicLegacy
 }
 // ParseHeaders reads the frame magic number and descriptor from src.
 //
 // It does nothing when f.Magic is already set. Skippable frames are read and
 // discarded until a regular or legacy frame is found; any other magic number
 // yields lz4errors.ErrInvalidFrame. On success the running content checksum
 // is reset.
 func (f *Frame) ParseHeaders(src io.Reader) error {
 	if f.Magic > 0 {
 		// Header already read.
 		return nil
 	}
 	for {
 		var err error
 		if f.Magic, err = f.readUint32(src); err != nil {
 			return err
 		}
 		if f.Magic == frameMagic || f.Magic == frameMagicLegacy {
 			break
 		}
 		// All 16 values of frameSkipMagic are valid: they share its upper
 		// 28 bits, so only the low nibble may differ.
 		if f.Magic>>4 != frameSkipMagic>>4 {
 			return lz4errors.ErrInvalidFrame
 		}
 		// A skippable frame is a 32-bit size followed by that many bytes.
 		skip, err := f.readUint32(src)
 		if err != nil {
 			return err
 		}
 		if _, err := io.CopyN(ioutil.Discard, src, int64(skip)); err != nil {
 			return err
 		}
 	}
 	if err := f.Descriptor.initR(f, src); err != nil {
 		return err
 	}
 	f.checksum.Reset()
 	return nil
 }
 // InitR prepares the blocks of f for reading from src by delegating to
 // Blocks.initR, whose channel and error it returns unchanged.
 func (f *Frame) InitR(src io.Reader, num int) (chan []byte, error) {
 	return f.Blocks.initR(f, num, src)
 }
 // CloseR finishes reading a frame from src.
 //
 // Nothing is read for legacy frames or when the descriptor has no content
 // checksum. Otherwise the stored checksum is read into f.Checksum and compared
 // with the running one; a mismatch returns an error wrapping
 // lz4errors.ErrInvalidFrameChecksum.
 func (f *Frame) CloseR(src io.Reader) (err error) {
 	if f.isLegacy() || !f.Descriptor.Flags.ContentChecksum() {
 		return nil
 	}
 	f.Checksum, err = f.readUint32(src)
 	if err != nil {
 		return err
 	}
 	got := f.checksum.Sum32()
 	if got == f.Checksum {
 		return nil
 	}
 	return fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidFrameChecksum, got, f.Checksum)
 }
 // FrameDescriptor holds the fields of a frame header that follow the magic number.
 type FrameDescriptor struct {
 	Flags       DescriptorFlags // 16 bits of packed flags
 	ContentSize uint64          // only read or written when Flags.Size() is set
 	Checksum    uint8           // header checksum; Write also treats a non-zero value as "header already written"
 }
 // initW sets the flags every written frame starts with: version 1 and
 // independent blocks.
 func (fd *FrameDescriptor) initW() {
 	fd.Flags.VersionSet(1).BlockIndependenceSet(true)
 }
 // Write emits the frame header to dst using the scratch buffer of f.
 //
 // Nothing is written when fd.Checksum is already non-zero. Legacy frames
 // consist of the magic number only. Other frames add the flags, the content
 // size when Flags.Size() is set, and the header checksum, which is also
 // stored in fd.Checksum.
 func (fd *FrameDescriptor) Write(f *Frame, dst io.Writer) error {
 	if fd.Checksum > 0 {
 		// Header already written.
 		return nil
 	}
 	// Write the magic number here even though it belongs to the Frame.
 	hdr := f.buf[:4]
 	binary.LittleEndian.PutUint32(hdr, f.Magic)
 	if f.isLegacy() {
 		_, err := dst.Write(hdr)
 		return err
 	}
 	hdr = hdr[:4+2]
 	binary.LittleEndian.PutUint16(hdr[4:], uint16(fd.Flags))
 	if fd.Flags.Size() {
 		hdr = hdr[:4+2+8]
 		binary.LittleEndian.PutUint64(hdr[4+2:], fd.ContentSize)
 	}
 	// The checksum covers everything after the magic number.
 	fd.Checksum = descriptorChecksum(hdr[4:])
 	_, err := dst.Write(append(hdr, fd.Checksum))
 	return err
 }
 // initR reads the frame descriptor that follows the magic number from src.
 //
 // Legacy frames have no descriptor: the 8Mb block size index is set and
 // nothing is read. Otherwise the flags, the optional content size and the
 // header checksum are read, the checksum is verified and the block size
 // index is validated.
 func (fd *FrameDescriptor) initR(f *Frame, src io.Reader) error {
 	if f.isLegacy() {
 		idx := lz4block.Index(lz4block.Block8Mb)
 		f.Descriptor.Flags.BlockSizeIndexSet(idx)
 		return nil
 	}
 	// Read the flags and the checksum, hoping that there is not content size.
 	buf := f.buf[:3]
 	if _, err := io.ReadFull(src, buf); err != nil {
 		return err
 	}
 	descr := binary.LittleEndian.Uint16(buf)
 	fd.Flags = DescriptorFlags(descr)
 	if fd.Flags.Size() {
 		// Append the 8 missing bytes.
 		// The third byte read above was the first byte of the content size.
 		buf = buf[:3+8]
 		if _, err := io.ReadFull(src, buf[3:]); err != nil {
 			return err
 		}
 		// The content size occupies the 8 bytes that follow the 2 flag bytes.
 		fd.ContentSize = binary.LittleEndian.Uint64(buf[2:])
 	}
 	fd.Checksum = buf[len(buf)-1] // the checksum is the last byte
 	buf = buf[:len(buf)-1]        // all descriptor fields except checksum
 	if c := descriptorChecksum(buf); fd.Checksum != c {
 		return fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidHeaderChecksum, c, fd.Checksum)
 	}
 	// Validate the elements that can be.
 	if idx := fd.Flags.BlockSizeIndex(); !idx.IsValid() {
 		return lz4errors.ErrOptionInvalidBlockSize
 	}
 	return nil
 }
 // descriptorChecksum returns the header checksum of buf: the second lowest
 // byte of its xxh32 hash with seed 0.
 func descriptorChecksum(buf []byte) byte {
 	return byte(xxh32.ChecksumZero(buf) >> 8)
 }
--- a/vendor/github.com/pierrec/lz4/v4/internal/lz4stream/frame_gen.go
+++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4stream/frame_gen.go
@ -0,0 +1,103 @@
 // Code generated by `gen.exe`. DO NOT EDIT.
 package lz4stream
 import "github.com/pierrec/lz4/v4/internal/lz4block"
 // DescriptorFlags packs the frame descriptor flags into 16 bits, listed
 // here from the least significant bit upwards:
 //   field              bits
 //   -----              ----
 //   _                  2
 //   ContentChecksum    1
 //   Size               1
 //   BlockChecksum      1
 //   BlockIndependence  1
 //   Version            2
 //   _                  4
 //   BlockSizeIndex     3
 //   _                  1
 type DescriptorFlags uint16
 // Getters.
 func (x DescriptorFlags) ContentChecksum() bool   { return x&(1<<2) != 0 }
 func (x DescriptorFlags) Size() bool              { return x&(1<<3) != 0 }
 func (x DescriptorFlags) BlockChecksum() bool     { return x&(1<<4) != 0 }
 func (x DescriptorFlags) BlockIndependence() bool { return x&(1<<5) != 0 }
 func (x DescriptorFlags) Version() uint16         { return uint16(x>>6) & 0x3 }
 // BlockSizeIndex returns the 3-bit block size index stored in bits 12 to 14.
 func (x DescriptorFlags) BlockSizeIndex() lz4block.BlockSizeIndex {
 	return lz4block.BlockSizeIndex(x >> 12 & 0x7)
 }
 // Setters.
 // ContentChecksumSet sets (v true) or clears (v false) bit 2 and returns x.
 func (x *DescriptorFlags) ContentChecksumSet(v bool) *DescriptorFlags {
 	const bit = 1 << 2
 	if v {
 		*x |= bit
 	} else {
 		*x &^= bit
 	}
 	return x
 }
 // SizeSet sets (v true) or clears (v false) bit 3 and returns x.
 func (x *DescriptorFlags) SizeSet(v bool) *DescriptorFlags {
 	const bit = 1 << 3
 	if v {
 		*x |= bit
 	} else {
 		*x &^= bit
 	}
 	return x
 }
 // BlockChecksumSet sets (v true) or clears (v false) bit 4 and returns x.
 func (x *DescriptorFlags) BlockChecksumSet(v bool) *DescriptorFlags {
 	const bit = 1 << 4
 	if v {
 		*x |= bit
 	} else {
 		*x &^= bit
 	}
 	return x
 }
 // BlockIndependenceSet sets (v true) or clears (v false) bit 5 and returns x.
 func (x *DescriptorFlags) BlockIndependenceSet(v bool) *DescriptorFlags {
 	const bit = 1 << 5
 	if v {
 		*x |= bit
 	} else {
 		*x &^= bit
 	}
 	return x
 }
 // VersionSet stores the two low bits of v in bits 6 and 7 and returns x.
 func (x *DescriptorFlags) VersionSet(v uint16) *DescriptorFlags {
 	const mask = 0x3 << 6
 	cleared := *x &^ mask
 	*x = cleared | (DescriptorFlags(v) & 0x3 << 6)
 	return x
 }
 // BlockSizeIndexSet stores the three low bits of v in bits 12 to 14 and returns x.
 func (x *DescriptorFlags) BlockSizeIndexSet(v lz4block.BlockSizeIndex) *DescriptorFlags {
 	*x = *x&^(0x7<<12) | (DescriptorFlags(v) & 0x7 << 12)
 	return x
 }
 // Code generated by `gen.exe`. DO NOT EDIT.
 // DataBlockSize packs a block size and its "uncompressed" marker into 32
 // bits, listed here from the least significant bit upwards:
 //   field         bits
 //   -----         ----
 //   size          31
 //   Uncompressed  1
 type DataBlockSize uint32
 // Getters.
 func (x DataBlockSize) size() int          { return int(x &^ (1 << 31)) }
 func (x DataBlockSize) Uncompressed() bool { return x&(1<<31) != 0 }
 // Setters.
 func (x *DataBlockSize) sizeSet(v int) *DataBlockSize {
 	const mask = 0x7FFFFFFF
 	*x = (*x &^ mask) | (DataBlockSize(v) & mask)
 	return x
 }
 func (x *DataBlockSize) UncompressedSet(v bool) *DataBlockSize {
 	const bit = 1 << 31
 	if v {
 		*x |= bit
 	} else {
 		*x &^= bit
 	}
 	return x
 }
--- a/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero.go
+++ b/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero.go
@ -0,0 +1,212 @@
 // Package xxh32 implements the very fast XXH hashing algorithm (32 bits version).
 // (ported from the reference implementation https://github.com/Cyan4973/xxHash/)
 package xxh32
 import (
 	"encoding/binary"
 )
 // Multipliers used by the hash rounds, followed by the two derived values
 // that Reset and checksumZeroGo use as initial accumulator contents.
 const (
 	prime1 uint32 = 2654435761
 	prime2 uint32 = 2246822519
 	prime3 uint32 = 3266489917
 	prime4 uint32 = 668265263
 	prime5 uint32 = 374761393
 	primeMask   = 0xFFFFFFFF
 	prime1plus2 = uint32((uint64(prime1) + uint64(prime2)) & primeMask) // 606290984
 	prime1minus = uint32((-int64(prime1)) & primeMask)                  // 1640531535
 )
 // XXHZero represents an xxhash32 object with seed 0.
 type XXHZero struct {
 	v        [4]uint32 // the four accumulators updated for every 16 bytes of input
 	totalLen uint64    // total number of bytes passed to Write
 	buf      [16]byte  // input bytes not yet folded into v
 	bufused  int       // number of valid bytes in buf
 }
 // Sum appends the current hash, as four little-endian bytes, to b and
 // returns the resulting slice.
 // It does not change the underlying hash state.
 func (xxh XXHZero) Sum(b []byte) []byte {
 	var le [4]byte
 	binary.LittleEndian.PutUint32(le[:], xxh.Sum32())
 	return append(b, le[:]...)
 }
 // Reset resets the Hash to its initial state: the accumulators get their
 // starting values and the byte count and buffered length are cleared.
 func (xxh *XXHZero) Reset() {
 	xxh.v = [4]uint32{prime1plus2, prime2, 0, prime1minus}
 	xxh.totalLen, xxh.bufused = 0, 0
 }
 // Size returns the number of bytes returned by Sum().
 func (xxh *XXHZero) Size() int {
 	return 4
 }
 // BlockSize gives the minimum number of bytes accepted by Write().
 func (xxh *XXHZero) BlockSize() int {
 	return 1
 }
 // Write adds input bytes to the Hash.
 // It never returns an error and always reports len(input) bytes as written,
 // as the io.Writer contract requires.
 func (xxh *XXHZero) Write(input []byte) (int, error) {
 	if xxh.totalLen == 0 {
 		// First write: make sure the accumulators hold their initial values.
 		xxh.Reset()
 	}
 	total := len(input)
 	m := xxh.bufused
 	xxh.totalLen += uint64(total)
 	if total < len(xxh.buf)-m {
 		// Not enough data to complete a 16-byte block: just buffer it.
 		xxh.bufused += copy(xxh.buf[m:], input)
 		return total, nil
 	}
 	var buf *[16]byte
 	if m != 0 {
 		// some data left from previous update: top the buffer up to a full
 		// block and let update consume it before the rest of input.
 		buf = &xxh.buf
 		input = input[copy(buf[m:], input):]
 	}
 	update(&xxh.v, buf, input)
 	// Keep the tail that does not fill a whole block for the next call.
 	xxh.bufused = copy(xxh.buf[:], input[len(input)-len(input)%16:])
 	return total, nil
 }
 // Portable version of update. This updates v by processing all of buf
 // (if not nil) and all full 16-byte blocks of input.
 // Trailing input bytes that do not fill a 16-byte block are ignored.
 func updateGo(v *[4]uint32, buf *[16]byte, input []byte) {
 	// Causes compiler to work directly from registers instead of stack:
 	v1, v2, v3, v4 := v[0], v[1], v[2], v[3]
 	if buf != nil {
 		// One round over the buffered block: each accumulator consumes one
 		// little-endian 32-bit word.
 		v1 = rol13(v1+binary.LittleEndian.Uint32(buf[:])*prime2) * prime1
 		v2 = rol13(v2+binary.LittleEndian.Uint32(buf[4:])*prime2) * prime1
 		v3 = rol13(v3+binary.LittleEndian.Uint32(buf[8:])*prime2) * prime1
 		v4 = rol13(v4+binary.LittleEndian.Uint32(buf[12:])*prime2) * prime1
 	}
 	for ; len(input) >= 16; input = input[16:] {
 		sub := input[:16] //BCE hint for compiler
 		v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
 		v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
 		v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
 		v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
 	}
 	v[0], v[1], v[2], v[3] = v1, v2, v3, v4
 }
 // Sum32 returns the 32 bits Hash value.
 // It only reads the hash state, so more data may be written afterwards.
 func (xxh *XXHZero) Sum32() uint32 {
 	h32 := uint32(xxh.totalLen)
 	// Decide on the full 64-bit length: once 16 bytes have been written the
 	// accumulators are in use, even if the length truncated to 32 bits has
 	// wrapped around to a value below 16.
 	if xxh.totalLen >= 16 {
 		h32 += rol1(xxh.v[0]) + rol7(xxh.v[1]) + rol12(xxh.v[2]) + rol18(xxh.v[3])
 	} else {
 		h32 += prime5
 	}
 	p := 0
 	n := xxh.bufused
 	buf := xxh.buf
 	// Fold in the buffered tail: whole 32-bit words first, then single bytes.
 	for n := n - 4; p <= n; p += 4 {
 		h32 += binary.LittleEndian.Uint32(buf[p:p+4]) * prime3
 		h32 = rol17(h32) * prime4
 	}
 	for ; p < n; p++ {
 		h32 += uint32(buf[p]) * prime5
 		h32 = rol11(h32) * prime1
 	}
 	// Final mixing.
 	h32 ^= h32 >> 15
 	h32 *= prime2
 	h32 ^= h32 >> 13
 	h32 *= prime3
 	h32 ^= h32 >> 16
 	return h32
 }
 // Portable version of ChecksumZero.
 // It hashes input in one pass: full 16-byte blocks go through the four
 // accumulators, then the remaining 32-bit words and bytes are folded in
 // before the final mixing steps.
 func checksumZeroGo(input []byte) uint32 {
 	n := len(input)
 	h32 := uint32(n)
 	if n < 16 {
 		h32 += prime5
 	} else {
 		v1 := prime1plus2
 		v2 := prime2
 		v3 := uint32(0)
 		v4 := prime1minus
 		p := 0
 		for n := n - 16; p <= n; p += 16 {
 			sub := input[p:][:16] //BCE hint for compiler
 			v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
 			v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
 			v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
 			v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
 		}
 		// Continue with the tail that did not fill a 16-byte block.
 		input = input[p:]
 		n -= p
 		h32 += rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
 	}
 	p := 0
 	for n := n - 4; p <= n; p += 4 {
 		h32 += binary.LittleEndian.Uint32(input[p:p+4]) * prime3
 		h32 = rol17(h32) * prime4
 	}
 	for p < n {
 		h32 += uint32(input[p]) * prime5
 		h32 = rol11(h32) * prime1
 		p++
 	}
 	// Final mixing.
 	h32 ^= h32 >> 15
 	h32 *= prime2
 	h32 ^= h32 >> 13
 	h32 *= prime3
 	h32 ^= h32 >> 16
 	return h32
 }
 // rol1 rotates u left by 1 bit.
 func rol1(u uint32) uint32 {
 	const shift = 1
 	return (u << shift) | (u >> (32 - shift))
 }
 // rol7 rotates u left by 7 bits.
 func rol7(u uint32) uint32 {
 	const shift = 7
 	return (u << shift) | (u >> (32 - shift))
 }
 // rol11 rotates u left by 11 bits.
 func rol11(u uint32) uint32 {
 	const shift = 11
 	return (u << shift) | (u >> (32 - shift))
 }
 // rol12 rotates u left by 12 bits.
 func rol12(u uint32) uint32 {
 	const shift = 12
 	return (u << shift) | (u >> (32 - shift))
 }
 // rol13 rotates u left by 13 bits.
 func rol13(u uint32) uint32 {
 	const shift = 13
 	return (u << shift) | (u >> (32 - shift))
 }
 // rol17 rotates u left by 17 bits.
 func rol17(u uint32) uint32 {
 	const shift = 17
 	return (u << shift) | (u >> (32 - shift))
 }
 // rol18 rotates u left by 18 bits.
 func rol18(u uint32) uint32 {
 	const shift = 18
 	return (u << shift) | (u >> (32 - shift))
 }
--- a/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_arm.go
+++ b/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_arm.go
@ -0,0 +1,11 @@
 // +build !noasm
 package xxh32
 // ChecksumZero returns the 32-bit hash of input.
 // This declaration has no body; the function is implemented in assembly.
 //
 //go:noescape
 func ChecksumZero(input []byte) uint32
 // update is the assembly counterpart of updateGo: it updates v by processing
 // buf (if not nil) and the full 16-byte blocks of input.
 //
 //go:noescape
 func update(v *[4]uint32, buf *[16]byte, input []byte)
--- a/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_arm.s
+++ b/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_arm.s
@ -0,0 +1,251 @@
 // +build !noasm
 #include "go_asm.h"
 #include "textflag.h"
 // Register allocation.
 #define p	R0
 #define n	R1
 #define h	R2
 #define v1	R2	// Alias for h.
 #define v2	R3
 #define v3	R4
 #define v4	R5
 #define x1	R6
 #define x2	R7
 #define x3	R8
 #define x4	R9
 // We need the primes in registers. The 16-byte loop only uses prime{1,2}.
 #define prime1r	R11
 #define prime2r	R12
 #define prime3r	R3	// The rest can alias v{2-4}.
 #define prime4r	R4
 #define prime5r	R5
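 // Update round macros. Each one reads 16 bytes from p, advances p by 16 and
 // applies one round to v1-v4. round16aligned loads whole words and is used
 // when p is 4-byte aligned; round16unaligned builds each word from
 // single-byte loads.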
 #define round16aligned			\
 	MOVM.IA.W (p), [x1, x2, x3, x4]	\
 					\
 	MULA x1, prime2r, v1, v1	\
 	MULA x2, prime2r, v2, v2	\
 	MULA x3, prime2r, v3, v3	\
 	MULA x4, prime2r, v4, v4	\
 					\
 	MOVW v1 @> 19, v1		\
 	MOVW v2 @> 19, v2		\
 	MOVW v3 @> 19, v3		\
 	MOVW v4 @> 19, v4		\
 					\
 	MUL prime1r, v1			\
 	MUL prime1r, v2			\
 	MUL prime1r, v3			\
 	MUL prime1r, v4			\
 #define round16unaligned 		\
 	MOVBU.P  16(p), x1		\
 	MOVBU   -15(p), x2		\
 	ORR     x2 <<  8, x1		\
 	MOVBU   -14(p), x3		\
 	MOVBU   -13(p), x4		\
 	ORR     x4 <<  8, x3		\
 	ORR     x3 << 16, x1		\
 					\
 	MULA x1, prime2r, v1, v1	\
 	MOVW v1 @> 19, v1		\
 	MUL prime1r, v1			\
 					\
 	MOVBU -12(p), x1		\
 	MOVBU -11(p), x2		\
 	ORR   x2 <<  8, x1		\
 	MOVBU -10(p), x3		\
 	MOVBU  -9(p), x4		\
 	ORR   x4 <<  8, x3		\
 	ORR   x3 << 16, x1		\
 					\
 	MULA x1, prime2r, v2, v2	\
 	MOVW v2 @> 19, v2		\
 	MUL prime1r, v2			\
 					\
 	MOVBU -8(p), x1			\
 	MOVBU -7(p), x2			\
 	ORR   x2 <<  8, x1		\
 	MOVBU -6(p), x3			\
 	MOVBU -5(p), x4			\
 	ORR   x4 <<  8, x3		\
 	ORR   x3 << 16, x1		\
 					\
 	MULA x1, prime2r, v3, v3	\
 	MOVW v3 @> 19, v3		\
 	MUL prime1r, v3			\
 					\
 	MOVBU -4(p), x1			\
 	MOVBU -3(p), x2			\
 	ORR   x2 <<  8, x1		\
 	MOVBU -2(p), x3			\
 	MOVBU -1(p), x4			\
 	ORR   x4 <<  8, x3		\
 	ORR   x3 << 16, x1		\
 					\
 	MULA x1, prime2r, v4, v4	\
 	MOVW v4 @> 19, v4		\
 	MUL prime1r, v4			\
 // func ChecksumZero([]byte) uint32
 //
 // Assembly counterpart of checksumZeroGo: 16-byte rounds while at least 16
 // bytes remain, then 4-byte steps, then single bytes, then the final mixing.
 TEXT ·ChecksumZero(SB), NOFRAME|NOSPLIT, $-4-16
 	MOVW input_base+0(FP), p
 	MOVW input_len+4(FP),  n
 	MOVW $const_prime1, prime1r
 	MOVW $const_prime2, prime2r
 	// Set up h for n < 16. It's tempting to say {ADD prime5, n, h}
 	// here, but that's a pseudo-op that generates a load through R11.
 	MOVW $const_prime5, prime5r
 	ADD  prime5r, n, h
 	// Empty input: go straight to the final mixing.
 	CMP  $0, n
 	BEQ  end
 	// We let n go negative so we can do comparisons with SUB.S
 	// instead of separate CMP.
 	SUB.S $16, n
 	BMI   loop16done
 	// Initial accumulator values: prime1+prime2, prime2, 0, -prime1.
 	ADD  prime1r, prime2r, v1
 	MOVW prime2r, v2
 	MOVW $0, v3
 	RSB  $0, prime1r, v4
 	// Pick the word-wise or byte-wise loop from the alignment of p.
 	TST $3, p
 	BNE loop16unaligned
 loop16aligned:
 	SUB.S $16, n
 	round16aligned
 	BPL loop16aligned
 	B   loop16finish
 loop16unaligned:
 	SUB.S $16, n
 	round16unaligned
 	BPL loop16unaligned
 loop16finish:
 	// h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4), written as right rotations.
 	MOVW v1 @> 31, h
 	ADD  v2 @> 25, h
 	ADD  v3 @> 20, h
 	ADD  v4 @> 14, h
 	// h += len(input) with v2 as temporary.
 	MOVW input_len+4(FP), v2
 	ADD  v2, h
 loop16done:
 	ADD $16, n	// Restore number of bytes left.
 	SUB.S $4, n
 	MOVW  $const_prime3, prime3r
 	BMI   loop4done
 	MOVW  $const_prime4, prime4r
 	TST $3, p
 	BNE loop4unaligned
 loop4aligned:
 	SUB.S $4, n
 	MOVW.P 4(p), x1
 	MULA   prime3r, x1, h, h
 	MOVW   h @> 15, h
 	MUL    prime4r, h
 	BPL loop4aligned
 	B   loop4done
 loop4unaligned:
 	SUB.S $4, n
 	MOVBU.P  4(p), x1
 	MOVBU   -3(p), x2
 	ORR     x2 <<  8, x1
 	MOVBU   -2(p), x3
 	ORR     x3 << 16, x1
 	MOVBU   -1(p), x4
 	ORR     x4 << 24, x1
 	MULA prime3r, x1, h, h
 	MOVW h @> 15, h
 	MUL  prime4r, h
 	BPL loop4unaligned
 loop4done:
 	ADD.S $4, n	// Restore number of bytes left.
 	BEQ   end
 	// prime5r aliases v4, which the 16-byte rounds may have overwritten.
 	MOVW $const_prime5, prime5r
 loop1:
 	SUB.S $1, n
 	MOVBU.P 1(p), x1
 	MULA    prime5r, x1, h, h
 	MOVW    h @> 21, h
 	MUL     prime1r, h
 	BNE loop1
 end:
 	// Final mixing.
 	MOVW $const_prime3, prime3r
 	EOR  h >> 15, h
 	MUL  prime2r, h
 	EOR  h >> 13, h
 	MUL  prime3r, h
 	EOR  h >> 16, h
 	MOVW h, ret+12(FP)
 	RET
 // func update(v *[4]uint32, buf *[16]byte, input []byte)
 //
 // Assembly counterpart of updateGo: loads the accumulators from v, applies
 // one round to buf when it is not nil, then one round per full 16-byte block
 // of input, and stores the accumulators back.
 TEXT ·update(SB), NOFRAME|NOSPLIT, $-4-20
 	MOVW    v+0(FP), p
 	MOVM.IA (p), [v1, v2, v3, v4]
 	MOVW $const_prime1, prime1r
 	MOVW $const_prime2, prime2r
 	// Process buf, if not nil.
 	// NOTE(review): the aligned round is used without an alignment test, so
 	// buf is presumably always 4-byte aligned - confirm against callers.
 	MOVW buf+4(FP), p
 	CMP  $0, p
 	BEQ  noBuffered
 	round16aligned
 noBuffered:
 	MOVW input_base +8(FP), p
 	MOVW input_len +12(FP), n
 	// n goes negative once fewer than 16 bytes remain.
 	SUB.S $16, n
 	BMI   end
 	// Pick the word-wise or byte-wise loop from the alignment of p.
 	TST $3, p
 	BNE loop16unaligned
 loop16aligned:
 	SUB.S $16, n
 	round16aligned
 	BPL loop16aligned
 	B   end
 loop16unaligned:
 	SUB.S $16, n
 	round16unaligned
 	BPL loop16unaligned
 end:
 	MOVW    v+0(FP), p
 	MOVM.IA [v1, v2, v3, v4], (p)
 	RET
--- a/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_other.go
+++ b/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_other.go
@ -0,0 +1,10 @@
 // +build !arm noasm
 package xxh32
 // ChecksumZero returns the 32-bit hash of input.
 // This build uses the portable implementation, checksumZeroGo.
 func ChecksumZero(input []byte) uint32 { return checksumZeroGo(input) }
 // update forwards to the portable implementation, updateGo.
 func update(v *[4]uint32, buf *[16]byte, input []byte) {
 	updateGo(v, buf, input)
 }
--- a/vendor/github.com/pierrec/lz4/v4/lz4.go
+++ b/vendor/github.com/pierrec/lz4/v4/lz4.go
@ -0,0 +1,157 @@
 // Package lz4 implements reading and writing lz4 compressed data.
 //
 // The package supports both the LZ4 stream format,
 // as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html,
 // and the LZ4 block format, defined at
 // http://fastcompression.blogspot.fr/2011/05/lz4-explained.html.
 //
 // See https://github.com/lz4/lz4 for the reference C implementation.
 package lz4
 import (
 	"github.com/pierrec/lz4/v4/internal/lz4block"
 	"github.com/pierrec/lz4/v4/internal/lz4errors"
 )
 // This function is never called. It makes compilation fail with an invalid
 // array index if a constant of this package no longer has the same value as
 // its lz4block counterpart.
 func _() {
 	// Safety checks for duplicated elements.
 	var x [1]struct{}
 	_ = x[lz4block.CompressionLevel(Fast)-lz4block.Fast]
 	_ = x[Block64Kb-BlockSize(lz4block.Block64Kb)]
 	_ = x[Block256Kb-BlockSize(lz4block.Block256Kb)]
 	_ = x[Block1Mb-BlockSize(lz4block.Block1Mb)]
 	_ = x[Block4Mb-BlockSize(lz4block.Block4Mb)]
 }
 // CompressBlockBound returns the maximum size needed to compress a buffer
 // of size n, which is reached when the data is not compressible.
 // It forwards to lz4block.CompressBlockBound.
 func CompressBlockBound(n int) int {
 	return lz4block.CompressBlockBound(n)
 }
 // UncompressBlock uncompresses the source buffer into the destination one,
 // and returns the uncompressed size.
 //
 // The destination buffer must be sized appropriately.
 //
 // An error is returned if the source data is invalid or the destination buffer is too small.
 //
 // It behaves like UncompressBlockWithDict called with a nil dictionary.
 func UncompressBlock(src, dst []byte) (int, error) {
 	return lz4block.UncompressBlock(src, dst, nil)
 }
 // UncompressBlockWithDict uncompresses the source buffer into the destination one using a
 // dictionary, and returns the uncompressed size.
 //
 // The destination buffer must be sized appropriately.
 //
 // An error is returned if the source data is invalid or the destination buffer is too small.
 //
 // The work is done by lz4block.UncompressBlock.
 func UncompressBlockWithDict(src, dst, dict []byte) (int, error) {
 	return lz4block.UncompressBlock(src, dst, dict)
 }
 // A Compressor compresses data into the LZ4 block format.
 // It uses a fast compression algorithm.
 //
 // A Compressor is not safe for concurrent use by multiple goroutines.
 //
 // Use a Writer to compress into the LZ4 stream format.
 //
 // It wraps an lz4block.Compressor.
 type Compressor struct{ c lz4block.Compressor }
 // CompressBlock compresses the source buffer src into the destination dst.
 //
 // If compression is successful, the first return value is the size of the
 // compressed data, which is always >0.
 //
 // If dst has length at least CompressBlockBound(len(src)), compression always
 // succeeds. Otherwise, the first return value is zero. The error return is
 // non-nil if the compressed data does not fit in dst, but it might fit in a
 // larger buffer that is still smaller than CompressBlockBound(len(src)). The
 // return value (0, nil) means the data is likely incompressible and a buffer
 // of length CompressBlockBound(len(src)) should be passed in.
 func (c *Compressor) CompressBlock(src, dst []byte) (int, error) {
 	return c.c.CompressBlock(src, dst)
 }
 // CompressBlock compresses the source buffer into the destination one.
 // This is the fast version of LZ4 compression and also the default one.
 //
 // The size of the compressed data is returned.
 //
 // If the destination buffer size is lower than CompressBlockBound and
 // the compressed size is 0 and no error, then the data is incompressible.
 //
 // An error is returned if the destination buffer is too small.
 //
 // CompressBlock is equivalent to Compressor.CompressBlock.
 // The final argument, formerly scratch space for a hash table, is ignored
 // and should be set to nil.
 //
 // Deprecated: Use a Compressor instead.
 func CompressBlock(src, dst []byte, _ []int) (int, error) {
 	return lz4block.CompressBlock(src, dst)
 }
 // A CompressorHC compresses data into the LZ4 block format.
 // Its compression ratio is potentially better than that of a Compressor,
 // but it is also slower and requires more memory.
 //
 // A CompressorHC is not safe for concurrent use by multiple goroutines.
 //
 // Use a Writer to compress into the LZ4 stream format.
 type CompressorHC struct {
 	// Level is the maximum search depth for compression.
 	// Values <= 0 mean no maximum.
 	Level CompressionLevel
 	c     lz4block.CompressorHC
 }
 // CompressBlock compresses the source buffer src into the destination dst,
 // using c.Level as the search depth.
 //
 // If compression is successful, the first return value is the size of the
 // compressed data, which is always >0.
 //
 // If dst has length at least CompressBlockBound(len(src)), compression always
 // succeeds. Otherwise, the first return value is zero. The error return is
 // non-nil if the compressed data does not fit in dst, but it might fit in a
 // larger buffer that is still smaller than CompressBlockBound(len(src)). The
 // return value (0, nil) means the data is likely incompressible and a buffer
 // of length CompressBlockBound(len(src)) should be passed in.
 func (c *CompressorHC) CompressBlock(src, dst []byte) (int, error) {
 	return c.c.CompressBlock(src, dst, lz4block.CompressionLevel(c.Level))
 }
 // CompressBlockHC is equivalent to CompressorHC.CompressBlock, with depth
 // taking the place of the Level field.
 // The final two arguments are ignored and should be set to nil.
 //
 // Deprecated: Use a CompressorHC instead.
 func CompressBlockHC(src, dst []byte, depth CompressionLevel, _, _ []int) (int, error) {
 	return lz4block.CompressBlockHC(src, dst, lz4block.CompressionLevel(depth))
 }
 // Errors exposed by this package. They alias the values defined in lz4errors.
 const (
 	// ErrInvalidSourceShortBuffer is returned by UncompressBlock or CompressBlock when a compressed
 	// block is corrupted or the destination buffer is not large enough for the uncompressed data.
 	ErrInvalidSourceShortBuffer = lz4errors.ErrInvalidSourceShortBuffer
 	// ErrInvalidFrame is returned when reading an invalid LZ4 archive.
 	ErrInvalidFrame = lz4errors.ErrInvalidFrame
 	// ErrInternalUnhandledState is an internal error.
 	ErrInternalUnhandledState = lz4errors.ErrInternalUnhandledState
 	// ErrInvalidHeaderChecksum is returned when reading a frame.
 	ErrInvalidHeaderChecksum = lz4errors.ErrInvalidHeaderChecksum
 	// ErrInvalidBlockChecksum is returned when reading a frame.
 	ErrInvalidBlockChecksum = lz4errors.ErrInvalidBlockChecksum
 	// ErrInvalidFrameChecksum is returned when reading a frame.
 	ErrInvalidFrameChecksum = lz4errors.ErrInvalidFrameChecksum
 	// ErrOptionInvalidCompressionLevel is returned when the supplied compression level is invalid.
 	ErrOptionInvalidCompressionLevel = lz4errors.ErrOptionInvalidCompressionLevel
 	// ErrOptionClosedOrError is returned when an option is applied to a closed or in error object.
 	ErrOptionClosedOrError = lz4errors.ErrOptionClosedOrError
 	// ErrOptionInvalidBlockSize is returned when the block size supplied as an option
 	// or found while reading a frame is invalid.
 	ErrOptionInvalidBlockSize = lz4errors.ErrOptionInvalidBlockSize
 	// ErrOptionNotApplicable is returned when trying to apply an option to an object not supporting it.
 	ErrOptionNotApplicable = lz4errors.ErrOptionNotApplicable
 	// ErrWriterNotClosed is returned when attempting to reset an unclosed writer.
 	ErrWriterNotClosed = lz4errors.ErrWriterNotClosed
 )
--- a/vendor/github.com/pierrec/lz4/v4/options.go
+++ b/vendor/github.com/pierrec/lz4/v4/options.go
@ -0,0 +1,214 @@
 package lz4
 import (
 	"fmt"
 	"reflect"
 	"runtime"
 	"github.com/pierrec/lz4/v4/internal/lz4block"
 	"github.com/pierrec/lz4/v4/internal/lz4errors"
 )
 //go:generate go run golang.org/x/tools/cmd/stringer -type=BlockSize,CompressionLevel -output options_gen.go
 type (
 	// applier is the target an Option is applied to. The options in this
 	// file recognise *Writer and *Reader targets, and a nil applier as a
 	// request for the option's description.
 	applier interface {
 		Apply(...Option) error
 		private()
 	}
 	// Option defines the parameters to setup an LZ4 Writer or Reader.
 	Option func(applier) error
 )
 // String returns a string representation of the option with its parameter(s).
 // It relies on the convention that an option called with a nil applier
 // returns its description wrapped in an error.
 func (o Option) String() string {
 	return o(nil).Error()
 }
 // Default options.
 var (
 	DefaultBlockSizeOption = BlockSizeOption(Block4Mb) // 4Mb blocks
 	DefaultChecksumOption  = ChecksumOption(true)      // content checksum enabled
 	DefaultConcurrency     = ConcurrencyOption(1)      // a single goroutine
 	defaultOnBlockDone     = OnBlockDoneOption(nil)    // no-op callback
 )
 // Supported block sizes, in bytes. Each one is four times the previous one.
 const (
 	Block64Kb BlockSize = 1 << (16 + iota*2)
 	Block256Kb
 	Block1Mb
 	Block4Mb
 )
 // BlockSize defines the size of the blocks to be compressed.
 type BlockSize uint32
 // BlockSizeOption defines the maximum size of compressed blocks (default=Block4Mb).
 //
 // It applies to a *Writer only. A size rejected by lz4block.IsValid yields
 // an error wrapping lz4errors.ErrOptionInvalidBlockSize.
 func BlockSizeOption(size BlockSize) Option {
 	return func(a applier) error {
 		if a == nil {
 			// Description request, see Option.String.
 			return lz4errors.Error(fmt.Sprintf("BlockSizeOption(%s)", size))
 		}
 		w, ok := a.(*Writer)
 		if !ok {
 			return lz4errors.ErrOptionNotApplicable
 		}
 		raw := uint32(size)
 		if !lz4block.IsValid(raw) {
 			return fmt.Errorf("%w: %d", lz4errors.ErrOptionInvalidBlockSize, raw)
 		}
 		w.frame.Descriptor.Flags.BlockSizeIndexSet(lz4block.Index(raw))
 		return nil
 	}
 }
 // BlockChecksumOption enables or disables block checksum (default=false).
 // It applies to a *Writer only.
 func BlockChecksumOption(flag bool) Option {
 	return func(a applier) error {
 		if a == nil {
 			// Description request, see Option.String.
 			return lz4errors.Error(fmt.Sprintf("BlockChecksumOption(%v)", flag))
 		}
 		w, ok := a.(*Writer)
 		if !ok {
 			return lz4errors.ErrOptionNotApplicable
 		}
 		w.frame.Descriptor.Flags.BlockChecksumSet(flag)
 		return nil
 	}
 }
 // ChecksumOption enables or disables the content checksum, which covers the
 // data of all blocks (default=true).
 // It applies to a *Writer only.
 func ChecksumOption(flag bool) Option {
 	return func(a applier) error {
 		if a == nil {
 			// Description request, see Option.String.
 			return lz4errors.Error(fmt.Sprintf("ChecksumOption(%v)", flag))
 		}
 		w, ok := a.(*Writer)
 		if !ok {
 			return lz4errors.ErrOptionNotApplicable
 		}
 		w.frame.Descriptor.Flags.ContentChecksumSet(flag)
 		return nil
 	}
 }
 // SizeOption sets the size of the original uncompressed data (default=0). It is useful to know the size of the
 // whole uncompressed data stream.
 //
 // The size flag of the descriptor is set only when size is greater than zero.
 // It applies to a *Writer only.
 func SizeOption(size uint64) Option {
 	return func(a applier) error {
 		if a == nil {
 			// Description request, see Option.String.
 			return lz4errors.Error(fmt.Sprintf("SizeOption(%d)", size))
 		}
 		w, ok := a.(*Writer)
 		if !ok {
 			return lz4errors.ErrOptionNotApplicable
 		}
 		descriptor := &w.frame.Descriptor
 		descriptor.Flags.SizeSet(size > 0)
 		descriptor.ContentSize = size
 		return nil
 	}
 }
 // ConcurrencyOption sets the number of go routines used for compression.
 // If n <= 0, then the output of runtime.GOMAXPROCS(0) is used.
 // It applies to both *Writer and *Reader.
 func ConcurrencyOption(n int) Option {
 	if n <= 0 {
 		n = runtime.GOMAXPROCS(0)
 	}
 	return func(a applier) error {
 		switch target := a.(type) {
 		case *Writer:
 			target.num = n
 		case *Reader:
 			target.num = n
 		case nil:
 			// Description request, see Option.String.
 			return lz4errors.Error(fmt.Sprintf("ConcurrencyOption(%d)", n))
 		default:
 			return lz4errors.ErrOptionNotApplicable
 		}
 		return nil
 	}
 }
 // CompressionLevel defines the level of compression to use. The higher the better, but slower, compression.
 type CompressionLevel uint32
 // Supported compression levels. Fast is 0; Level1 to Level9 are successive
 // powers of two, from 512 up to 131072.
 const (
 	Fast   CompressionLevel = 0
 	Level1 CompressionLevel = 1 << (8 + iota)
 	Level2
 	Level3
 	Level4
 	Level5
 	Level6
 	Level7
 	Level8
 	Level9
 )
 // CompressionLevelOption defines the compression level (default=Fast).
 //
 // It applies to a *Writer only. A level other than Fast or Level1 to Level9
 // yields an error wrapping lz4errors.ErrOptionInvalidCompressionLevel.
 func CompressionLevelOption(level CompressionLevel) Option {
 	return func(a applier) error {
 		if a == nil {
 			// Description request, see Option.String.
 			return lz4errors.Error(fmt.Sprintf("CompressionLevelOption(%s)", level))
 		}
 		w, ok := a.(*Writer)
 		if !ok {
 			return lz4errors.ErrOptionNotApplicable
 		}
 		switch level {
 		case Fast, Level1, Level2, Level3, Level4, Level5, Level6, Level7, Level8, Level9:
 			w.level = lz4block.CompressionLevel(level)
 			return nil
 		}
 		return fmt.Errorf("%w: %d", lz4errors.ErrOptionInvalidCompressionLevel, level)
 	}
 }
 // onBlockDone is the no-op handler installed when OnBlockDoneOption is given nil.
 func onBlockDone(int) {}
 // OnBlockDoneOption installs handler, which is triggered when a block has
 // been processed. For a Writer, it is when it has been compressed, for a
 // Reader, it is when it has been uncompressed.
 // A nil handler is replaced by a no-op one.
 func OnBlockDoneOption(handler func(size int)) Option {
 	if handler == nil {
 		handler = onBlockDone
 	}
 	return func(a applier) error {
 		switch target := a.(type) {
 		case *Writer:
 			target.handler = handler
 		case *Reader:
 			target.handler = handler
 		case nil:
 			// Description request, see Option.String.
 			return lz4errors.Error(fmt.Sprintf("OnBlockDoneOption(%s)", reflect.TypeOf(handler).String()))
 		default:
 			return lz4errors.ErrOptionNotApplicable
 		}
 		return nil
 	}
 }
 // LegacyOption provides support for writing LZ4 frames in the legacy format.
 //
 // See https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md#legacy-frame.
 //
 // NB. compressed Linux kernel images use a tweaked LZ4 legacy format where
 // the compressed stream is followed by the original (uncompressed) size of
 // the kernel (https://events.static.linuxfound.org/sites/events/files/lcjpcojp13_klee.pdf).
 // This is also supported as a special case.
 //
 // It applies to a *Writer only.
 func LegacyOption(legacy bool) Option {
 	return func(a applier) error {
 		if a == nil {
 			// Description request, see Option.String.
 			return lz4errors.Error(fmt.Sprintf("LegacyOption(%v)", legacy))
 		}
 		w, ok := a.(*Writer)
 		if !ok {
 			return lz4errors.ErrOptionNotApplicable
 		}
 		w.legacy = legacy
 		return nil
 	}
 }
--- a/vendor/github.com/pierrec/lz4/v4/options_gen.go
+++ b/vendor/github.com/pierrec/lz4/v4/options_gen.go
@ -0,0 +1,92 @@
 // Code generated by "stringer -type=BlockSize,CompressionLevel -output options_gen.go"; DO NOT EDIT.
 package lz4
 import "strconv"
 // Compile-time guard: each index expression below is only valid while the
 // corresponding BlockSize constant still has the value the generated String
 // method was built for.
 func _() {
 	// An "invalid array index" compiler error signifies that the constant values have changed.
 	// Re-run the stringer command to generate them again.
 	var x [1]struct{}
 	_ = x[Block64Kb-65536]
 	_ = x[Block256Kb-262144]
 	_ = x[Block1Mb-1048576]
 	_ = x[Block4Mb-4194304]
 }
 // Names returned by BlockSize.String for the known block sizes.
 const (
 	_BlockSize_name_0 = "Block64Kb"
 	_BlockSize_name_1 = "Block256Kb"
 	_BlockSize_name_2 = "Block1Mb"
 	_BlockSize_name_3 = "Block4Mb"
 )
 // String returns the constant name of a known block size, or "BlockSize(N)"
 // with N in decimal for any other value.
 func (i BlockSize) String() string {
 	switch i {
 	case 65536:
 		return _BlockSize_name_0
 	case 262144:
 		return _BlockSize_name_1
 	case 1048576:
 		return _BlockSize_name_2
 	case 4194304:
 		return _BlockSize_name_3
 	}
 	return "BlockSize(" + strconv.FormatInt(int64(i), 10) + ")"
 }
 // Compile-time guard: each index expression below is only valid while the
 // corresponding CompressionLevel constant still has the value the generated
 // String method was built for.
 func _() {
 	// An "invalid array index" compiler error signifies that the constant values have changed.
 	// Re-run the stringer command to generate them again.
 	var x [1]struct{}
 	_ = x[Fast-0]
 	_ = x[Level1-512]
 	_ = x[Level2-1024]
 	_ = x[Level3-2048]
 	_ = x[Level4-4096]
 	_ = x[Level5-8192]
 	_ = x[Level6-16384]
 	_ = x[Level7-32768]
 	_ = x[Level8-65536]
 	_ = x[Level9-131072]
 }
 // Names returned by CompressionLevel.String for the known compression levels.
 const (
 	_CompressionLevel_name_0 = "Fast"
 	_CompressionLevel_name_1 = "Level1"
 	_CompressionLevel_name_2 = "Level2"
 	_CompressionLevel_name_3 = "Level3"
 	_CompressionLevel_name_4 = "Level4"
 	_CompressionLevel_name_5 = "Level5"
 	_CompressionLevel_name_6 = "Level6"
 	_CompressionLevel_name_7 = "Level7"
 	_CompressionLevel_name_8 = "Level8"
 	_CompressionLevel_name_9 = "Level9"
 )
 // String returns the constant name of a known compression level, or
 // "CompressionLevel(N)" with N in decimal for any other value.
 func (i CompressionLevel) String() string {
 	switch i {
 	case 0:
 		return _CompressionLevel_name_0
 	case 512:
 		return _CompressionLevel_name_1
 	case 1024:
 		return _CompressionLevel_name_2
 	case 2048:
 		return _CompressionLevel_name_3
 	case 4096:
 		return _CompressionLevel_name_4
 	case 8192:
 		return _CompressionLevel_name_5
 	case 16384:
 		return _CompressionLevel_name_6
 	case 32768:
 		return _CompressionLevel_name_7
 	case 65536:
 		return _CompressionLevel_name_8
 	case 131072:
 		return _CompressionLevel_name_9
 	}
 	return "CompressionLevel(" + strconv.FormatInt(int64(i), 10) + ")"
 }
--- a/vendor/github.com/pierrec/lz4/v4/reader.go
+++ b/vendor/github.com/pierrec/lz4/v4/reader.go
@ -0,0 +1,275 @@
 package lz4
 import (
 	"bytes"
 	"io"
 	"github.com/pierrec/lz4/v4/internal/lz4block"
 	"github.com/pierrec/lz4/v4/internal/lz4errors"
 	"github.com/pierrec/lz4/v4/internal/lz4stream"
 )
 // readerStates is the Reader transition table: the entry at index S is the
 // state that follows S on a successful transition. Index 0 (noState) doubles
 // as the initial/reset state.
 var readerStates = []aState{
 	noState:     newState,
 	errorState:  newState,
 	newState:    readState,
 	readState:   closedState,
 	closedState: newState,
 }
 // NewReader creates an LZ4 frame decoder that reads compressed data from r.
 func NewReader(r io.Reader) *Reader {
 	const legacy = false
 	return newReader(r, legacy)
 }
 // newReader builds a Reader over r with the default concurrency and block
 // callback options applied.
 //
 // NOTE(review): the legacy parameter is not used by this function.
 func newReader(r io.Reader, legacy bool) *Reader {
 	zr := new(Reader)
 	zr.frame = lz4stream.NewFrame()
 	zr.state.init(readerStates)
 	// The result of applying the defaults is deliberately discarded.
 	_ = zr.Apply(DefaultConcurrency, defaultOnBlockDone)
 	zr.Reset(r)
 	return zr
 }
 // Reader allows reading an LZ4 stream.
 type Reader struct {
 	state   _State           // life-cycle state machine (see readerStates)
 	src     io.Reader        // source reader
 	num     int              // concurrency level
 	frame   *lz4stream.Frame // frame being read
 	data    []byte           // block buffer allocated in non concurrent mode
 	reads   chan []byte      // pending data
 	idx     int              // size of pending data
 	handler func(int)        // callback invoked by Read and WriteTo with a byte count
 	cum     uint32           // running total of uncompressed bytes, passed to block.Read
 	dict    []byte           // previously uncompressed data, kept when blocks are dependent
 }
 // private is an empty marker method.
 // NOTE(review): presumably what makes *Reader satisfy the applier interface — confirm.
 func (*Reader) private() {}
 // Apply applies the given options to the Reader, in order, stopping at the
 // first one that fails.
 //
 // Options can only be applied while the Reader is in its initial state. In
 // the error state the recorded error is returned; in any other state
 // lz4errors.ErrOptionClosedOrError is returned.
 func (r *Reader) Apply(options ...Option) (err error) {
 	defer r.state.check(&err)
 	if st := r.state.state; st == errorState {
 		return r.state.err
 	} else if st != newState {
 		return lz4errors.ErrOptionClosedOrError
 	}
 	for i := range options {
 		if err = options[i](r); err != nil {
 			break
 		}
 	}
 	return err
 }
 // Size reports the uncompressed size recorded in the frame descriptor.
 // It returns 0 when the Reader is neither reading nor closed, or when the
 // frame flags do not advertise a content size.
 func (r *Reader) Size() int {
 	st := r.state.state
 	if st != readState && st != closedState {
 		return 0
 	}
 	if !r.frame.Descriptor.Flags.Size() {
 		return 0
 	}
 	return int(r.frame.Descriptor.ContentSize)
 }
 // isNotConcurrent reports whether the Reader uses a concurrency level of exactly 1.
 func (r *Reader) isNotConcurrent() bool {
 	const sequential = 1
 	return r.num == sequential
 }
 // init parses the frame headers from the source and prepares the Reader for
 // decoding: it sets up the frame for reading, allocates the block buffer and
 // resets the read position and the running byte count.
 func (r *Reader) init() error {
 	if err := r.frame.ParseHeaders(r.src); err != nil {
 		return err
 	}
 	if !r.frame.Descriptor.Flags.BlockIndependence() {
 		// Dependent blocks cannot be decompressed concurrently, so concurrency
 		// is silently dropped rather than reported as an error.
 		r.num = 1
 	}
 	reads, err := r.frame.InitR(r.src, r.num)
 	if err != nil {
 		return err
 	}
 	r.reads, r.idx, r.cum = reads, 0, 0
 	size := r.frame.Descriptor.Flags.BlockSizeIndex()
 	r.data = size.Get()
 	return nil
 }
 // Read uncompresses data from the source into buf and returns the number of
 // bytes stored. The frame headers are parsed on the first call.
 //
 // When the end of the frame is reached the frame is closed, the block buffer
 // is released and io.EOF (or the error from closing the frame) is returned
 // along with the bytes read so far. Any returned error is recorded in the
 // Reader state through the deferred check.
 func (r *Reader) Read(buf []byte) (n int, err error) {
 	defer r.state.check(&err)
 	switch r.state.state {
 	case readState:
 	case closedState, errorState:
 		return 0, r.state.err
 	case newState:
 		// First initialization.
 		if err = r.init(); r.state.next(err) {
 			return
 		}
 	default:
 		return 0, r.state.fail()
 	}
 	for len(buf) > 0 {
 		// bn is the number of bytes stored into buf during this iteration.
 		var bn int
 		if r.idx == 0 {
 			// No buffered data left: fetch the next block.
 			if r.isNotConcurrent() {
 				// bn > 0 means the block was uncompressed straight into buf;
 				// bn == 0 means it was stored in r.data instead.
 				bn, err = r.read(buf)
 			} else {
 				// Return the consumed block to the pool before taking the next one.
 				lz4block.Put(r.data)
 				r.data = <-r.reads
 				if len(r.data) == 0 {
 					// No uncompressed data: something went wrong or we are done.
 					err = r.frame.Blocks.ErrorR()
 				}
 			}
 			switch err {
 			case nil:
 			case io.EOF:
 				// End of frame: a failure to close the frame takes precedence over io.EOF.
 				if er := r.frame.CloseR(r.src); er != nil {
 					err = er
 				}
 				lz4block.Put(r.data)
 				r.data = nil
 				return
 			default:
 				return
 			}
 		}
 		if bn == 0 {
 			// Fill buf with buffered data.
 			bn = copy(buf, r.data[r.idx:])
 			r.idx += bn
 			if r.idx == len(r.data) {
 				// All data read, get ready for the next Read.
 				r.idx = 0
 			}
 		}
 		buf = buf[bn:]
 		n += bn
 		r.handler(bn)
 	}
 	return
 }
 // read uncompresses the next block as follows:
 //   - if buf has enough room, the block is uncompressed into it directly
 //     and the length of the used space is returned
 //   - else, the uncompressed data is stored in r.data and 0 is returned
 //
 // When blocks are not independent, the uncompressed data is also appended to
 // r.dict so that it can serve as the dictionary for the following block.
 func (r *Reader) read(buf []byte) (int, error) {
 	block := r.frame.Blocks.Block
 	_, err := block.Read(r.frame, r.src, r.cum)
 	if err != nil {
 		return 0, err
 	}
 	var direct bool
 	// Use the full capacity of the block buffer as the default destination.
 	dst := r.data[:cap(r.data)]
 	if len(buf) >= len(dst) {
 		// Uncompress directly into buf.
 		direct = true
 		dst = buf
 	}
 	dst, err = block.Uncompress(r.frame, dst, r.dict, true)
 	if err != nil {
 		return 0, err
 	}
 	if !r.frame.Descriptor.Flags.BlockIndependence() {
 		if len(r.dict)+len(dst) > 128*1024 {
 			// Trim the dictionary: keep only its tail so that, together with
 			// the new block, at most 64 KiB of older data is preserved.
 			preserveSize := 64*1024 - len(dst)
 			if preserveSize < 0 {
 				preserveSize = 0
 			}
 			r.dict = r.dict[len(r.dict)-preserveSize:]
 		}
 		r.dict = append(r.dict, dst...)
 	}
 	r.cum += uint32(len(dst))
 	if direct {
 		return len(dst), nil
 	}
 	// The data did not fit in buf: keep it buffered for the caller to copy out.
 	r.data = dst
 	return 0, nil
 }
 // Reset returns the Reader r to the state it had right after NewReader,
 // except that it now reads from reader. Any block buffer still held is
 // released first.
 // No access to reader is performed.
 func (r *Reader) Reset(reader io.Reader) {
 	if buf := r.data; buf != nil {
 		lz4block.Put(buf)
 		r.data = nil
 	}
 	r.frame.Reset(r.num)
 	r.state.reset()
 	r.src, r.reads = reader, nil
 }
 // WriteTo efficiently uncompresses the data from the Reader underlying source to w.
 //
 // It returns the number of uncompressed bytes written to w. The frame headers
 // are parsed first, so WriteTo must be called on a Reader in its initial
 // state: a closed or failed Reader returns its recorded error, and any other
 // state is reported as an unhandled-state error.
 func (r *Reader) WriteTo(w io.Writer) (n int64, err error) {
 	switch r.state.state {
 	case closedState, errorState:
 		return 0, r.state.err
 	case newState:
 		if err = r.init(); r.state.next(err) {
 			return
 		}
 	default:
 		return 0, r.state.fail()
 	}
 	// Move to the next state (or to the error state) once done.
 	defer r.state.nextd(&err)
 	var data []byte
 	if r.isNotConcurrent() {
 		// A single buffer is reused for every block.
 		size := r.frame.Descriptor.Flags.BlockSizeIndex()
 		data = size.Get()
 		defer lz4block.Put(data)
 	}
 	// dst is declared outside the loop so that, in concurrent mode, the block
 	// written during the previous iteration is actually handed back to the
 	// pool (a loop-local dst would always be nil at that point).
 	var dst []byte
 	for {
 		var bn int
 		if r.isNotConcurrent() {
 			bn, err = r.read(data)
 			dst = data[:bn]
 		} else {
 			lz4block.Put(dst)
 			dst = <-r.reads
 			bn = len(dst)
 			if bn == 0 {
 				// No uncompressed data: something went wrong or we are done.
 				err = r.frame.Blocks.ErrorR()
 			}
 		}
 		switch err {
 		case nil:
 		case io.EOF:
 			// End of frame: report the result of closing it instead of io.EOF.
 			err = r.frame.CloseR(r.src)
 			return
 		default:
 			return
 		}
 		r.handler(bn)
 		bn, err = w.Write(dst)
 		n += int64(bn)
 		if err != nil {
 			return
 		}
 	}
 }
 // ValidFrameHeader reports whether in starts with a valid LZ4 frame header.
 // An invalid frame yields (false, nil); any other parsing failure is returned
 // as the error.
 func ValidFrameHeader(in []byte) (bool, error) {
 	f := lz4stream.NewFrame()
 	switch err := f.ParseHeaders(bytes.NewReader(in)); err {
 	case nil:
 		return true, nil
 	case lz4errors.ErrInvalidFrame:
 		return false, nil
 	default:
 		return false, err
 	}
 }
--- a/vendor/github.com/pierrec/lz4/v4/state.go
+++ b/vendor/github.com/pierrec/lz4/v4/state.go
@ -0,0 +1,75 @@
 package lz4
 import (
 	"errors"
 	"fmt"
 	"io"
 	"github.com/pierrec/lz4/v4/internal/lz4errors"
 )
 //go:generate go run golang.org/x/tools/cmd/stringer -type=aState -output state_gen.go
 // States used by the Reader and Writer state machines. The numeric values
 // index the transition tables, so the order matters.
 const (
 	noState     aState = iota // uninitialized reader
 	errorState                // unrecoverable error encountered
 	newState                  // instantiated object
 	readState                 // reading data
 	writeState                // writing data
 	closedState               // all done
 )
 type (
 	// aState identifies one step in the life cycle of a Reader or Writer.
 	aState uint8
 	// _State is a small state machine driven by a transition table.
 	_State struct {
 		states []aState // transition table: states[s] is the state following s
 		state  aState   // current state
 		err    error    // last error recorded by next, check or fail
 	}
 )
 // init installs states as the transition table and moves to its first entry.
 func (s *_State) init(states []aState) {
 	s.states = states
 	s.state = s.states[0]
 }
 // reset moves back to the first entry of the transition table and clears
 // the recorded error.
 func (s *_State) reset() {
 	s.state, s.err = s.states[0], nil
 }
 // next advances to the state that follows the current one in the transition
 // table and returns false. If err is non-nil it instead records err, prefixed
 // with the current state, moves to the error state and returns true.
 func (s *_State) next(err error) bool {
 	if err == nil {
 		s.state = s.states[s.state]
 		return false
 	}
 	s.err = fmt.Errorf("%s: %w", s.state, err)
 	s.state = errorState
 	return true
 }
 // nextd is the defer-friendly form of next: it takes a pointer to the error so
 // that the value is read when the deferred call runs. A nil pointer is a no-op
 // that returns false.
 func (s *_State) nextd(errp *error) bool {
 	if errp == nil {
 		return false
 	}
 	return s.next(*errp)
 }
 // check records the error pointed to by errp, annotated with the current
 // state, and moves s to the error state unless that error is io.EOF.
 // It does nothing when s is already in error, errp is nil or *errp is nil.
 func (s *_State) check(errp *error) {
 	if errp == nil || s.state == errorState {
 		return
 	}
 	err := *errp
 	if err == nil {
 		return
 	}
 	s.err = fmt.Errorf("%w[%s]", err, s.state)
 	if errors.Is(err, io.EOF) {
 		// End of input is recorded but is not a failure.
 		return
 	}
 	s.state = errorState
 }
 // fail puts s in the error state because the current state was not expected
 // by the caller. The recorded (and returned) error wraps
 // lz4errors.ErrInternalUnhandledState and names the offending state.
 func (s *_State) fail() error {
 	// Capture the state before overwriting it, otherwise the message would
 	// always report errorState instead of the state that was not handled.
 	unhandled := s.state
 	s.state = errorState
 	s.err = fmt.Errorf("%w[%s]", lz4errors.ErrInternalUnhandledState, unhandled)
 	return s.err
 }
--- a/vendor/github.com/pierrec/lz4/v4/state_gen.go
+++ b/vendor/github.com/pierrec/lz4/v4/state_gen.go
@ -0,0 +1,28 @@
 // Code generated by "stringer -type=aState -output state_gen.go"; DO NOT EDIT.
 package lz4
 import "strconv"
 // Compile-time guard: each index expression below is only valid while the
 // corresponding aState constant still has the value the generated String
 // method was built for.
 func _() {
 	// An "invalid array index" compiler error signifies that the constant values have changed.
 	// Re-run the stringer command to generate them again.
 	var x [1]struct{}
 	_ = x[noState-0]
 	_ = x[errorState-1]
 	_ = x[newState-2]
 	_ = x[readState-3]
 	_ = x[writeState-4]
 	_ = x[closedState-5]
 }
 // _aState_name is the concatenation of all aState names; _aState_index holds
 // the offset at which each name starts, plus a final entry marking the end.
 const _aState_name = "noStateerrorStatenewStatereadStatewriteStateclosedState"
 var _aState_index = [...]uint8{0, 7, 17, 25, 34, 44, 55}
 // String returns the name of a known state, or "aState(N)" with N in decimal
 // for any value outside the known range.
 func (i aState) String() string {
 	if limit := aState(len(_aState_index) - 1); i < limit {
 		lo, hi := _aState_index[i], _aState_index[i+1]
 		return _aState_name[lo:hi]
 	}
 	return "aState(" + strconv.FormatInt(int64(i), 10) + ")"
 }
--- a/vendor/github.com/pierrec/lz4/v4/writer.go
+++ b/vendor/github.com/pierrec/lz4/v4/writer.go
@ -0,0 +1,242 @@
 package lz4
 import (
 	"io"
 	"github.com/pierrec/lz4/v4/internal/lz4block"
 	"github.com/pierrec/lz4/v4/internal/lz4errors"
 	"github.com/pierrec/lz4/v4/internal/lz4stream"
 )
 // writerStates is the Writer transition table: the entry at index S is the
 // state that follows S on a successful transition. Index 0 (noState) doubles
 // as the initial/reset state.
 var writerStates = []aState{
 	noState:     newState,
 	newState:    writeState,
 	writeState:  closedState,
 	closedState: newState,
 	errorState:  newState,
 }
 // NewWriter creates an LZ4 frame encoder that writes compressed data to w,
 // configured with the default block size, checksum, concurrency and block
 // callback options.
 func NewWriter(w io.Writer) *Writer {
 	zw := new(Writer)
 	zw.frame = lz4stream.NewFrame()
 	zw.state.init(writerStates)
 	defaults := []Option{DefaultBlockSizeOption, DefaultChecksumOption, DefaultConcurrency, defaultOnBlockDone}
 	// The result of applying the defaults is deliberately discarded.
 	_ = zw.Apply(defaults...)
 	zw.Reset(w)
 	return zw
 }
 // Writer allows writing an LZ4 stream.
 type Writer struct {
 	state   _State                    // life-cycle state machine (see writerStates)
 	src     io.Writer                 // destination writer
 	level   lz4block.CompressionLevel // how hard to try
 	num     int                       // concurrency level
 	frame   *lz4stream.Frame          // frame being built
 	data    []byte                    // pending data
 	idx     int                       // size of pending data
 	handler func(int)                 // callback invoked with a byte count once a block is done
 	legacy  bool                      // passed to frame.InitW; set by LegacyOption
 }
 // private is an empty marker method.
 // NOTE(review): presumably what makes *Writer satisfy the applier interface — confirm.
 func (*Writer) private() {}
 // Apply resets the Writer on its current destination and then applies the
 // given options, in order, stopping at the first one that fails.
 //
 // Options can only be applied while the Writer is in its initial state. In
 // the error state the recorded error is returned; in any other state
 // lz4errors.ErrOptionClosedOrError is returned.
 func (w *Writer) Apply(options ...Option) (err error) {
 	defer w.state.check(&err)
 	if st := w.state.state; st == errorState {
 		return w.state.err
 	} else if st != newState {
 		return lz4errors.ErrOptionClosedOrError
 	}
 	w.Reset(w.src)
 	for i := range options {
 		if err = options[i](w); err != nil {
 			break
 		}
 	}
 	return err
 }
 // isNotConcurrent reports whether the Writer uses a concurrency level of exactly 1.
 func (w *Writer) isNotConcurrent() bool {
 	const sequential = 1
 	return w.num == sequential
 }
 // init prepares a Writer that is in newState: it sets up the frame for
 // writing, allocates the pending-data buffer and writes the frame descriptor
 // to the destination. The Writer state itself is left unchanged.
 func (w *Writer) init() error {
 	w.frame.InitW(w.src, w.num, w.legacy)
 	size := w.frame.Descriptor.Flags.BlockSizeIndex()
 	w.data, w.idx = size.Get(), 0
 	return w.frame.Descriptor.Write(w.frame, w.src)
 }
 // Write compresses buf into the destination and returns the number of bytes
 // of buf that were consumed. The frame is initialized on the first call.
 //
 // Data is accumulated in w.data and only compressed once a full block is
 // available, so a short final chunk stays pending until Flush or Close.
 // Any returned error is recorded in the Writer state through the deferred check.
 func (w *Writer) Write(buf []byte) (n int, err error) {
 	defer w.state.check(&err)
 	switch w.state.state {
 	case writeState:
 	case closedState, errorState:
 		return 0, w.state.err
 	case newState:
 		if err = w.init(); w.state.next(err) {
 			return
 		}
 	default:
 		return 0, w.state.fail()
 	}
 	// zn is the length of the pending-data buffer, i.e. the size of one block.
 	zn := len(w.data)
 	for len(buf) > 0 {
 		if w.isNotConcurrent() && w.idx == 0 && len(buf) >= zn {
 			// Avoid a copy as there is enough data for a block.
 			if err = w.write(buf[:zn], false); err != nil {
 				return
 			}
 			n += zn
 			buf = buf[zn:]
 			continue
 		}
 		// Accumulate the data to be compressed.
 		m := copy(w.data[w.idx:], buf)
 		n += m
 		w.idx += m
 		buf = buf[m:]
 		if w.idx < len(w.data) {
 			// Buffer not filled.
 			return
 		}
 		// Buffer full.
 		if err = w.write(w.data, true); err != nil {
 			return
 		}
 		if !w.isNotConcurrent() {
 			// In concurrent mode the full buffer was handed to w.write with
 			// safe=true, so a fresh one is needed for the next block.
 			size := w.frame.Descriptor.Flags.BlockSizeIndex()
 			w.data = size.Get()
 		}
 		w.idx = 0
 	}
 	return
 }
 // write compresses data as one block.
 //
 // In non concurrent mode the block is compressed and written to the
 // destination synchronously, and the write error is returned.
 // In concurrent mode a channel is queued on the frame and the compression
 // runs in a new goroutine; write then always returns nil. When safe is true
 // the goroutine returns data to the block pool once it is done with it.
 func (w *Writer) write(data []byte, safe bool) error {
 	if w.isNotConcurrent() {
 		block := w.frame.Blocks.Block
 		err := block.Compress(w.frame, data, w.level).Write(w.frame, w.src)
 		w.handler(len(block.Data))
 		return err
 	}
 	// Queue the channel first; it is used for the hand-off with the goroutine below.
 	c := make(chan *lz4stream.FrameDataBlock)
 	w.frame.Blocks.Blocks <- c
 	go func(c chan *lz4stream.FrameDataBlock, data []byte, safe bool) {
 		b := lz4stream.NewFrameDataBlock(w.frame)
 		// Hand over the compressed block, then wait on the channel before
 		// releasing resources (presumably until the consumer has written the
 		// block and closed c — confirm against lz4stream).
 		c <- b.Compress(w.frame, data, w.level)
 		<-c
 		w.handler(len(b.Data))
 		b.Close(w.frame)
 		if safe {
 			// safe to put it back as the last usage of it was FrameDataBlock.Write() called before c is closed
 			lz4block.Put(data)
 		}
 	}(c, data, safe)
 	return nil
 }
 // Flush compresses any pending data as a block right away, initializing the
 // frame first if nothing has been written yet. On a Writer in error the
 // recorded error is returned; in any other non-writing state Flush is a no-op.
 func (w *Writer) Flush() (err error) {
 	switch w.state.state {
 	case errorState:
 		return w.state.err
 	case newState:
 		err = w.init()
 		if w.state.next(err) {
 			return err
 		}
 	case writeState:
 		// Ready to flush.
 	default:
 		return nil
 	}
 	if w.idx == 0 {
 		// Nothing pending.
 		return nil
 	}
 	// Flush pending data, disable w.data freeing as it is done later on.
 	if err = w.write(w.data[:w.idx], false); err != nil {
 		return err
 	}
 	w.idx = 0
 	return nil
 }
 // Close flushes any pending data, closes the frame on the destination and
 // releases the pending-data buffer. The destination writer itself is not
 // closed. If flushing fails, that error is returned and the frame is left open.
 func (w *Writer) Close() error {
 	if err := w.Flush(); err != nil {
 		return err
 	}
 	closeErr := w.frame.CloseW(w.src, w.num)
 	// It is now safe to free the buffer.
 	if buf := w.data; buf != nil {
 		lz4block.Put(buf)
 		w.data = nil
 	}
 	return closeErr
 }
 // Reset clears the state of the Writer w such that it is equivalent to its
 // initial state from NewWriter, but instead writing to writer.
 // Reset keeps the previous options unless overwritten by the supplied ones.
 // No access to writer is performed.
 //
 // w.Close must be called before Reset or pending data may be dropped.
 func (w *Writer) Reset(writer io.Writer) {
 	// Reset the frame for the current concurrency level, rewind the state
 	// machine (which also clears any recorded error), then swap the destination.
 	w.frame.Reset(w.num)
 	w.state.reset()
 	w.src = writer
 }
 // ReadFrom efficiently reads from r and compresses into the Writer destination.
 //
 // It reads r one block at a time until EOF and returns the number of bytes
 // read. The frame is initialized first, so ReadFrom must be called on a Writer
 // in its initial state: a closed or failed Writer returns its recorded error,
 // and any other state is reported as an unhandled-state error.
 func (w *Writer) ReadFrom(r io.Reader) (n int64, err error) {
 	switch w.state.state {
 	case closedState, errorState:
 		return 0, w.state.err
 	case newState:
 		if err = w.init(); w.state.next(err) {
 			return
 		}
 	default:
 		return 0, w.state.fail()
 	}
 	defer w.state.check(&err)
 	size := w.frame.Descriptor.Flags.BlockSizeIndex()
 	var done bool
 	var rn int
 	data := size.Get()
 	if w.isNotConcurrent() {
 		// Keep the same buffer for the whole process.
 		defer lz4block.Put(data)
 	}
 	for !done {
 		rn, err = io.ReadFull(r, data)
 		switch err {
 		case nil:
 		case io.EOF, io.ErrUnexpectedEOF: // read may be partial
 			done = true
 		default:
 			return
 		}
 		n += int64(rn)
 		// The last (possibly short or empty) chunk is still passed to write.
 		err = w.write(data[:rn], true)
 		if err != nil {
 			return
 		}
 		w.handler(rn)
 		if !done && !w.isNotConcurrent() {
 			// The buffer will be returned automatically by go routines (safe=true)
 			// so get a new one for the next round.
 			data = size.Get()
 		}
 	}
 	return
 }
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@ -8,6 +8,9 @@ github.com/alecthomas/chroma/v2/styles
 ## explicit; go 1.13
 github.com/dlclark/regexp2
 github.com/dlclark/regexp2/syntax
 # github.com/golang/snappy v0.0.4
 ## explicit
 github.com/golang/snappy
 # github.com/inconshreveable/mousetrap v1.1.0
 ## explicit; go 1.18
 github.com/inconshreveable/mousetrap
@ -23,6 +26,13 @@ github.com/klauspost/compress/internal/cpuinfo
 github.com/klauspost/compress/internal/snapref
 github.com/klauspost/compress/zstd
 github.com/klauspost/compress/zstd/internal/xxhash
 # github.com/pierrec/lz4/v4 v4.1.19
 ## explicit; go 1.14
 github.com/pierrec/lz4/v4
 github.com/pierrec/lz4/v4/internal/lz4block
 github.com/pierrec/lz4/v4/internal/lz4errors
 github.com/pierrec/lz4/v4/internal/lz4stream
 github.com/pierrec/lz4/v4/internal/xxh32
 # github.com/spf13/cobra v1.8.0
 ## explicit; go 1.15
 github.com/spf13/cobra