diff options
Diffstat (limited to 'vendor/github.com')
65 files changed, 10452 insertions, 3628 deletions
diff --git a/vendor/github.com/cornelk/hashmap/.codecov.yml b/vendor/github.com/cornelk/hashmap/.codecov.yml new file mode 100644 index 000000000..b9ca27e34 --- /dev/null +++ b/vendor/github.com/cornelk/hashmap/.codecov.yml @@ -0,0 +1,6 @@ +coverage: + status: + project: + default: + target: 70% + threshold: 5% diff --git a/vendor/github.com/cornelk/hashmap/.gitignore b/vendor/github.com/cornelk/hashmap/.gitignore new file mode 100644 index 000000000..38ecb5dc2 --- /dev/null +++ b/vendor/github.com/cornelk/hashmap/.gitignore @@ -0,0 +1,14 @@ +*.exe +.idea +.vscode +*.iml +*.local +/*.log +*.out +*.prof +*.test +.DS_Store +*.dmp +*.db + +.testCoverage diff --git a/vendor/github.com/cornelk/hashmap/.golangci.yml b/vendor/github.com/cornelk/hashmap/.golangci.yml new file mode 100644 index 000000000..0c29842d6 --- /dev/null +++ b/vendor/github.com/cornelk/hashmap/.golangci.yml @@ -0,0 +1,68 @@ +run: + deadline: 5m + +linters: + enable: + - asasalint # check for pass []any as any in variadic func(...any) + - asciicheck # Simple linter to check that your code does not contain non-ASCII identifiers + - bidichk # Checks for dangerous unicode character sequences + - containedctx # detects struct contained context.Context field + - contextcheck # check the function whether use a non-inherited context + - cyclop # checks function and package cyclomatic complexity + - decorder # check declaration order and count of types, constants, variables and functions + - depguard # Go linter that checks if package imports are in a list of acceptable packages + - dogsled # Checks assignments with too many blank identifiers (e.g. x, _, _, _, := f()) + - durationcheck # check for two durations multiplied together + - errcheck # checking for unchecked errors + - errname # Checks that errors are prefixed with the `Err` and error types are suffixed with the `Error` + - errorlint # finds code that will cause problems with the error wrapping scheme introduced in Go 1.13 + - exportloopref # checks for pointers to enclosing loop variables + - funlen # Tool for detection of long functions + - gci # controls golang package import order and makes it always deterministic + - gocognit # Computes and checks the cognitive complexity of functions + - gocritic # Provides diagnostics that check for bugs, performance and style issues + - gocyclo # Computes and checks the cyclomatic complexity of functions + - godot # Check if comments end in a period + - goerr113 # Golang linter to check the errors handling expressions + - gosimple # Linter for Go source code that specializes in simplifying a code + - govet # reports suspicious constructs, such as Printf calls with wrong arguments + - ineffassign # Detects when assignments to existing variables are not used + - maintidx # measures the maintainability index of each function + - makezero # Finds slice declarations with non-zero initial length + - misspell # Finds commonly misspelled English words in comments + - nakedret # Finds naked returns in functions + - nestif # Reports deeply nested if statements + - nilerr # Finds the code that returns nil even if it checks that the error is not nil + - nilnil # Checks that there is no simultaneous return of `nil` error and an invalid value + - prealloc # Finds slice declarations that could potentially be preallocated + - predeclared # find code that shadows one of Go's predeclared identifiers + - revive # drop-in replacement of golint + - staticcheck # drop-in replacement of go vet + - stylecheck # Stylecheck is a replacement for golint + - tenv # detects using os.Setenv instead of t.Setenv + - thelper # checks the consistency of test helpers + - tparallel # detects inappropriate usage of t.Parallel() + - typecheck # parses and type-checks Go code + - unconvert # Remove unnecessary type conversions + - unparam # Reports unused function parameters + - unused # Checks Go code for unused constants, variables, functions and types + - usestdlibvars # detect the possibility to use variables/constants from the Go standard library + - wastedassign # finds wasted assignment statements + - whitespace # detects leading and trailing whitespace + +linters-settings: + cyclop: + max-complexity: 15 + gocritic: + disabled-checks: + - newDeref + govet: + disable: + - unsafeptr + +issues: + exclude-use-default: false + exclude-rules: + - linters: + - goerr113 + text: "do not define dynamic errors" diff --git a/vendor/github.com/cornelk/hashmap/LICENSE b/vendor/github.com/cornelk/hashmap/LICENSE new file mode 100644 index 000000000..e034cdf25 --- /dev/null +++ b/vendor/github.com/cornelk/hashmap/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright cornelk + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/cornelk/hashmap/Makefile b/vendor/github.com/cornelk/hashmap/Makefile new file mode 100644 index 000000000..9bab5c4dd --- /dev/null +++ b/vendor/github.com/cornelk/hashmap/Makefile @@ -0,0 +1,25 @@ +help: ## show help, shown by default if no target is specified + @grep -E '^[0-9a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + +lint: ## run code linters + golangci-lint run + +benchmark: ## run benchmarks + cd benchmarks && perflock go test -cpu 8 -run=^# -bench=. + +benchmark-perflock: ## run benchmarks using perflock - https://github.com/aclements/perflock + cd benchmarks && perflock -governor 80% go test -count 3 -cpu 8 -run=^# -bench=. + +test: ## run tests + go test -race ./... + GOARCH=386 go test ./... + +test-coverage: ## run unit tests and create test coverage + go test ./... -coverprofile .testCoverage -covermode=atomic -coverpkg=./... + +test-coverage-web: test-coverage ## run unit tests and show test coverage in browser + go tool cover -func .testCoverage | grep total | awk '{print "Total coverage: "$$3}' + go tool cover -html=.testCoverage + +install-linters: ## install all used linters + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $$(go env GOPATH)/bin v1.49.0 diff --git a/vendor/github.com/cornelk/hashmap/README.md b/vendor/github.com/cornelk/hashmap/README.md new file mode 100644 index 000000000..955eb5816 --- /dev/null +++ b/vendor/github.com/cornelk/hashmap/README.md @@ -0,0 +1,88 @@ +# hashmap + +[](https://github.com/cornelk/hashmap/actions) +[](https://pkg.go.dev/github.com/cornelk/hashmap) +[](https://goreportcard.com/report/github.com/cornelk/hashmap) +[](https://codecov.io/gh/cornelk/hashmap) + +## Overview + +A Golang lock-free thread-safe HashMap optimized for fastest read access. + +It is not a general-use HashMap and currently has slow write performance for write heavy uses. + +The minimal supported Golang version is 1.19 as it makes use of Generics and the new atomic package helpers. + +## Usage + +Example uint8 key map uses: + +``` +m := New[uint8, int]() +m.Set(1, 123) +value, ok := m.Get(1) +``` + +Example string key map uses: + +``` +m := New[string, int]() +m.Set("amount", 123) +value, ok := m.Get("amount") +``` + +Using the map to count URL requests: +``` +m := New[string, *int64]() +var i int64 +counter, _ := m.GetOrInsert("api/123", &i) +atomic.AddInt64(counter, 1) // increase counter +... +count := atomic.LoadInt64(counter) // read counter +``` + +## Benchmarks + +Reading from the hash map for numeric key types in a thread-safe way is faster than reading from a standard Golang map +in an unsafe way and four times faster than Golang's `sync.Map`: + +``` +BenchmarkReadHashMapUint-8 1774460 677.3 ns/op +BenchmarkReadHaxMapUint-8 1758708 679.0 ns/op +BenchmarkReadGoMapUintUnsafe-8 1497732 790.9 ns/op +BenchmarkReadGoMapUintMutex-8 41562 28672 ns/op +BenchmarkReadGoSyncMapUint-8 454401 2646 ns/op +``` + +Reading from the map while writes are happening: +``` +BenchmarkReadHashMapWithWritesUint-8 1388560 859.1 ns/op +BenchmarkReadHaxMapWithWritesUint-8 1306671 914.5 ns/op +BenchmarkReadGoSyncMapWithWritesUint-8 335732 3113 ns/op +``` + +Write performance without any concurrent reads: + +``` +BenchmarkWriteHashMapUint-8 54756 21977 ns/op +BenchmarkWriteGoMapMutexUint-8 83907 14827 ns/op +BenchmarkWriteGoSyncMapUint-8 16983 70305 ns/op +``` + +The benchmarks were run with Golang 1.19.0 on Linux and AMD64 using `make benchmark`. + +## Technical details + +* Technical design decisions have been made based on benchmarks that are stored in an external repository: + [go-benchmark](https://github.com/cornelk/go-benchmark) + +* The library uses a sorted linked list and a slice as an index into that list. + +* The Get() function contains helper functions that have been inlined manually until the Golang compiler will inline them automatically. + +* It optimizes the slice access by circumventing the Golang size check when reading from the slice. + Once a slice is allocated, the size of it does not change. + The library limits the index into the slice, therefore the Golang size check is obsolete. + When the slice reaches a defined fill rate, a bigger slice is allocated and all keys are recalculated and transferred into the new slice. + +* For hashing, specialized xxhash implementations are used that match the size of the key type where available diff --git a/vendor/github.com/cornelk/hashmap/defines.go b/vendor/github.com/cornelk/hashmap/defines.go new file mode 100644 index 000000000..75f0e9eb3 --- /dev/null +++ b/vendor/github.com/cornelk/hashmap/defines.go @@ -0,0 +1,12 @@ +package hashmap + +// defaultSize is the default size for a map. +const defaultSize = 8 + +// maxFillRate is the maximum fill rate for the slice before a resize will happen. +const maxFillRate = 50 + +// support all numeric and string types and aliases of those. +type hashable interface { + ~int | ~int8 | ~int16 | ~int32 | ~int64 | ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr | ~float32 | ~float64 | ~string +} diff --git a/vendor/github.com/cornelk/hashmap/hashmap.go b/vendor/github.com/cornelk/hashmap/hashmap.go new file mode 100644 index 000000000..dbceb52b7 --- /dev/null +++ b/vendor/github.com/cornelk/hashmap/hashmap.go @@ -0,0 +1,348 @@ +// Package hashmap provides a lock-free and thread-safe HashMap. +package hashmap + +import ( + "bytes" + "fmt" + "reflect" + "strconv" + "sync/atomic" + "unsafe" +) + +// Map implements a read optimized hash map. +type Map[Key hashable, Value any] struct { + hasher func(Key) uintptr + store atomic.Pointer[store[Key, Value]] // pointer to a map instance that gets replaced if the map resizes + linkedList *List[Key, Value] // key sorted linked list of elements + // resizing marks a resizing operation in progress. + // this is using uintptr instead of atomic.Bool to avoid using 32 bit int on 64 bit systems + resizing atomic.Uintptr +} + +// New returns a new map instance. +func New[Key hashable, Value any]() *Map[Key, Value] { + return NewSized[Key, Value](defaultSize) +} + +// NewSized returns a new map instance with a specific initialization size. +func NewSized[Key hashable, Value any](size uintptr) *Map[Key, Value] { + m := &Map[Key, Value]{} + m.allocate(size) + m.setDefaultHasher() + return m +} + +// SetHasher sets a custom hasher. +func (m *Map[Key, Value]) SetHasher(hasher func(Key) uintptr) { + m.hasher = hasher +} + +// Len returns the number of elements within the map. +func (m *Map[Key, Value]) Len() int { + return m.linkedList.Len() +} + +// Get retrieves an element from the map under given hash key. +func (m *Map[Key, Value]) Get(key Key) (Value, bool) { + hash := m.hasher(key) + + for element := m.store.Load().item(hash); element != nil; element = element.Next() { + if element.keyHash == hash && element.key == key { + return element.Value(), true + } + + if element.keyHash > hash { + return *new(Value), false + } + } + return *new(Value), false +} + +// GetOrInsert returns the existing value for the key if present. +// Otherwise, it stores and returns the given value. +// The returned bool is true if the value was loaded, false if stored. +func (m *Map[Key, Value]) GetOrInsert(key Key, value Value) (Value, bool) { + hash := m.hasher(key) + var newElement *ListElement[Key, Value] + + for { + for element := m.store.Load().item(hash); element != nil; element = element.Next() { + if element.keyHash == hash && element.key == key { + actual := element.Value() + return actual, true + } + + if element.keyHash > hash { + break + } + } + + if newElement == nil { // allocate only once + newElement = &ListElement[Key, Value]{ + key: key, + keyHash: hash, + } + newElement.value.Store(&value) + } + + if m.insertElement(newElement, hash, key, value) { + return value, false + } + } +} + +// FillRate returns the fill rate of the map as a percentage integer. +func (m *Map[Key, Value]) FillRate() int { + store := m.store.Load() + count := int(store.count.Load()) + l := len(store.index) + return (count * 100) / l +} + +// Del deletes the key from the map and returns whether the key was deleted. +func (m *Map[Key, Value]) Del(key Key) bool { + hash := m.hasher(key) + store := m.store.Load() + element := store.item(hash) + + for ; element != nil; element = element.Next() { + if element.keyHash == hash && element.key == key { + m.deleteElement(element) + m.linkedList.Delete(element) + return true + } + + if element.keyHash > hash { + return false + } + } + return false +} + +// Insert sets the value under the specified key to the map if it does not exist yet. +// If a resizing operation is happening concurrently while calling Insert, the item might show up in the map +// after the resize operation is finished. +// Returns true if the item was inserted or false if it existed. +func (m *Map[Key, Value]) Insert(key Key, value Value) bool { + hash := m.hasher(key) + var ( + existed, inserted bool + element *ListElement[Key, Value] + ) + + for { + store := m.store.Load() + searchStart := store.item(hash) + + if !inserted { // if retrying after insert during grow, do not add to list again + element, existed, inserted = m.linkedList.Add(searchStart, hash, key, value) + if existed { + return false + } + if !inserted { + continue // a concurrent add did interfere, try again + } + } + + count := store.addItem(element) + currentStore := m.store.Load() + if store != currentStore { // retry insert in case of insert during grow + continue + } + + if m.isResizeNeeded(store, count) && m.resizing.CompareAndSwap(0, 1) { + go m.grow(0, true) + } + return true + } +} + +// Set sets the value under the specified key to the map. An existing item for this key will be overwritten. +// If a resizing operation is happening concurrently while calling Set, the item might show up in the map +// after the resize operation is finished. +func (m *Map[Key, Value]) Set(key Key, value Value) { + hash := m.hasher(key) + + for { + store := m.store.Load() + searchStart := store.item(hash) + + element, added := m.linkedList.AddOrUpdate(searchStart, hash, key, value) + if !added { + continue // a concurrent add did interfere, try again + } + + count := store.addItem(element) + currentStore := m.store.Load() + if store != currentStore { // retry insert in case of insert during grow + continue + } + + if m.isResizeNeeded(store, count) && m.resizing.CompareAndSwap(0, 1) { + go m.grow(0, true) + } + return + } +} + +// Grow resizes the map to a new size, the size gets rounded up to next power of 2. +// To double the size of the map use newSize 0. +// This function returns immediately, the resize operation is done in a goroutine. +// No resizing is done in case of another resize operation already being in progress. +func (m *Map[Key, Value]) Grow(newSize uintptr) { + if m.resizing.CompareAndSwap(0, 1) { + go m.grow(newSize, true) + } +} + +// String returns the map as a string, only hashed keys are printed. +func (m *Map[Key, Value]) String() string { + buffer := bytes.NewBufferString("") + buffer.WriteRune('[') + + first := m.linkedList.First() + item := first + + for item != nil { + if item != first { + buffer.WriteRune(',') + } + fmt.Fprint(buffer, item.keyHash) + item = item.Next() + } + buffer.WriteRune(']') + return buffer.String() +} + +// Range calls f sequentially for each key and value present in the map. +// If f returns false, range stops the iteration. +func (m *Map[Key, Value]) Range(f func(Key, Value) bool) { + item := m.linkedList.First() + + for item != nil { + value := item.Value() + if !f(item.key, value) { + return + } + item = item.Next() + } +} + +func (m *Map[Key, Value]) allocate(newSize uintptr) { + m.linkedList = NewList[Key, Value]() + if m.resizing.CompareAndSwap(0, 1) { + m.grow(newSize, false) + } +} + +func (m *Map[Key, Value]) isResizeNeeded(store *store[Key, Value], count uintptr) bool { + l := uintptr(len(store.index)) // l can't be 0 as it gets initialized in New() + fillRate := (count * 100) / l + return fillRate > maxFillRate +} + +func (m *Map[Key, Value]) insertElement(element *ListElement[Key, Value], hash uintptr, key Key, value Value) bool { + var existed, inserted bool + + for { + store := m.store.Load() + searchStart := store.item(element.keyHash) + + if !inserted { // if retrying after insert during grow, do not add to list again + _, existed, inserted = m.linkedList.Add(searchStart, hash, key, value) + if existed { + return false + } + + if !inserted { + continue // a concurrent add did interfere, try again + } + } + + count := store.addItem(element) + currentStore := m.store.Load() + if store != currentStore { // retry insert in case of insert during grow + continue + } + + if m.isResizeNeeded(store, count) && m.resizing.CompareAndSwap(0, 1) { + go m.grow(0, true) + } + return true + } +} + +// deleteElement deletes an element from index. +func (m *Map[Key, Value]) deleteElement(element *ListElement[Key, Value]) { + for { + store := m.store.Load() + index := element.keyHash >> store.keyShifts + ptr := (*unsafe.Pointer)(unsafe.Pointer(uintptr(store.array) + index*intSizeBytes)) + + next := element.Next() + if next != nil && element.keyHash>>store.keyShifts != index { + next = nil // do not set index to next item if it's not the same slice index + } + atomic.CompareAndSwapPointer(ptr, unsafe.Pointer(element), unsafe.Pointer(next)) + + currentStore := m.store.Load() + if store == currentStore { // check that no resize happened + break + } + } +} + +func (m *Map[Key, Value]) grow(newSize uintptr, loop bool) { + defer m.resizing.CompareAndSwap(1, 0) + + for { + currentStore := m.store.Load() + if newSize == 0 { + newSize = uintptr(len(currentStore.index)) << 1 + } else { + newSize = roundUpPower2(newSize) + } + + index := make([]*ListElement[Key, Value], newSize) + header := (*reflect.SliceHeader)(unsafe.Pointer(&index)) + + newStore := &store[Key, Value]{ + keyShifts: strconv.IntSize - log2(newSize), + array: unsafe.Pointer(header.Data), // use address of slice data storage + index: index, + } + + m.fillIndexItems(newStore) // initialize new index slice with longer keys + + m.store.Store(newStore) + + m.fillIndexItems(newStore) // make sure that the new index is up-to-date with the current state of the linked list + + if !loop { + return + } + + // check if a new resize needs to be done already + count := uintptr(m.Len()) + if !m.isResizeNeeded(newStore, count) { + return + } + newSize = 0 // 0 means double the current size + } +} + +func (m *Map[Key, Value]) fillIndexItems(store *store[Key, Value]) { + first := m.linkedList.First() + item := first + lastIndex := uintptr(0) + + for item != nil { + index := item.keyHash >> store.keyShifts + if item == first || index != lastIndex { // store item with smallest hash key for every index + store.addItem(item) + lastIndex = index + } + item = item.Next() + } +} diff --git a/vendor/github.com/cornelk/hashmap/list.go b/vendor/github.com/cornelk/hashmap/list.go new file mode 100644 index 000000000..596b2cf26 --- /dev/null +++ b/vendor/github.com/cornelk/hashmap/list.go @@ -0,0 +1,127 @@ +package hashmap + +import ( + "sync/atomic" +) + +// List is a sorted linked list. +type List[Key comparable, Value any] struct { + count atomic.Uintptr + head *ListElement[Key, Value] +} + +// NewList returns an initialized list. +func NewList[Key comparable, Value any]() *List[Key, Value] { + return &List[Key, Value]{ + head: &ListElement[Key, Value]{}, + } +} + +// Len returns the number of elements within the list. +func (l *List[Key, Value]) Len() int { + return int(l.count.Load()) +} + +// First returns the first item of the list. +func (l *List[Key, Value]) First() *ListElement[Key, Value] { + return l.head.Next() +} + +// Add adds an item to the list and returns false if an item for the hash existed. +// searchStart = nil will start to search at the head item. +func (l *List[Key, Value]) Add(searchStart *ListElement[Key, Value], hash uintptr, key Key, value Value) (element *ListElement[Key, Value], existed bool, inserted bool) { + left, found, right := l.search(searchStart, hash, key) + if found != nil { // existing item found + return found, true, false + } + + element = &ListElement[Key, Value]{ + key: key, + keyHash: hash, + } + element.value.Store(&value) + return element, false, l.insertAt(element, left, right) +} + +// AddOrUpdate adds or updates an item to the list. +func (l *List[Key, Value]) AddOrUpdate(searchStart *ListElement[Key, Value], hash uintptr, key Key, value Value) (*ListElement[Key, Value], bool) { + left, found, right := l.search(searchStart, hash, key) + if found != nil { // existing item found + found.value.Store(&value) // update the value + return found, true + } + + element := &ListElement[Key, Value]{ + key: key, + keyHash: hash, + } + element.value.Store(&value) + return element, l.insertAt(element, left, right) +} + +// Delete deletes an element from the list. +func (l *List[Key, Value]) Delete(element *ListElement[Key, Value]) { + if !element.deleted.CompareAndSwap(0, 1) { + return // concurrent delete of the item is in progress + } + + right := element.Next() + // point head to next element if element to delete was head + l.head.next.CompareAndSwap(element, right) + + // element left from the deleted element will replace its next + // pointer to the next valid element on call of Next(). + + l.count.Add(^uintptr(0)) // decrease counter +} + +func (l *List[Key, Value]) search(searchStart *ListElement[Key, Value], hash uintptr, key Key) (left, found, right *ListElement[Key, Value]) { + if searchStart != nil && hash < searchStart.keyHash { // key would remain left from item? { + searchStart = nil // start search at head + } + + if searchStart == nil { // start search at head? + left = l.head + found = left.Next() + if found == nil { // no items beside head? + return nil, nil, nil + } + } else { + found = searchStart + } + + for { + if hash == found.keyHash && key == found.key { // key hash already exists, compare keys + return nil, found, nil + } + + if hash < found.keyHash { // new item needs to be inserted before the found value + if l.head == left { + return nil, nil, found + } + return left, nil, found + } + + // go to next element in sorted linked list + left = found + found = left.Next() + if found == nil { // no more items on the right + return left, nil, nil + } + } +} + +func (l *List[Key, Value]) insertAt(element, left, right *ListElement[Key, Value]) bool { + if left == nil { + left = l.head + } + + element.next.Store(right) + + if !left.next.CompareAndSwap(right, element) { + return false // item was modified concurrently + } + + l.count.Add(1) + return true +} diff --git a/vendor/github.com/cornelk/hashmap/list_element.go b/vendor/github.com/cornelk/hashmap/list_element.go new file mode 100644 index 000000000..1be64b0ac --- /dev/null +++ b/vendor/github.com/cornelk/hashmap/list_element.go @@ -0,0 +1,47 @@ +package hashmap + +import ( + "sync/atomic" +) + +// ListElement is an element of a list. +type ListElement[Key comparable, Value any] struct { + keyHash uintptr + + // deleted marks the item as deleting or deleted + // this is using uintptr instead of atomic.Bool to avoid using 32 bit int on 64 bit systems + deleted atomic.Uintptr + + // next points to the next element in the list. + // it is nil for the last item in the list. + next atomic.Pointer[ListElement[Key, Value]] + + value atomic.Pointer[Value] + + key Key +} + +// Value returns the value of the list item. +func (e *ListElement[Key, Value]) Value() Value { + return *e.value.Load() +} + +// Next returns the item on the right. +func (e *ListElement[Key, Value]) Next() *ListElement[Key, Value] { + for next := e.next.Load(); next != nil; { + // if the next item is not deleted, return it + if next.deleted.Load() == 0 { + return next + } + + // point current elements next to the following item + // after the deleted one until a non deleted or list end is found + following := next.Next() + if e.next.CompareAndSwap(next, following) { + next = following + } else { + next = next.Next() + } + } + return nil // end of the list reached +} diff --git a/vendor/github.com/cornelk/hashmap/store.go b/vendor/github.com/cornelk/hashmap/store.go new file mode 100644 index 000000000..8fc1d5986 --- /dev/null +++ b/vendor/github.com/cornelk/hashmap/store.go @@ -0,0 +1,45 @@ +package hashmap + +import ( + "sync/atomic" + "unsafe" +) + +type store[Key comparable, Value any] struct { + keyShifts uintptr // Pointer size - log2 of array size, to be used as index in the data array + count atomic.Uintptr // count of filled elements in the slice + array unsafe.Pointer // pointer to slice data array + index []*ListElement[Key, Value] // storage for the slice for the garbage collector to not clean it up +} + +// item returns the item for the given hashed key. +func (s *store[Key, Value]) item(hashedKey uintptr) *ListElement[Key, Value] { + index := hashedKey >> s.keyShifts + ptr := (*unsafe.Pointer)(unsafe.Pointer(uintptr(s.array) + index*intSizeBytes)) + item := (*ListElement[Key, Value])(atomic.LoadPointer(ptr)) + return item +} + +// adds an item to the index if needed and returns the new item counter if it changed, otherwise 0. +func (s *store[Key, Value]) addItem(item *ListElement[Key, Value]) uintptr { + index := item.keyHash >> s.keyShifts + ptr := (*unsafe.Pointer)(unsafe.Pointer(uintptr(s.array) + index*intSizeBytes)) + + for { // loop until the smallest key hash is in the index + element := (*ListElement[Key, Value])(atomic.LoadPointer(ptr)) // get the current item in the index + if element == nil { // no item yet at this index + if atomic.CompareAndSwapPointer(ptr, nil, unsafe.Pointer(item)) { + return s.count.Add(1) + } + continue // a new item was inserted concurrently, retry + } + + if item.keyHash < element.keyHash { + // the new item is the smallest for this index? + if !atomic.CompareAndSwapPointer(ptr, unsafe.Pointer(element), unsafe.Pointer(item)) { + continue // a new item was inserted concurrently, retry + } + } + return 0 + } +} diff --git a/vendor/github.com/cornelk/hashmap/util.go b/vendor/github.com/cornelk/hashmap/util.go new file mode 100644 index 000000000..4ef40e224 --- /dev/null +++ b/vendor/github.com/cornelk/hashmap/util.go @@ -0,0 +1,32 @@ +package hashmap + +import ( + "strconv" +) + +const ( + // intSizeBytes is the size in byte of an int or uint value. + intSizeBytes = strconv.IntSize >> 3 +) + +// roundUpPower2 rounds a number to the next power of 2. +func roundUpPower2(i uintptr) uintptr { + i-- + i |= i >> 1 + i |= i >> 2 + i |= i >> 4 + i |= i >> 8 + i |= i >> 16 + i |= i >> 32 + i++ + return i +} + +// log2 computes the binary logarithm of x, rounded up to the next integer. +func log2(i uintptr) uintptr { + var n, p uintptr + for p = 1; p < i; p += p { + n++ + } + return n +} diff --git a/vendor/github.com/cornelk/hashmap/util_hash.go b/vendor/github.com/cornelk/hashmap/util_hash.go new file mode 100644 index 000000000..5cd233ed7 --- /dev/null +++ b/vendor/github.com/cornelk/hashmap/util_hash.go @@ -0,0 +1,258 @@ +package hashmap + +import ( + "encoding/binary" + "fmt" + "math/bits" + "reflect" + "unsafe" +) + +const ( + prime1 uint64 = 11400714785074694791 + prime2 uint64 = 14029467366897019727 + prime3 uint64 = 1609587929392839161 + prime4 uint64 = 9650029242287828579 + prime5 uint64 = 2870177450012600261 +) + +var prime1v = prime1 + +/* +Copyright (c) 2016 Caleb Spare + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +// setDefaultHasher sets the default hasher depending on the key type. +// Inlines hashing as anonymous functions for performance improvements, other options like +// returning an anonymous functions from another function turned out to not be as performant. +func (m *Map[Key, Value]) setDefaultHasher() { + var key Key + kind := reflect.ValueOf(&key).Elem().Type().Kind() + + switch kind { + case reflect.Int, reflect.Uint, reflect.Uintptr: + switch intSizeBytes { + case 2: + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashWord)) + case 4: + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashDword)) + case 8: + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashQword)) + + default: + panic(fmt.Errorf("unsupported integer byte size %d", intSizeBytes)) + } + + case reflect.Int8, reflect.Uint8: + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashByte)) + case reflect.Int16, reflect.Uint16: + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashWord)) + case reflect.Int32, reflect.Uint32: + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashDword)) + case reflect.Int64, reflect.Uint64: + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashQword)) + case reflect.Float32: + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashFloat32)) + case reflect.Float64: + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashFloat64)) + case reflect.String: + m.hasher = *(*func(Key) uintptr)(unsafe.Pointer(&xxHashString)) + + default: + panic(fmt.Errorf("unsupported key type %T of kind %v", key, kind)) + } +} + +// Specialized xxhash hash functions, optimized for the bit size of the key where available, +// for all supported types beside string. + +var xxHashByte = func(key uint8) uintptr { + h := prime5 + 1 + h ^= uint64(key) * prime5 + h = bits.RotateLeft64(h, 11) * prime1 + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return uintptr(h) +} + +var xxHashWord = func(key uint16) uintptr { + h := prime5 + 2 + h ^= (uint64(key) & 0xff) * prime5 + h = bits.RotateLeft64(h, 11) * prime1 + h ^= ((uint64(key) >> 8) & 0xff) * prime5 + h = bits.RotateLeft64(h, 11) * prime1 + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return uintptr(h) +} + +var xxHashDword = func(key uint32) uintptr { + h := prime5 + 4 + h ^= uint64(key) * prime1 + h = bits.RotateLeft64(h, 23)*prime2 + prime3 + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return uintptr(h) +} + +var xxHashFloat32 = func(key float32) uintptr { + h := prime5 + 4 + h ^= uint64(key) * prime1 + h = bits.RotateLeft64(h, 23)*prime2 + prime3 + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return uintptr(h) +} + +var xxHashFloat64 = func(key float64) uintptr { + h := prime5 + 4 + h ^= uint64(key) * prime1 + h = bits.RotateLeft64(h, 23)*prime2 + prime3 + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return uintptr(h) +} + +var xxHashQword = func(key uint64) uintptr { + k1 := key * prime2 + k1 = bits.RotateLeft64(k1, 31) + k1 *= prime1 + h := (prime5 + 8) ^ k1 + h = bits.RotateLeft64(h, 27)*prime1 + prime4 + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return uintptr(h) +} + +var xxHashString = func(key string) uintptr { + sh := (*reflect.StringHeader)(unsafe.Pointer(&key)) + bh := reflect.SliceHeader{ + Data: sh.Data, + Len: sh.Len, + Cap: sh.Len, // cap needs to be set, otherwise xxhash fails on ARM Macs + } + + b := *(*[]byte)(unsafe.Pointer(&bh)) + var h uint64 + + if sh.Len >= 32 { + v1 := prime1v + prime2 + v2 := prime2 + v3 := uint64(0) + v4 := -prime1v + for len(b) >= 32 { + v1 = round(v1, binary.LittleEndian.Uint64(b[0:8:len(b)])) + v2 = round(v2, binary.LittleEndian.Uint64(b[8:16:len(b)])) + v3 = round(v3, binary.LittleEndian.Uint64(b[16:24:len(b)])) + v4 = round(v4, binary.LittleEndian.Uint64(b[24:32:len(b)])) + b = b[32:len(b):len(b)] + } + h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) + h = mergeRound(h, v1) + h = mergeRound(h, v2) + h = mergeRound(h, v3) + h = mergeRound(h, v4) + } else { + h = prime5 + } + + h += uint64(sh.Len) + + i, end := 0, len(b) + for ; i+8 <= end; i += 8 { + k1 := round(0, binary.LittleEndian.Uint64(b[i:i+8:len(b)])) + h ^= k1 + h = rol27(h)*prime1 + prime4 + } + if i+4 <= end { + h ^= uint64(binary.LittleEndian.Uint32(b[i:i+4:len(b)])) * prime1 + h = rol23(h)*prime2 + prime3 + i += 4 + } + for ; i < end; i++ { + h ^= uint64(b[i]) * prime5 + h = rol11(h) * prime1 + } + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return uintptr(h) +} + +func round(acc, input uint64) uint64 { + acc += input * prime2 + acc = rol31(acc) + acc *= prime1 + return acc +} + +func mergeRound(acc, val uint64) uint64 { + val = round(0, val) + acc ^= val + acc = acc*prime1 + prime4 + return acc +} + +func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) } +func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) } +func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) } +func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) } +func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) } +func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) } +func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) } +func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) } diff --git a/vendor/github.com/golang/snappy/README b/vendor/github.com/golang/snappy/README deleted file mode 100644 index cea12879a..000000000 --- a/vendor/github.com/golang/snappy/README +++ /dev/null @@ -1,107 +0,0 @@ -The Snappy compression format in the Go programming language. - -To download and install from source: -$ go get github.com/golang/snappy - -Unless otherwise noted, the Snappy-Go source files are distributed -under the BSD-style license found in the LICENSE file. - - - -Benchmarks. - -The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten -or so files, the same set used by the C++ Snappy code (github.com/google/snappy -and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @ -3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29: - -"go test -test.bench=." - -_UFlat0-8 2.19GB/s ± 0% html -_UFlat1-8 1.41GB/s ± 0% urls -_UFlat2-8 23.5GB/s ± 2% jpg -_UFlat3-8 1.91GB/s ± 0% jpg_200 -_UFlat4-8 14.0GB/s ± 1% pdf -_UFlat5-8 1.97GB/s ± 0% html4 -_UFlat6-8 814MB/s ± 0% txt1 -_UFlat7-8 785MB/s ± 0% txt2 -_UFlat8-8 857MB/s ± 0% txt3 -_UFlat9-8 719MB/s ± 1% txt4 -_UFlat10-8 2.84GB/s ± 0% pb -_UFlat11-8 1.05GB/s ± 0% gaviota - -_ZFlat0-8 1.04GB/s ± 0% html -_ZFlat1-8 534MB/s ± 0% urls -_ZFlat2-8 15.7GB/s ± 1% jpg -_ZFlat3-8 740MB/s ± 3% jpg_200 -_ZFlat4-8 9.20GB/s ± 1% pdf -_ZFlat5-8 991MB/s ± 0% html4 -_ZFlat6-8 379MB/s ± 0% txt1 -_ZFlat7-8 352MB/s ± 0% txt2 -_ZFlat8-8 396MB/s ± 1% txt3 -_ZFlat9-8 327MB/s ± 1% txt4 -_ZFlat10-8 1.33GB/s ± 1% pb -_ZFlat11-8 605MB/s ± 1% gaviota - - - -"go test -test.bench=. -tags=noasm" - -_UFlat0-8 621MB/s ± 2% html -_UFlat1-8 494MB/s ± 1% urls -_UFlat2-8 23.2GB/s ± 1% jpg -_UFlat3-8 1.12GB/s ± 1% jpg_200 -_UFlat4-8 4.35GB/s ± 1% pdf -_UFlat5-8 609MB/s ± 0% html4 -_UFlat6-8 296MB/s ± 0% txt1 -_UFlat7-8 288MB/s ± 0% txt2 -_UFlat8-8 309MB/s ± 1% txt3 -_UFlat9-8 280MB/s ± 1% txt4 -_UFlat10-8 753MB/s ± 0% pb -_UFlat11-8 400MB/s ± 0% gaviota - -_ZFlat0-8 409MB/s ± 1% html -_ZFlat1-8 250MB/s ± 1% urls -_ZFlat2-8 12.3GB/s ± 1% jpg -_ZFlat3-8 132MB/s ± 0% jpg_200 -_ZFlat4-8 2.92GB/s ± 0% pdf -_ZFlat5-8 405MB/s ± 1% html4 -_ZFlat6-8 179MB/s ± 1% txt1 -_ZFlat7-8 170MB/s ± 1% txt2 -_ZFlat8-8 189MB/s ± 1% txt3 -_ZFlat9-8 164MB/s ± 1% txt4 -_ZFlat10-8 479MB/s ± 1% pb -_ZFlat11-8 270MB/s ± 1% gaviota - - - -For comparison (Go's encoded output is byte-for-byte identical to C++'s), here -are the numbers from C++ Snappy's - -make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log - -BM_UFlat/0 2.4GB/s html -BM_UFlat/1 1.4GB/s urls -BM_UFlat/2 21.8GB/s jpg -BM_UFlat/3 1.5GB/s jpg_200 -BM_UFlat/4 13.3GB/s pdf -BM_UFlat/5 2.1GB/s html4 -BM_UFlat/6 1.0GB/s txt1 -BM_UFlat/7 959.4MB/s txt2 -BM_UFlat/8 1.0GB/s txt3 -BM_UFlat/9 864.5MB/s txt4 -BM_UFlat/10 2.9GB/s pb -BM_UFlat/11 1.2GB/s gaviota - -BM_ZFlat/0 944.3MB/s html (22.31 %) -BM_ZFlat/1 501.6MB/s urls (47.78 %) -BM_ZFlat/2 14.3GB/s jpg (99.95 %) -BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %) -BM_ZFlat/4 8.3GB/s pdf (83.30 %) -BM_ZFlat/5 903.5MB/s html4 (22.52 %) -BM_ZFlat/6 336.0MB/s txt1 (57.88 %) -BM_ZFlat/7 312.3MB/s txt2 (61.91 %) -BM_ZFlat/8 353.1MB/s txt3 (54.99 %) -BM_ZFlat/9 289.9MB/s txt4 (66.26 %) -BM_ZFlat/10 1.2GB/s pb (19.68 %) -BM_ZFlat/11 527.4MB/s gaviota (37.72 %) diff --git a/vendor/github.com/golang/snappy/decode.go b/vendor/github.com/golang/snappy/decode.go deleted file mode 100644 index 23c6e26c6..000000000 --- a/vendor/github.com/golang/snappy/decode.go +++ /dev/null @@ -1,264 +0,0 @@ -// Copyright 2011 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package snappy - -import ( - "encoding/binary" - "errors" - "io" -) - -var ( - // ErrCorrupt reports that the input is invalid. - ErrCorrupt = errors.New("snappy: corrupt input") - // ErrTooLarge reports that the uncompressed length is too large. - ErrTooLarge = errors.New("snappy: decoded block is too large") - // ErrUnsupported reports that the input isn't supported. - ErrUnsupported = errors.New("snappy: unsupported input") - - errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length") -) - -// DecodedLen returns the length of the decoded block. -func DecodedLen(src []byte) (int, error) { - v, _, err := decodedLen(src) - return v, err -} - -// decodedLen returns the length of the decoded block and the number of bytes -// that the length header occupied. -func decodedLen(src []byte) (blockLen, headerLen int, err error) { - v, n := binary.Uvarint(src) - if n <= 0 || v > 0xffffffff { - return 0, 0, ErrCorrupt - } - - const wordSize = 32 << (^uint(0) >> 32 & 1) - if wordSize == 32 && v > 0x7fffffff { - return 0, 0, ErrTooLarge - } - return int(v), n, nil -} - -const ( - decodeErrCodeCorrupt = 1 - decodeErrCodeUnsupportedLiteralLength = 2 -) - -// Decode returns the decoded form of src. The returned slice may be a sub- -// slice of dst if dst was large enough to hold the entire decoded block. -// Otherwise, a newly allocated slice will be returned. -// -// The dst and src must not overlap. It is valid to pass a nil dst. -// -// Decode handles the Snappy block format, not the Snappy stream format. -func Decode(dst, src []byte) ([]byte, error) { - dLen, s, err := decodedLen(src) - if err != nil { - return nil, err - } - if dLen <= len(dst) { - dst = dst[:dLen] - } else { - dst = make([]byte, dLen) - } - switch decode(dst, src[s:]) { - case 0: - return dst, nil - case decodeErrCodeUnsupportedLiteralLength: - return nil, errUnsupportedLiteralLength - } - return nil, ErrCorrupt -} - -// NewReader returns a new Reader that decompresses from r, using the framing -// format described at -// https://github.com/google/snappy/blob/master/framing_format.txt -func NewReader(r io.Reader) *Reader { - return &Reader{ - r: r, - decoded: make([]byte, maxBlockSize), - buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize), - } -} - -// Reader is an io.Reader that can read Snappy-compressed bytes. -// -// Reader handles the Snappy stream format, not the Snappy block format. -type Reader struct { - r io.Reader - err error - decoded []byte - buf []byte - // decoded[i:j] contains decoded bytes that have not yet been passed on. - i, j int - readHeader bool -} - -// Reset discards any buffered data, resets all state, and switches the Snappy -// reader to read from r. This permits reusing a Reader rather than allocating -// a new one. -func (r *Reader) Reset(reader io.Reader) { - r.r = reader - r.err = nil - r.i = 0 - r.j = 0 - r.readHeader = false -} - -func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { - if _, r.err = io.ReadFull(r.r, p); r.err != nil { - if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { - r.err = ErrCorrupt - } - return false - } - return true -} - -func (r *Reader) fill() error { - for r.i >= r.j { - if !r.readFull(r.buf[:4], true) { - return r.err - } - chunkType := r.buf[0] - if !r.readHeader { - if chunkType != chunkTypeStreamIdentifier { - r.err = ErrCorrupt - return r.err - } - r.readHeader = true - } - chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 - if chunkLen > len(r.buf) { - r.err = ErrUnsupported - return r.err - } - - // The chunk types are specified at - // https://github.com/google/snappy/blob/master/framing_format.txt - switch chunkType { - case chunkTypeCompressedData: - // Section 4.2. Compressed data (chunk type 0x00). - if chunkLen < checksumSize { - r.err = ErrCorrupt - return r.err - } - buf := r.buf[:chunkLen] - if !r.readFull(buf, false) { - return r.err - } - checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - buf = buf[checksumSize:] - - n, err := DecodedLen(buf) - if err != nil { - r.err = err - return r.err - } - if n > len(r.decoded) { - r.err = ErrCorrupt - return r.err - } - if _, err := Decode(r.decoded, buf); err != nil { - r.err = err - return r.err - } - if crc(r.decoded[:n]) != checksum { - r.err = ErrCorrupt - return r.err - } - r.i, r.j = 0, n - continue - - case chunkTypeUncompressedData: - // Section 4.3. Uncompressed data (chunk type 0x01). - if chunkLen < checksumSize { - r.err = ErrCorrupt - return r.err - } - buf := r.buf[:checksumSize] - if !r.readFull(buf, false) { - return r.err - } - checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - // Read directly into r.decoded instead of via r.buf. - n := chunkLen - checksumSize - if n > len(r.decoded) { - r.err = ErrCorrupt - return r.err - } - if !r.readFull(r.decoded[:n], false) { - return r.err - } - if crc(r.decoded[:n]) != checksum { - r.err = ErrCorrupt - return r.err - } - r.i, r.j = 0, n - continue - - case chunkTypeStreamIdentifier: - // Section 4.1. Stream identifier (chunk type 0xff). - if chunkLen != len(magicBody) { - r.err = ErrCorrupt - return r.err - } - if !r.readFull(r.buf[:len(magicBody)], false) { - return r.err - } - for i := 0; i < len(magicBody); i++ { - if r.buf[i] != magicBody[i] { - r.err = ErrCorrupt - return r.err - } - } - continue - } - - if chunkType <= 0x7f { - // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). - r.err = ErrUnsupported - return r.err - } - // Section 4.4 Padding (chunk type 0xfe). - // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). - if !r.readFull(r.buf[:chunkLen], false) { - return r.err - } - } - - return nil -} - -// Read satisfies the io.Reader interface. -func (r *Reader) Read(p []byte) (int, error) { - if r.err != nil { - return 0, r.err - } - - if err := r.fill(); err != nil { - return 0, err - } - - n := copy(p, r.decoded[r.i:r.j]) - r.i += n - return n, nil -} - -// ReadByte satisfies the io.ByteReader interface. -func (r *Reader) ReadByte() (byte, error) { - if r.err != nil { - return 0, r.err - } - - if err := r.fill(); err != nil { - return 0, err - } - - c := r.decoded[r.i] - r.i++ - return c, nil -} diff --git a/vendor/github.com/golang/snappy/decode_amd64.s b/vendor/github.com/golang/snappy/decode_amd64.s deleted file mode 100644 index e6179f65e..000000000 --- a/vendor/github.com/golang/snappy/decode_amd64.s +++ /dev/null @@ -1,490 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm - -#include "textflag.h" - -// The asm code generally follows the pure Go code in decode_other.go, except -// where marked with a "!!!". - -// func decode(dst, src []byte) int -// -// All local variables fit into registers. The non-zero stack size is only to -// spill registers and push args when issuing a CALL. The register allocation: -// - AX scratch -// - BX scratch -// - CX length or x -// - DX offset -// - SI &src[s] -// - DI &dst[d] -// + R8 dst_base -// + R9 dst_len -// + R10 dst_base + dst_len -// + R11 src_base -// + R12 src_len -// + R13 src_base + src_len -// - R14 used by doCopy -// - R15 used by doCopy -// -// The registers R8-R13 (marked with a "+") are set at the start of the -// function, and after a CALL returns, and are not otherwise modified. -// -// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI. -// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI. -TEXT ·decode(SB), NOSPLIT, $48-56 - // Initialize SI, DI and R8-R13. - MOVQ dst_base+0(FP), R8 - MOVQ dst_len+8(FP), R9 - MOVQ R8, DI - MOVQ R8, R10 - ADDQ R9, R10 - MOVQ src_base+24(FP), R11 - MOVQ src_len+32(FP), R12 - MOVQ R11, SI - MOVQ R11, R13 - ADDQ R12, R13 - -loop: - // for s < len(src) - CMPQ SI, R13 - JEQ end - - // CX = uint32(src[s]) - // - // switch src[s] & 0x03 - MOVBLZX (SI), CX - MOVL CX, BX - ANDL $3, BX - CMPL BX, $1 - JAE tagCopy - - // ---------------------------------------- - // The code below handles literal tags. - - // case tagLiteral: - // x := uint32(src[s] >> 2) - // switch - SHRL $2, CX - CMPL CX, $60 - JAE tagLit60Plus - - // case x < 60: - // s++ - INCQ SI - -doLit: - // This is the end of the inner "switch", when we have a literal tag. - // - // We assume that CX == x and x fits in a uint32, where x is the variable - // used in the pure Go decode_other.go code. - - // length = int(x) + 1 - // - // Unlike the pure Go code, we don't need to check if length <= 0 because - // CX can hold 64 bits, so the increment cannot overflow. - INCQ CX - - // Prepare to check if copying length bytes will run past the end of dst or - // src. - // - // AX = len(dst) - d - // BX = len(src) - s - MOVQ R10, AX - SUBQ DI, AX - MOVQ R13, BX - SUBQ SI, BX - - // !!! Try a faster technique for short (16 or fewer bytes) copies. - // - // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 { - // goto callMemmove // Fall back on calling runtime·memmove. - // } - // - // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s - // against 21 instead of 16, because it cannot assume that all of its input - // is contiguous in memory and so it needs to leave enough source bytes to - // read the next tag without refilling buffers, but Go's Decode assumes - // contiguousness (the src argument is a []byte). - CMPQ CX, $16 - JGT callMemmove - CMPQ AX, $16 - JLT callMemmove - CMPQ BX, $16 - JLT callMemmove - - // !!! Implement the copy from src to dst as a 16-byte load and store. - // (Decode's documentation says that dst and src must not overlap.) - // - // This always copies 16 bytes, instead of only length bytes, but that's - // OK. If the input is a valid Snappy encoding then subsequent iterations - // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a - // non-nil error), so the overrun will be ignored. - // - // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or - // 16-byte loads and stores. This technique probably wouldn't be as - // effective on architectures that are fussier about alignment. - MOVOU 0(SI), X0 - MOVOU X0, 0(DI) - - // d += length - // s += length - ADDQ CX, DI - ADDQ CX, SI - JMP loop - -callMemmove: - // if length > len(dst)-d || length > len(src)-s { etc } - CMPQ CX, AX - JGT errCorrupt - CMPQ CX, BX - JGT errCorrupt - - // copy(dst[d:], src[s:s+length]) - // - // This means calling runtime·memmove(&dst[d], &src[s], length), so we push - // DI, SI and CX as arguments. Coincidentally, we also need to spill those - // three registers to the stack, to save local variables across the CALL. - MOVQ DI, 0(SP) - MOVQ SI, 8(SP) - MOVQ CX, 16(SP) - MOVQ DI, 24(SP) - MOVQ SI, 32(SP) - MOVQ CX, 40(SP) - CALL runtime·memmove(SB) - - // Restore local variables: unspill registers from the stack and - // re-calculate R8-R13. - MOVQ 24(SP), DI - MOVQ 32(SP), SI - MOVQ 40(SP), CX - MOVQ dst_base+0(FP), R8 - MOVQ dst_len+8(FP), R9 - MOVQ R8, R10 - ADDQ R9, R10 - MOVQ src_base+24(FP), R11 - MOVQ src_len+32(FP), R12 - MOVQ R11, R13 - ADDQ R12, R13 - - // d += length - // s += length - ADDQ CX, DI - ADDQ CX, SI - JMP loop - -tagLit60Plus: - // !!! This fragment does the - // - // s += x - 58; if uint(s) > uint(len(src)) { etc } - // - // checks. In the asm version, we code it once instead of once per switch case. - ADDQ CX, SI - SUBQ $58, SI - MOVQ SI, BX - SUBQ R11, BX - CMPQ BX, R12 - JA errCorrupt - - // case x == 60: - CMPL CX, $61 - JEQ tagLit61 - JA tagLit62Plus - - // x = uint32(src[s-1]) - MOVBLZX -1(SI), CX - JMP doLit - -tagLit61: - // case x == 61: - // x = uint32(src[s-2]) | uint32(src[s-1])<<8 - MOVWLZX -2(SI), CX - JMP doLit - -tagLit62Plus: - CMPL CX, $62 - JA tagLit63 - - // case x == 62: - // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 - MOVWLZX -3(SI), CX - MOVBLZX -1(SI), BX - SHLL $16, BX - ORL BX, CX - JMP doLit - -tagLit63: - // case x == 63: - // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 - MOVL -4(SI), CX - JMP doLit - -// The code above handles literal tags. -// ---------------------------------------- -// The code below handles copy tags. - -tagCopy4: - // case tagCopy4: - // s += 5 - ADDQ $5, SI - - // if uint(s) > uint(len(src)) { etc } - MOVQ SI, BX - SUBQ R11, BX - CMPQ BX, R12 - JA errCorrupt - - // length = 1 + int(src[s-5])>>2 - SHRQ $2, CX - INCQ CX - - // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) - MOVLQZX -4(SI), DX - JMP doCopy - -tagCopy2: - // case tagCopy2: - // s += 3 - ADDQ $3, SI - - // if uint(s) > uint(len(src)) { etc } - MOVQ SI, BX - SUBQ R11, BX - CMPQ BX, R12 - JA errCorrupt - - // length = 1 + int(src[s-3])>>2 - SHRQ $2, CX - INCQ CX - - // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) - MOVWQZX -2(SI), DX - JMP doCopy - -tagCopy: - // We have a copy tag. We assume that: - // - BX == src[s] & 0x03 - // - CX == src[s] - CMPQ BX, $2 - JEQ tagCopy2 - JA tagCopy4 - - // case tagCopy1: - // s += 2 - ADDQ $2, SI - - // if uint(s) > uint(len(src)) { etc } - MOVQ SI, BX - SUBQ R11, BX - CMPQ BX, R12 - JA errCorrupt - - // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) - MOVQ CX, DX - ANDQ $0xe0, DX - SHLQ $3, DX - MOVBQZX -1(SI), BX - ORQ BX, DX - - // length = 4 + int(src[s-2])>>2&0x7 - SHRQ $2, CX - ANDQ $7, CX - ADDQ $4, CX - -doCopy: - // This is the end of the outer "switch", when we have a copy tag. - // - // We assume that: - // - CX == length && CX > 0 - // - DX == offset - - // if offset <= 0 { etc } - CMPQ DX, $0 - JLE errCorrupt - - // if d < offset { etc } - MOVQ DI, BX - SUBQ R8, BX - CMPQ BX, DX - JLT errCorrupt - - // if length > len(dst)-d { etc } - MOVQ R10, BX - SUBQ DI, BX - CMPQ CX, BX - JGT errCorrupt - - // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length - // - // Set: - // - R14 = len(dst)-d - // - R15 = &dst[d-offset] - MOVQ R10, R14 - SUBQ DI, R14 - MOVQ DI, R15 - SUBQ DX, R15 - - // !!! Try a faster technique for short (16 or fewer bytes) forward copies. - // - // First, try using two 8-byte load/stores, similar to the doLit technique - // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is - // still OK if offset >= 8. Note that this has to be two 8-byte load/stores - // and not one 16-byte load/store, and the first store has to be before the - // second load, due to the overlap if offset is in the range [8, 16). - // - // if length > 16 || offset < 8 || len(dst)-d < 16 { - // goto slowForwardCopy - // } - // copy 16 bytes - // d += length - CMPQ CX, $16 - JGT slowForwardCopy - CMPQ DX, $8 - JLT slowForwardCopy - CMPQ R14, $16 - JLT slowForwardCopy - MOVQ 0(R15), AX - MOVQ AX, 0(DI) - MOVQ 8(R15), BX - MOVQ BX, 8(DI) - ADDQ CX, DI - JMP loop - -slowForwardCopy: - // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we - // can still try 8-byte load stores, provided we can overrun up to 10 extra - // bytes. As above, the overrun will be fixed up by subsequent iterations - // of the outermost loop. - // - // The C++ snappy code calls this technique IncrementalCopyFastPath. Its - // commentary says: - // - // ---- - // - // The main part of this loop is a simple copy of eight bytes at a time - // until we've copied (at least) the requested amount of bytes. However, - // if d and d-offset are less than eight bytes apart (indicating a - // repeating pattern of length < 8), we first need to expand the pattern in - // order to get the correct results. For instance, if the buffer looks like - // this, with the eight-byte <d-offset> and <d> patterns marked as - // intervals: - // - // abxxxxxxxxxxxx - // [------] d-offset - // [------] d - // - // a single eight-byte copy from <d-offset> to <d> will repeat the pattern - // once, after which we can move <d> two bytes without moving <d-offset>: - // - // ababxxxxxxxxxx - // [------] d-offset - // [------] d - // - // and repeat the exercise until the two no longer overlap. - // - // This allows us to do very well in the special case of one single byte - // repeated many times, without taking a big hit for more general cases. - // - // The worst case of extra writing past the end of the match occurs when - // offset == 1 and length == 1; the last copy will read from byte positions - // [0..7] and write to [4..11], whereas it was only supposed to write to - // position 1. Thus, ten excess bytes. - // - // ---- - // - // That "10 byte overrun" worst case is confirmed by Go's - // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy - // and finishSlowForwardCopy algorithm. - // - // if length > len(dst)-d-10 { - // goto verySlowForwardCopy - // } - SUBQ $10, R14 - CMPQ CX, R14 - JGT verySlowForwardCopy - -makeOffsetAtLeast8: - // !!! As above, expand the pattern so that offset >= 8 and we can use - // 8-byte load/stores. - // - // for offset < 8 { - // copy 8 bytes from dst[d-offset:] to dst[d:] - // length -= offset - // d += offset - // offset += offset - // // The two previous lines together means that d-offset, and therefore - // // R15, is unchanged. - // } - CMPQ DX, $8 - JGE fixUpSlowForwardCopy - MOVQ (R15), BX - MOVQ BX, (DI) - SUBQ DX, CX - ADDQ DX, DI - ADDQ DX, DX - JMP makeOffsetAtLeast8 - -fixUpSlowForwardCopy: - // !!! Add length (which might be negative now) to d (implied by DI being - // &dst[d]) so that d ends up at the right place when we jump back to the - // top of the loop. Before we do that, though, we save DI to AX so that, if - // length is positive, copying the remaining length bytes will write to the - // right place. - MOVQ DI, AX - ADDQ CX, DI - -finishSlowForwardCopy: - // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative - // length means that we overrun, but as above, that will be fixed up by - // subsequent iterations of the outermost loop. - CMPQ CX, $0 - JLE loop - MOVQ (R15), BX - MOVQ BX, (AX) - ADDQ $8, R15 - ADDQ $8, AX - SUBQ $8, CX - JMP finishSlowForwardCopy - -verySlowForwardCopy: - // verySlowForwardCopy is a simple implementation of forward copy. In C - // parlance, this is a do/while loop instead of a while loop, since we know - // that length > 0. In Go syntax: - // - // for { - // dst[d] = dst[d - offset] - // d++ - // length-- - // if length == 0 { - // break - // } - // } - MOVB (R15), BX - MOVB BX, (DI) - INCQ R15 - INCQ DI - DECQ CX - JNZ verySlowForwardCopy - JMP loop - -// The code above handles copy tags. -// ---------------------------------------- - -end: - // This is the end of the "for s < len(src)". - // - // if d != len(dst) { etc } - CMPQ DI, R10 - JNE errCorrupt - - // return 0 - MOVQ $0, ret+48(FP) - RET - -errCorrupt: - // return decodeErrCodeCorrupt - MOVQ $1, ret+48(FP) - RET diff --git a/vendor/github.com/golang/snappy/decode_arm64.s b/vendor/github.com/golang/snappy/decode_arm64.s deleted file mode 100644 index 7a3ead17e..000000000 --- a/vendor/github.com/golang/snappy/decode_arm64.s +++ /dev/null @@ -1,494 +0,0 @@ -// Copyright 2020 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm - -#include "textflag.h" - -// The asm code generally follows the pure Go code in decode_other.go, except -// where marked with a "!!!". - -// func decode(dst, src []byte) int -// -// All local variables fit into registers. The non-zero stack size is only to -// spill registers and push args when issuing a CALL. The register allocation: -// - R2 scratch -// - R3 scratch -// - R4 length or x -// - R5 offset -// - R6 &src[s] -// - R7 &dst[d] -// + R8 dst_base -// + R9 dst_len -// + R10 dst_base + dst_len -// + R11 src_base -// + R12 src_len -// + R13 src_base + src_len -// - R14 used by doCopy -// - R15 used by doCopy -// -// The registers R8-R13 (marked with a "+") are set at the start of the -// function, and after a CALL returns, and are not otherwise modified. -// -// The d variable is implicitly R7 - R8, and len(dst)-d is R10 - R7. -// The s variable is implicitly R6 - R11, and len(src)-s is R13 - R6. -TEXT ·decode(SB), NOSPLIT, $56-56 - // Initialize R6, R7 and R8-R13. - MOVD dst_base+0(FP), R8 - MOVD dst_len+8(FP), R9 - MOVD R8, R7 - MOVD R8, R10 - ADD R9, R10, R10 - MOVD src_base+24(FP), R11 - MOVD src_len+32(FP), R12 - MOVD R11, R6 - MOVD R11, R13 - ADD R12, R13, R13 - -loop: - // for s < len(src) - CMP R13, R6 - BEQ end - - // R4 = uint32(src[s]) - // - // switch src[s] & 0x03 - MOVBU (R6), R4 - MOVW R4, R3 - ANDW $3, R3 - MOVW $1, R1 - CMPW R1, R3 - BGE tagCopy - - // ---------------------------------------- - // The code below handles literal tags. - - // case tagLiteral: - // x := uint32(src[s] >> 2) - // switch - MOVW $60, R1 - LSRW $2, R4, R4 - CMPW R4, R1 - BLS tagLit60Plus - - // case x < 60: - // s++ - ADD $1, R6, R6 - -doLit: - // This is the end of the inner "switch", when we have a literal tag. - // - // We assume that R4 == x and x fits in a uint32, where x is the variable - // used in the pure Go decode_other.go code. - - // length = int(x) + 1 - // - // Unlike the pure Go code, we don't need to check if length <= 0 because - // R4 can hold 64 bits, so the increment cannot overflow. - ADD $1, R4, R4 - - // Prepare to check if copying length bytes will run past the end of dst or - // src. - // - // R2 = len(dst) - d - // R3 = len(src) - s - MOVD R10, R2 - SUB R7, R2, R2 - MOVD R13, R3 - SUB R6, R3, R3 - - // !!! Try a faster technique for short (16 or fewer bytes) copies. - // - // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 { - // goto callMemmove // Fall back on calling runtime·memmove. - // } - // - // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s - // against 21 instead of 16, because it cannot assume that all of its input - // is contiguous in memory and so it needs to leave enough source bytes to - // read the next tag without refilling buffers, but Go's Decode assumes - // contiguousness (the src argument is a []byte). - CMP $16, R4 - BGT callMemmove - CMP $16, R2 - BLT callMemmove - CMP $16, R3 - BLT callMemmove - - // !!! Implement the copy from src to dst as a 16-byte load and store. - // (Decode's documentation says that dst and src must not overlap.) - // - // This always copies 16 bytes, instead of only length bytes, but that's - // OK. If the input is a valid Snappy encoding then subsequent iterations - // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a - // non-nil error), so the overrun will be ignored. - // - // Note that on arm64, it is legal and cheap to issue unaligned 8-byte or - // 16-byte loads and stores. This technique probably wouldn't be as - // effective on architectures that are fussier about alignment. - LDP 0(R6), (R14, R15) - STP (R14, R15), 0(R7) - - // d += length - // s += length - ADD R4, R7, R7 - ADD R4, R6, R6 - B loop - -callMemmove: - // if length > len(dst)-d || length > len(src)-s { etc } - CMP R2, R4 - BGT errCorrupt - CMP R3, R4 - BGT errCorrupt - - // copy(dst[d:], src[s:s+length]) - // - // This means calling runtime·memmove(&dst[d], &src[s], length), so we push - // R7, R6 and R4 as arguments. Coincidentally, we also need to spill those - // three registers to the stack, to save local variables across the CALL. - MOVD R7, 8(RSP) - MOVD R6, 16(RSP) - MOVD R4, 24(RSP) - MOVD R7, 32(RSP) - MOVD R6, 40(RSP) - MOVD R4, 48(RSP) - CALL runtime·memmove(SB) - - // Restore local variables: unspill registers from the stack and - // re-calculate R8-R13. - MOVD 32(RSP), R7 - MOVD 40(RSP), R6 - MOVD 48(RSP), R4 - MOVD dst_base+0(FP), R8 - MOVD dst_len+8(FP), R9 - MOVD R8, R10 - ADD R9, R10, R10 - MOVD src_base+24(FP), R11 - MOVD src_len+32(FP), R12 - MOVD R11, R13 - ADD R12, R13, R13 - - // d += length - // s += length - ADD R4, R7, R7 - ADD R4, R6, R6 - B loop - -tagLit60Plus: - // !!! This fragment does the - // - // s += x - 58; if uint(s) > uint(len(src)) { etc } - // - // checks. In the asm version, we code it once instead of once per switch case. - ADD R4, R6, R6 - SUB $58, R6, R6 - MOVD R6, R3 - SUB R11, R3, R3 - CMP R12, R3 - BGT errCorrupt - - // case x == 60: - MOVW $61, R1 - CMPW R1, R4 - BEQ tagLit61 - BGT tagLit62Plus - - // x = uint32(src[s-1]) - MOVBU -1(R6), R4 - B doLit - -tagLit61: - // case x == 61: - // x = uint32(src[s-2]) | uint32(src[s-1])<<8 - MOVHU -2(R6), R4 - B doLit - -tagLit62Plus: - CMPW $62, R4 - BHI tagLit63 - - // case x == 62: - // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 - MOVHU -3(R6), R4 - MOVBU -1(R6), R3 - ORR R3<<16, R4 - B doLit - -tagLit63: - // case x == 63: - // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 - MOVWU -4(R6), R4 - B doLit - - // The code above handles literal tags. - // ---------------------------------------- - // The code below handles copy tags. - -tagCopy4: - // case tagCopy4: - // s += 5 - ADD $5, R6, R6 - - // if uint(s) > uint(len(src)) { etc } - MOVD R6, R3 - SUB R11, R3, R3 - CMP R12, R3 - BGT errCorrupt - - // length = 1 + int(src[s-5])>>2 - MOVD $1, R1 - ADD R4>>2, R1, R4 - - // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) - MOVWU -4(R6), R5 - B doCopy - -tagCopy2: - // case tagCopy2: - // s += 3 - ADD $3, R6, R6 - - // if uint(s) > uint(len(src)) { etc } - MOVD R6, R3 - SUB R11, R3, R3 - CMP R12, R3 - BGT errCorrupt - - // length = 1 + int(src[s-3])>>2 - MOVD $1, R1 - ADD R4>>2, R1, R4 - - // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) - MOVHU -2(R6), R5 - B doCopy - -tagCopy: - // We have a copy tag. We assume that: - // - R3 == src[s] & 0x03 - // - R4 == src[s] - CMP $2, R3 - BEQ tagCopy2 - BGT tagCopy4 - - // case tagCopy1: - // s += 2 - ADD $2, R6, R6 - - // if uint(s) > uint(len(src)) { etc } - MOVD R6, R3 - SUB R11, R3, R3 - CMP R12, R3 - BGT errCorrupt - - // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) - MOVD R4, R5 - AND $0xe0, R5 - MOVBU -1(R6), R3 - ORR R5<<3, R3, R5 - - // length = 4 + int(src[s-2])>>2&0x7 - MOVD $7, R1 - AND R4>>2, R1, R4 - ADD $4, R4, R4 - -doCopy: - // This is the end of the outer "switch", when we have a copy tag. - // - // We assume that: - // - R4 == length && R4 > 0 - // - R5 == offset - - // if offset <= 0 { etc } - MOVD $0, R1 - CMP R1, R5 - BLE errCorrupt - - // if d < offset { etc } - MOVD R7, R3 - SUB R8, R3, R3 - CMP R5, R3 - BLT errCorrupt - - // if length > len(dst)-d { etc } - MOVD R10, R3 - SUB R7, R3, R3 - CMP R3, R4 - BGT errCorrupt - - // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length - // - // Set: - // - R14 = len(dst)-d - // - R15 = &dst[d-offset] - MOVD R10, R14 - SUB R7, R14, R14 - MOVD R7, R15 - SUB R5, R15, R15 - - // !!! Try a faster technique for short (16 or fewer bytes) forward copies. - // - // First, try using two 8-byte load/stores, similar to the doLit technique - // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is - // still OK if offset >= 8. Note that this has to be two 8-byte load/stores - // and not one 16-byte load/store, and the first store has to be before the - // second load, due to the overlap if offset is in the range [8, 16). - // - // if length > 16 || offset < 8 || len(dst)-d < 16 { - // goto slowForwardCopy - // } - // copy 16 bytes - // d += length - CMP $16, R4 - BGT slowForwardCopy - CMP $8, R5 - BLT slowForwardCopy - CMP $16, R14 - BLT slowForwardCopy - MOVD 0(R15), R2 - MOVD R2, 0(R7) - MOVD 8(R15), R3 - MOVD R3, 8(R7) - ADD R4, R7, R7 - B loop - -slowForwardCopy: - // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we - // can still try 8-byte load stores, provided we can overrun up to 10 extra - // bytes. As above, the overrun will be fixed up by subsequent iterations - // of the outermost loop. - // - // The C++ snappy code calls this technique IncrementalCopyFastPath. Its - // commentary says: - // - // ---- - // - // The main part of this loop is a simple copy of eight bytes at a time - // until we've copied (at least) the requested amount of bytes. However, - // if d and d-offset are less than eight bytes apart (indicating a - // repeating pattern of length < 8), we first need to expand the pattern in - // order to get the correct results. For instance, if the buffer looks like - // this, with the eight-byte <d-offset> and <d> patterns marked as - // intervals: - // - // abxxxxxxxxxxxx - // [------] d-offset - // [------] d - // - // a single eight-byte copy from <d-offset> to <d> will repeat the pattern - // once, after which we can move <d> two bytes without moving <d-offset>: - // - // ababxxxxxxxxxx - // [------] d-offset - // [------] d - // - // and repeat the exercise until the two no longer overlap. - // - // This allows us to do very well in the special case of one single byte - // repeated many times, without taking a big hit for more general cases. - // - // The worst case of extra writing past the end of the match occurs when - // offset == 1 and length == 1; the last copy will read from byte positions - // [0..7] and write to [4..11], whereas it was only supposed to write to - // position 1. Thus, ten excess bytes. - // - // ---- - // - // That "10 byte overrun" worst case is confirmed by Go's - // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy - // and finishSlowForwardCopy algorithm. - // - // if length > len(dst)-d-10 { - // goto verySlowForwardCopy - // } - SUB $10, R14, R14 - CMP R14, R4 - BGT verySlowForwardCopy - -makeOffsetAtLeast8: - // !!! As above, expand the pattern so that offset >= 8 and we can use - // 8-byte load/stores. - // - // for offset < 8 { - // copy 8 bytes from dst[d-offset:] to dst[d:] - // length -= offset - // d += offset - // offset += offset - // // The two previous lines together means that d-offset, and therefore - // // R15, is unchanged. - // } - CMP $8, R5 - BGE fixUpSlowForwardCopy - MOVD (R15), R3 - MOVD R3, (R7) - SUB R5, R4, R4 - ADD R5, R7, R7 - ADD R5, R5, R5 - B makeOffsetAtLeast8 - -fixUpSlowForwardCopy: - // !!! Add length (which might be negative now) to d (implied by R7 being - // &dst[d]) so that d ends up at the right place when we jump back to the - // top of the loop. Before we do that, though, we save R7 to R2 so that, if - // length is positive, copying the remaining length bytes will write to the - // right place. - MOVD R7, R2 - ADD R4, R7, R7 - -finishSlowForwardCopy: - // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative - // length means that we overrun, but as above, that will be fixed up by - // subsequent iterations of the outermost loop. - MOVD $0, R1 - CMP R1, R4 - BLE loop - MOVD (R15), R3 - MOVD R3, (R2) - ADD $8, R15, R15 - ADD $8, R2, R2 - SUB $8, R4, R4 - B finishSlowForwardCopy - -verySlowForwardCopy: - // verySlowForwardCopy is a simple implementation of forward copy. In C - // parlance, this is a do/while loop instead of a while loop, since we know - // that length > 0. In Go syntax: - // - // for { - // dst[d] = dst[d - offset] - // d++ - // length-- - // if length == 0 { - // break - // } - // } - MOVB (R15), R3 - MOVB R3, (R7) - ADD $1, R15, R15 - ADD $1, R7, R7 - SUB $1, R4, R4 - CBNZ R4, verySlowForwardCopy - B loop - - // The code above handles copy tags. - // ---------------------------------------- - -end: - // This is the end of the "for s < len(src)". - // - // if d != len(dst) { etc } - CMP R10, R7 - BNE errCorrupt - - // return 0 - MOVD $0, ret+48(FP) - RET - -errCorrupt: - // return decodeErrCodeCorrupt - MOVD $1, R2 - MOVD R2, ret+48(FP) - RET diff --git a/vendor/github.com/golang/snappy/decode_asm.go b/vendor/github.com/golang/snappy/decode_asm.go deleted file mode 100644 index 7082b3491..000000000 --- a/vendor/github.com/golang/snappy/decode_asm.go +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm -// +build amd64 arm64 - -package snappy - -// decode has the same semantics as in decode_other.go. -// -//go:noescape -func decode(dst, src []byte) int diff --git a/vendor/github.com/golang/snappy/decode_other.go b/vendor/github.com/golang/snappy/decode_other.go deleted file mode 100644 index 2f672be55..000000000 --- a/vendor/github.com/golang/snappy/decode_other.go +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !amd64,!arm64 appengine !gc noasm - -package snappy - -// decode writes the decoding of src to dst. It assumes that the varint-encoded -// length of the decompressed bytes has already been read, and that len(dst) -// equals that length. -// -// It returns 0 on success or a decodeErrCodeXxx error code on failure. -func decode(dst, src []byte) int { - var d, s, offset, length int - for s < len(src) { - switch src[s] & 0x03 { - case tagLiteral: - x := uint32(src[s] >> 2) - switch { - case x < 60: - s++ - case x == 60: - s += 2 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-1]) - case x == 61: - s += 3 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-2]) | uint32(src[s-1])<<8 - case x == 62: - s += 4 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 - case x == 63: - s += 5 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 - } - length = int(x) + 1 - if length <= 0 { - return decodeErrCodeUnsupportedLiteralLength - } - if length > len(dst)-d || length > len(src)-s { - return decodeErrCodeCorrupt - } - copy(dst[d:], src[s:s+length]) - d += length - s += length - continue - - case tagCopy1: - s += 2 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - length = 4 + int(src[s-2])>>2&0x7 - offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) - - case tagCopy2: - s += 3 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - length = 1 + int(src[s-3])>>2 - offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) - - case tagCopy4: - s += 5 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - length = 1 + int(src[s-5])>>2 - offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) - } - - if offset <= 0 || d < offset || length > len(dst)-d { - return decodeErrCodeCorrupt - } - // Copy from an earlier sub-slice of dst to a later sub-slice. - // If no overlap, use the built-in copy: - if offset >= length { - copy(dst[d:d+length], dst[d-offset:]) - d += length - continue - } - - // Unlike the built-in copy function, this byte-by-byte copy always runs - // forwards, even if the slices overlap. Conceptually, this is: - // - // d += forwardCopy(dst[d:d+length], dst[d-offset:]) - // - // We align the slices into a and b and show the compiler they are the same size. - // This allows the loop to run without bounds checks. - a := dst[d : d+length] - b := dst[d-offset:] - b = b[:len(a)] - for i := range a { - a[i] = b[i] - } - d += length - } - if d != len(dst) { - return decodeErrCodeCorrupt - } - return 0 -} diff --git a/vendor/github.com/golang/snappy/encode.go b/vendor/github.com/golang/snappy/encode.go deleted file mode 100644 index 7f2365707..000000000 --- a/vendor/github.com/golang/snappy/encode.go +++ /dev/null @@ -1,289 +0,0 @@ -// Copyright 2011 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package snappy - -import ( - "encoding/binary" - "errors" - "io" -) - -// Encode returns the encoded form of src. The returned slice may be a sub- -// slice of dst if dst was large enough to hold the entire encoded block. -// Otherwise, a newly allocated slice will be returned. -// -// The dst and src must not overlap. It is valid to pass a nil dst. -// -// Encode handles the Snappy block format, not the Snappy stream format. -func Encode(dst, src []byte) []byte { - if n := MaxEncodedLen(len(src)); n < 0 { - panic(ErrTooLarge) - } else if len(dst) < n { - dst = make([]byte, n) - } - - // The block starts with the varint-encoded length of the decompressed bytes. - d := binary.PutUvarint(dst, uint64(len(src))) - - for len(src) > 0 { - p := src - src = nil - if len(p) > maxBlockSize { - p, src = p[:maxBlockSize], p[maxBlockSize:] - } - if len(p) < minNonLiteralBlockSize { - d += emitLiteral(dst[d:], p) - } else { - d += encodeBlock(dst[d:], p) - } - } - return dst[:d] -} - -// inputMargin is the minimum number of extra input bytes to keep, inside -// encodeBlock's inner loop. On some architectures, this margin lets us -// implement a fast path for emitLiteral, where the copy of short (<= 16 byte) -// literals can be implemented as a single load to and store from a 16-byte -// register. That literal's actual length can be as short as 1 byte, so this -// can copy up to 15 bytes too much, but that's OK as subsequent iterations of -// the encoding loop will fix up the copy overrun, and this inputMargin ensures -// that we don't overrun the dst and src buffers. -const inputMargin = 16 - 1 - -// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that -// could be encoded with a copy tag. This is the minimum with respect to the -// algorithm used by encodeBlock, not a minimum enforced by the file format. -// -// The encoded output must start with at least a 1 byte literal, as there are -// no previous bytes to copy. A minimal (1 byte) copy after that, generated -// from an emitCopy call in encodeBlock's main loop, would require at least -// another inputMargin bytes, for the reason above: we want any emitLiteral -// calls inside encodeBlock's main loop to use the fast path if possible, which -// requires being able to overrun by inputMargin bytes. Thus, -// minNonLiteralBlockSize equals 1 + 1 + inputMargin. -// -// The C++ code doesn't use this exact threshold, but it could, as discussed at -// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion -// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an -// optimization. It should not affect the encoded form. This is tested by -// TestSameEncodingAsCppShortCopies. -const minNonLiteralBlockSize = 1 + 1 + inputMargin - -// MaxEncodedLen returns the maximum length of a snappy block, given its -// uncompressed length. -// -// It will return a negative value if srcLen is too large to encode. -func MaxEncodedLen(srcLen int) int { - n := uint64(srcLen) - if n > 0xffffffff { - return -1 - } - // Compressed data can be defined as: - // compressed := item* literal* - // item := literal* copy - // - // The trailing literal sequence has a space blowup of at most 62/60 - // since a literal of length 60 needs one tag byte + one extra byte - // for length information. - // - // Item blowup is trickier to measure. Suppose the "copy" op copies - // 4 bytes of data. Because of a special check in the encoding code, - // we produce a 4-byte copy only if the offset is < 65536. Therefore - // the copy op takes 3 bytes to encode, and this type of item leads - // to at most the 62/60 blowup for representing literals. - // - // Suppose the "copy" op copies 5 bytes of data. If the offset is big - // enough, it will take 5 bytes to encode the copy op. Therefore the - // worst case here is a one-byte literal followed by a five-byte copy. - // That is, 6 bytes of input turn into 7 bytes of "compressed" data. - // - // This last factor dominates the blowup, so the final estimate is: - n = 32 + n + n/6 - if n > 0xffffffff { - return -1 - } - return int(n) -} - -var errClosed = errors.New("snappy: Writer is closed") - -// NewWriter returns a new Writer that compresses to w. -// -// The Writer returned does not buffer writes. There is no need to Flush or -// Close such a Writer. -// -// Deprecated: the Writer returned is not suitable for many small writes, only -// for few large writes. Use NewBufferedWriter instead, which is efficient -// regardless of the frequency and shape of the writes, and remember to Close -// that Writer when done. -func NewWriter(w io.Writer) *Writer { - return &Writer{ - w: w, - obuf: make([]byte, obufLen), - } -} - -// NewBufferedWriter returns a new Writer that compresses to w, using the -// framing format described at -// https://github.com/google/snappy/blob/master/framing_format.txt -// -// The Writer returned buffers writes. Users must call Close to guarantee all -// data has been forwarded to the underlying io.Writer. They may also call -// Flush zero or more times before calling Close. -func NewBufferedWriter(w io.Writer) *Writer { - return &Writer{ - w: w, - ibuf: make([]byte, 0, maxBlockSize), - obuf: make([]byte, obufLen), - } -} - -// Writer is an io.Writer that can write Snappy-compressed bytes. -// -// Writer handles the Snappy stream format, not the Snappy block format. -type Writer struct { - w io.Writer - err error - - // ibuf is a buffer for the incoming (uncompressed) bytes. - // - // Its use is optional. For backwards compatibility, Writers created by the - // NewWriter function have ibuf == nil, do not buffer incoming bytes, and - // therefore do not need to be Flush'ed or Close'd. - ibuf []byte - - // obuf is a buffer for the outgoing (compressed) bytes. - obuf []byte - - // wroteStreamHeader is whether we have written the stream header. - wroteStreamHeader bool -} - -// Reset discards the writer's state and switches the Snappy writer to write to -// w. This permits reusing a Writer rather than allocating a new one. -func (w *Writer) Reset(writer io.Writer) { - w.w = writer - w.err = nil - if w.ibuf != nil { - w.ibuf = w.ibuf[:0] - } - w.wroteStreamHeader = false -} - -// Write satisfies the io.Writer interface. -func (w *Writer) Write(p []byte) (nRet int, errRet error) { - if w.ibuf == nil { - // Do not buffer incoming bytes. This does not perform or compress well - // if the caller of Writer.Write writes many small slices. This - // behavior is therefore deprecated, but still supported for backwards - // compatibility with code that doesn't explicitly Flush or Close. - return w.write(p) - } - - // The remainder of this method is based on bufio.Writer.Write from the - // standard library. - - for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil { - var n int - if len(w.ibuf) == 0 { - // Large write, empty buffer. - // Write directly from p to avoid copy. - n, _ = w.write(p) - } else { - n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) - w.ibuf = w.ibuf[:len(w.ibuf)+n] - w.Flush() - } - nRet += n - p = p[n:] - } - if w.err != nil { - return nRet, w.err - } - n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) - w.ibuf = w.ibuf[:len(w.ibuf)+n] - nRet += n - return nRet, nil -} - -func (w *Writer) write(p []byte) (nRet int, errRet error) { - if w.err != nil { - return 0, w.err - } - for len(p) > 0 { - obufStart := len(magicChunk) - if !w.wroteStreamHeader { - w.wroteStreamHeader = true - copy(w.obuf, magicChunk) - obufStart = 0 - } - - var uncompressed []byte - if len(p) > maxBlockSize { - uncompressed, p = p[:maxBlockSize], p[maxBlockSize:] - } else { - uncompressed, p = p, nil - } - checksum := crc(uncompressed) - - // Compress the buffer, discarding the result if the improvement - // isn't at least 12.5%. - compressed := Encode(w.obuf[obufHeaderLen:], uncompressed) - chunkType := uint8(chunkTypeCompressedData) - chunkLen := 4 + len(compressed) - obufEnd := obufHeaderLen + len(compressed) - if len(compressed) >= len(uncompressed)-len(uncompressed)/8 { - chunkType = chunkTypeUncompressedData - chunkLen = 4 + len(uncompressed) - obufEnd = obufHeaderLen - } - - // Fill in the per-chunk header that comes before the body. - w.obuf[len(magicChunk)+0] = chunkType - w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0) - w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8) - w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16) - w.obuf[len(magicChunk)+4] = uint8(checksum >> 0) - w.obuf[len(magicChunk)+5] = uint8(checksum >> 8) - w.obuf[len(magicChunk)+6] = uint8(checksum >> 16) - w.obuf[len(magicChunk)+7] = uint8(checksum >> 24) - - if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil { - w.err = err - return nRet, err - } - if chunkType == chunkTypeUncompressedData { - if _, err := w.w.Write(uncompressed); err != nil { - w.err = err - return nRet, err - } - } - nRet += len(uncompressed) - } - return nRet, nil -} - -// Flush flushes the Writer to its underlying io.Writer. -func (w *Writer) Flush() error { - if w.err != nil { - return w.err - } - if len(w.ibuf) == 0 { - return nil - } - w.write(w.ibuf) - w.ibuf = w.ibuf[:0] - return w.err -} - -// Close calls Flush and then closes the Writer. -func (w *Writer) Close() error { - w.Flush() - ret := w.err - if w.err == nil { - w.err = errClosed - } - return ret -} diff --git a/vendor/github.com/golang/snappy/encode_amd64.s b/vendor/github.com/golang/snappy/encode_amd64.s deleted file mode 100644 index adfd979fe..000000000 --- a/vendor/github.com/golang/snappy/encode_amd64.s +++ /dev/null @@ -1,730 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm - -#include "textflag.h" - -// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a -// Go toolchain regression. See https://github.com/golang/go/issues/15426 and -// https://github.com/golang/snappy/issues/29 -// -// As a workaround, the package was built with a known good assembler, and -// those instructions were disassembled by "objdump -d" to yield the -// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 -// style comments, in AT&T asm syntax. Note that rsp here is a physical -// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm). -// The instructions were then encoded as "BYTE $0x.." sequences, which assemble -// fine on Go 1.6. - -// The asm code generally follows the pure Go code in encode_other.go, except -// where marked with a "!!!". - -// ---------------------------------------------------------------------------- - -// func emitLiteral(dst, lit []byte) int -// -// All local variables fit into registers. The register allocation: -// - AX len(lit) -// - BX n -// - DX return value -// - DI &dst[i] -// - R10 &lit[0] -// -// The 24 bytes of stack space is to call runtime·memmove. -// -// The unusual register allocation of local variables, such as R10 for the -// source pointer, matches the allocation used at the call site in encodeBlock, -// which makes it easier to manually inline this function. -TEXT ·emitLiteral(SB), NOSPLIT, $24-56 - MOVQ dst_base+0(FP), DI - MOVQ lit_base+24(FP), R10 - MOVQ lit_len+32(FP), AX - MOVQ AX, DX - MOVL AX, BX - SUBL $1, BX - - CMPL BX, $60 - JLT oneByte - CMPL BX, $256 - JLT twoBytes - -threeBytes: - MOVB $0xf4, 0(DI) - MOVW BX, 1(DI) - ADDQ $3, DI - ADDQ $3, DX - JMP memmove - -twoBytes: - MOVB $0xf0, 0(DI) - MOVB BX, 1(DI) - ADDQ $2, DI - ADDQ $2, DX - JMP memmove - -oneByte: - SHLB $2, BX - MOVB BX, 0(DI) - ADDQ $1, DI - ADDQ $1, DX - -memmove: - MOVQ DX, ret+48(FP) - - // copy(dst[i:], lit) - // - // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push - // DI, R10 and AX as arguments. - MOVQ DI, 0(SP) - MOVQ R10, 8(SP) - MOVQ AX, 16(SP) - CALL runtime·memmove(SB) - RET - -// ---------------------------------------------------------------------------- - -// func emitCopy(dst []byte, offset, length int) int -// -// All local variables fit into registers. The register allocation: -// - AX length -// - SI &dst[0] -// - DI &dst[i] -// - R11 offset -// -// The unusual register allocation of local variables, such as R11 for the -// offset, matches the allocation used at the call site in encodeBlock, which -// makes it easier to manually inline this function. -TEXT ·emitCopy(SB), NOSPLIT, $0-48 - MOVQ dst_base+0(FP), DI - MOVQ DI, SI - MOVQ offset+24(FP), R11 - MOVQ length+32(FP), AX - -loop0: - // for length >= 68 { etc } - CMPL AX, $68 - JLT step1 - - // Emit a length 64 copy, encoded as 3 bytes. - MOVB $0xfe, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $64, AX - JMP loop0 - -step1: - // if length > 64 { etc } - CMPL AX, $64 - JLE step2 - - // Emit a length 60 copy, encoded as 3 bytes. - MOVB $0xee, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $60, AX - -step2: - // if length >= 12 || offset >= 2048 { goto step3 } - CMPL AX, $12 - JGE step3 - CMPL R11, $2048 - JGE step3 - - // Emit the remaining copy, encoded as 2 bytes. - MOVB R11, 1(DI) - SHRL $8, R11 - SHLB $5, R11 - SUBB $4, AX - SHLB $2, AX - ORB AX, R11 - ORB $1, R11 - MOVB R11, 0(DI) - ADDQ $2, DI - - // Return the number of bytes written. - SUBQ SI, DI - MOVQ DI, ret+40(FP) - RET - -step3: - // Emit the remaining copy, encoded as 3 bytes. - SUBL $1, AX - SHLB $2, AX - ORB $2, AX - MOVB AX, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - - // Return the number of bytes written. - SUBQ SI, DI - MOVQ DI, ret+40(FP) - RET - -// ---------------------------------------------------------------------------- - -// func extendMatch(src []byte, i, j int) int -// -// All local variables fit into registers. The register allocation: -// - DX &src[0] -// - SI &src[j] -// - R13 &src[len(src) - 8] -// - R14 &src[len(src)] -// - R15 &src[i] -// -// The unusual register allocation of local variables, such as R15 for a source -// pointer, matches the allocation used at the call site in encodeBlock, which -// makes it easier to manually inline this function. -TEXT ·extendMatch(SB), NOSPLIT, $0-48 - MOVQ src_base+0(FP), DX - MOVQ src_len+8(FP), R14 - MOVQ i+24(FP), R15 - MOVQ j+32(FP), SI - ADDQ DX, R14 - ADDQ DX, R15 - ADDQ DX, SI - MOVQ R14, R13 - SUBQ $8, R13 - -cmp8: - // As long as we are 8 or more bytes before the end of src, we can load and - // compare 8 bytes at a time. If those 8 bytes are equal, repeat. - CMPQ SI, R13 - JA cmp1 - MOVQ (R15), AX - MOVQ (SI), BX - CMPQ AX, BX - JNE bsf - ADDQ $8, R15 - ADDQ $8, SI - JMP cmp8 - -bsf: - // If those 8 bytes were not equal, XOR the two 8 byte values, and return - // the index of the first byte that differs. The BSF instruction finds the - // least significant 1 bit, the amd64 architecture is little-endian, and - // the shift by 3 converts a bit index to a byte index. - XORQ AX, BX - BSFQ BX, BX - SHRQ $3, BX - ADDQ BX, SI - - // Convert from &src[ret] to ret. - SUBQ DX, SI - MOVQ SI, ret+40(FP) - RET - -cmp1: - // In src's tail, compare 1 byte at a time. - CMPQ SI, R14 - JAE extendMatchEnd - MOVB (R15), AX - MOVB (SI), BX - CMPB AX, BX - JNE extendMatchEnd - ADDQ $1, R15 - ADDQ $1, SI - JMP cmp1 - -extendMatchEnd: - // Convert from &src[ret] to ret. - SUBQ DX, SI - MOVQ SI, ret+40(FP) - RET - -// ---------------------------------------------------------------------------- - -// func encodeBlock(dst, src []byte) (d int) -// -// All local variables fit into registers, other than "var table". The register -// allocation: -// - AX . . -// - BX . . -// - CX 56 shift (note that amd64 shifts by non-immediates must use CX). -// - DX 64 &src[0], tableSize -// - SI 72 &src[s] -// - DI 80 &dst[d] -// - R9 88 sLimit -// - R10 . &src[nextEmit] -// - R11 96 prevHash, currHash, nextHash, offset -// - R12 104 &src[base], skip -// - R13 . &src[nextS], &src[len(src) - 8] -// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x -// - R15 112 candidate -// -// The second column (56, 64, etc) is the stack offset to spill the registers -// when calling other functions. We could pack this slightly tighter, but it's -// simpler to have a dedicated spill map independent of the function called. -// -// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An -// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill -// local variables (registers) during calls gives 32768 + 56 + 64 = 32888. -TEXT ·encodeBlock(SB), 0, $32888-56 - MOVQ dst_base+0(FP), DI - MOVQ src_base+24(FP), SI - MOVQ src_len+32(FP), R14 - - // shift, tableSize := uint32(32-8), 1<<8 - MOVQ $24, CX - MOVQ $256, DX - -calcShift: - // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { - // shift-- - // } - CMPQ DX, $16384 - JGE varTable - CMPQ DX, R14 - JGE varTable - SUBQ $1, CX - SHLQ $1, DX - JMP calcShift - -varTable: - // var table [maxTableSize]uint16 - // - // In the asm code, unlike the Go code, we can zero-initialize only the - // first tableSize elements. Each uint16 element is 2 bytes and each MOVOU - // writes 16 bytes, so we can do only tableSize/8 writes instead of the - // 2048 writes that would zero-initialize all of table's 32768 bytes. - SHRQ $3, DX - LEAQ table-32768(SP), BX - PXOR X0, X0 - -memclr: - MOVOU X0, 0(BX) - ADDQ $16, BX - SUBQ $1, DX - JNZ memclr - - // !!! DX = &src[0] - MOVQ SI, DX - - // sLimit := len(src) - inputMargin - MOVQ R14, R9 - SUBQ $15, R9 - - // !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't - // change for the rest of the function. - MOVQ CX, 56(SP) - MOVQ DX, 64(SP) - MOVQ R9, 88(SP) - - // nextEmit := 0 - MOVQ DX, R10 - - // s := 1 - ADDQ $1, SI - - // nextHash := hash(load32(src, s), shift) - MOVL 0(SI), R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - -outer: - // for { etc } - - // skip := 32 - MOVQ $32, R12 - - // nextS := s - MOVQ SI, R13 - - // candidate := 0 - MOVQ $0, R15 - -inner0: - // for { etc } - - // s := nextS - MOVQ R13, SI - - // bytesBetweenHashLookups := skip >> 5 - MOVQ R12, R14 - SHRQ $5, R14 - - // nextS = s + bytesBetweenHashLookups - ADDQ R14, R13 - - // skip += bytesBetweenHashLookups - ADDQ R14, R12 - - // if nextS > sLimit { goto emitRemainder } - MOVQ R13, AX - SUBQ DX, AX - CMPQ AX, R9 - JA emitRemainder - - // candidate = int(table[nextHash]) - // XXX: MOVWQZX table-32768(SP)(R11*2), R15 - // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 - BYTE $0x4e - BYTE $0x0f - BYTE $0xb7 - BYTE $0x7c - BYTE $0x5c - BYTE $0x78 - - // table[nextHash] = uint16(s) - MOVQ SI, AX - SUBQ DX, AX - - // XXX: MOVW AX, table-32768(SP)(R11*2) - // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) - BYTE $0x66 - BYTE $0x42 - BYTE $0x89 - BYTE $0x44 - BYTE $0x5c - BYTE $0x78 - - // nextHash = hash(load32(src, nextS), shift) - MOVL 0(R13), R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // if load32(src, s) != load32(src, candidate) { continue } break - MOVL 0(SI), AX - MOVL (DX)(R15*1), BX - CMPL AX, BX - JNE inner0 - -fourByteMatch: - // As per the encode_other.go code: - // - // A 4-byte match has been found. We'll later see etc. - - // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment - // on inputMargin in encode.go. - MOVQ SI, AX - SUBQ R10, AX - CMPQ AX, $16 - JLE emitLiteralFastPath - - // ---------------------------------------- - // Begin inline of the emitLiteral call. - // - // d += emitLiteral(dst[d:], src[nextEmit:s]) - - MOVL AX, BX - SUBL $1, BX - - CMPL BX, $60 - JLT inlineEmitLiteralOneByte - CMPL BX, $256 - JLT inlineEmitLiteralTwoBytes - -inlineEmitLiteralThreeBytes: - MOVB $0xf4, 0(DI) - MOVW BX, 1(DI) - ADDQ $3, DI - JMP inlineEmitLiteralMemmove - -inlineEmitLiteralTwoBytes: - MOVB $0xf0, 0(DI) - MOVB BX, 1(DI) - ADDQ $2, DI - JMP inlineEmitLiteralMemmove - -inlineEmitLiteralOneByte: - SHLB $2, BX - MOVB BX, 0(DI) - ADDQ $1, DI - -inlineEmitLiteralMemmove: - // Spill local variables (registers) onto the stack; call; unspill. - // - // copy(dst[i:], lit) - // - // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push - // DI, R10 and AX as arguments. - MOVQ DI, 0(SP) - MOVQ R10, 8(SP) - MOVQ AX, 16(SP) - ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)". - MOVQ SI, 72(SP) - MOVQ DI, 80(SP) - MOVQ R15, 112(SP) - CALL runtime·memmove(SB) - MOVQ 56(SP), CX - MOVQ 64(SP), DX - MOVQ 72(SP), SI - MOVQ 80(SP), DI - MOVQ 88(SP), R9 - MOVQ 112(SP), R15 - JMP inner1 - -inlineEmitLiteralEnd: - // End inline of the emitLiteral call. - // ---------------------------------------- - -emitLiteralFastPath: - // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". - MOVB AX, BX - SUBB $1, BX - SHLB $2, BX - MOVB BX, (DI) - ADDQ $1, DI - - // !!! Implement the copy from lit to dst as a 16-byte load and store. - // (Encode's documentation says that dst and src must not overlap.) - // - // This always copies 16 bytes, instead of only len(lit) bytes, but that's - // OK. Subsequent iterations will fix up the overrun. - // - // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or - // 16-byte loads and stores. This technique probably wouldn't be as - // effective on architectures that are fussier about alignment. - MOVOU 0(R10), X0 - MOVOU X0, 0(DI) - ADDQ AX, DI - -inner1: - // for { etc } - - // base := s - MOVQ SI, R12 - - // !!! offset := base - candidate - MOVQ R12, R11 - SUBQ R15, R11 - SUBQ DX, R11 - - // ---------------------------------------- - // Begin inline of the extendMatch call. - // - // s = extendMatch(src, candidate+4, s+4) - - // !!! R14 = &src[len(src)] - MOVQ src_len+32(FP), R14 - ADDQ DX, R14 - - // !!! R13 = &src[len(src) - 8] - MOVQ R14, R13 - SUBQ $8, R13 - - // !!! R15 = &src[candidate + 4] - ADDQ $4, R15 - ADDQ DX, R15 - - // !!! s += 4 - ADDQ $4, SI - -inlineExtendMatchCmp8: - // As long as we are 8 or more bytes before the end of src, we can load and - // compare 8 bytes at a time. If those 8 bytes are equal, repeat. - CMPQ SI, R13 - JA inlineExtendMatchCmp1 - MOVQ (R15), AX - MOVQ (SI), BX - CMPQ AX, BX - JNE inlineExtendMatchBSF - ADDQ $8, R15 - ADDQ $8, SI - JMP inlineExtendMatchCmp8 - -inlineExtendMatchBSF: - // If those 8 bytes were not equal, XOR the two 8 byte values, and return - // the index of the first byte that differs. The BSF instruction finds the - // least significant 1 bit, the amd64 architecture is little-endian, and - // the shift by 3 converts a bit index to a byte index. - XORQ AX, BX - BSFQ BX, BX - SHRQ $3, BX - ADDQ BX, SI - JMP inlineExtendMatchEnd - -inlineExtendMatchCmp1: - // In src's tail, compare 1 byte at a time. - CMPQ SI, R14 - JAE inlineExtendMatchEnd - MOVB (R15), AX - MOVB (SI), BX - CMPB AX, BX - JNE inlineExtendMatchEnd - ADDQ $1, R15 - ADDQ $1, SI - JMP inlineExtendMatchCmp1 - -inlineExtendMatchEnd: - // End inline of the extendMatch call. - // ---------------------------------------- - - // ---------------------------------------- - // Begin inline of the emitCopy call. - // - // d += emitCopy(dst[d:], base-candidate, s-base) - - // !!! length := s - base - MOVQ SI, AX - SUBQ R12, AX - -inlineEmitCopyLoop0: - // for length >= 68 { etc } - CMPL AX, $68 - JLT inlineEmitCopyStep1 - - // Emit a length 64 copy, encoded as 3 bytes. - MOVB $0xfe, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $64, AX - JMP inlineEmitCopyLoop0 - -inlineEmitCopyStep1: - // if length > 64 { etc } - CMPL AX, $64 - JLE inlineEmitCopyStep2 - - // Emit a length 60 copy, encoded as 3 bytes. - MOVB $0xee, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $60, AX - -inlineEmitCopyStep2: - // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } - CMPL AX, $12 - JGE inlineEmitCopyStep3 - CMPL R11, $2048 - JGE inlineEmitCopyStep3 - - // Emit the remaining copy, encoded as 2 bytes. - MOVB R11, 1(DI) - SHRL $8, R11 - SHLB $5, R11 - SUBB $4, AX - SHLB $2, AX - ORB AX, R11 - ORB $1, R11 - MOVB R11, 0(DI) - ADDQ $2, DI - JMP inlineEmitCopyEnd - -inlineEmitCopyStep3: - // Emit the remaining copy, encoded as 3 bytes. - SUBL $1, AX - SHLB $2, AX - ORB $2, AX - MOVB AX, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - -inlineEmitCopyEnd: - // End inline of the emitCopy call. - // ---------------------------------------- - - // nextEmit = s - MOVQ SI, R10 - - // if s >= sLimit { goto emitRemainder } - MOVQ SI, AX - SUBQ DX, AX - CMPQ AX, R9 - JAE emitRemainder - - // As per the encode_other.go code: - // - // We could immediately etc. - - // x := load64(src, s-1) - MOVQ -1(SI), R14 - - // prevHash := hash(uint32(x>>0), shift) - MOVL R14, R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // table[prevHash] = uint16(s-1) - MOVQ SI, AX - SUBQ DX, AX - SUBQ $1, AX - - // XXX: MOVW AX, table-32768(SP)(R11*2) - // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) - BYTE $0x66 - BYTE $0x42 - BYTE $0x89 - BYTE $0x44 - BYTE $0x5c - BYTE $0x78 - - // currHash := hash(uint32(x>>8), shift) - SHRQ $8, R14 - MOVL R14, R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // candidate = int(table[currHash]) - // XXX: MOVWQZX table-32768(SP)(R11*2), R15 - // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 - BYTE $0x4e - BYTE $0x0f - BYTE $0xb7 - BYTE $0x7c - BYTE $0x5c - BYTE $0x78 - - // table[currHash] = uint16(s) - ADDQ $1, AX - - // XXX: MOVW AX, table-32768(SP)(R11*2) - // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) - BYTE $0x66 - BYTE $0x42 - BYTE $0x89 - BYTE $0x44 - BYTE $0x5c - BYTE $0x78 - - // if uint32(x>>8) == load32(src, candidate) { continue } - MOVL (DX)(R15*1), BX - CMPL R14, BX - JEQ inner1 - - // nextHash = hash(uint32(x>>16), shift) - SHRQ $8, R14 - MOVL R14, R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // s++ - ADDQ $1, SI - - // break out of the inner1 for loop, i.e. continue the outer loop. - JMP outer - -emitRemainder: - // if nextEmit < len(src) { etc } - MOVQ src_len+32(FP), AX - ADDQ DX, AX - CMPQ R10, AX - JEQ encodeBlockEnd - - // d += emitLiteral(dst[d:], src[nextEmit:]) - // - // Push args. - MOVQ DI, 0(SP) - MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative. - MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative. - MOVQ R10, 24(SP) - SUBQ R10, AX - MOVQ AX, 32(SP) - MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative. - - // Spill local variables (registers) onto the stack; call; unspill. - MOVQ DI, 80(SP) - CALL ·emitLiteral(SB) - MOVQ 80(SP), DI - - // Finish the "d +=" part of "d += emitLiteral(etc)". - ADDQ 48(SP), DI - -encodeBlockEnd: - MOVQ dst_base+0(FP), AX - SUBQ AX, DI - MOVQ DI, d+48(FP) - RET diff --git a/vendor/github.com/golang/snappy/encode_arm64.s b/vendor/github.com/golang/snappy/encode_arm64.s deleted file mode 100644 index f8d54adfc..000000000 --- a/vendor/github.com/golang/snappy/encode_arm64.s +++ /dev/null @@ -1,722 +0,0 @@ -// Copyright 2020 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm - -#include "textflag.h" - -// The asm code generally follows the pure Go code in encode_other.go, except -// where marked with a "!!!". - -// ---------------------------------------------------------------------------- - -// func emitLiteral(dst, lit []byte) int -// -// All local variables fit into registers. The register allocation: -// - R3 len(lit) -// - R4 n -// - R6 return value -// - R8 &dst[i] -// - R10 &lit[0] -// -// The 32 bytes of stack space is to call runtime·memmove. -// -// The unusual register allocation of local variables, such as R10 for the -// source pointer, matches the allocation used at the call site in encodeBlock, -// which makes it easier to manually inline this function. -TEXT ·emitLiteral(SB), NOSPLIT, $32-56 - MOVD dst_base+0(FP), R8 - MOVD lit_base+24(FP), R10 - MOVD lit_len+32(FP), R3 - MOVD R3, R6 - MOVW R3, R4 - SUBW $1, R4, R4 - - CMPW $60, R4 - BLT oneByte - CMPW $256, R4 - BLT twoBytes - -threeBytes: - MOVD $0xf4, R2 - MOVB R2, 0(R8) - MOVW R4, 1(R8) - ADD $3, R8, R8 - ADD $3, R6, R6 - B memmove - -twoBytes: - MOVD $0xf0, R2 - MOVB R2, 0(R8) - MOVB R4, 1(R8) - ADD $2, R8, R8 - ADD $2, R6, R6 - B memmove - -oneByte: - LSLW $2, R4, R4 - MOVB R4, 0(R8) - ADD $1, R8, R8 - ADD $1, R6, R6 - -memmove: - MOVD R6, ret+48(FP) - - // copy(dst[i:], lit) - // - // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push - // R8, R10 and R3 as arguments. - MOVD R8, 8(RSP) - MOVD R10, 16(RSP) - MOVD R3, 24(RSP) - CALL runtime·memmove(SB) - RET - -// ---------------------------------------------------------------------------- - -// func emitCopy(dst []byte, offset, length int) int -// -// All local variables fit into registers. The register allocation: -// - R3 length -// - R7 &dst[0] -// - R8 &dst[i] -// - R11 offset -// -// The unusual register allocation of local variables, such as R11 for the -// offset, matches the allocation used at the call site in encodeBlock, which -// makes it easier to manually inline this function. -TEXT ·emitCopy(SB), NOSPLIT, $0-48 - MOVD dst_base+0(FP), R8 - MOVD R8, R7 - MOVD offset+24(FP), R11 - MOVD length+32(FP), R3 - -loop0: - // for length >= 68 { etc } - CMPW $68, R3 - BLT step1 - - // Emit a length 64 copy, encoded as 3 bytes. - MOVD $0xfe, R2 - MOVB R2, 0(R8) - MOVW R11, 1(R8) - ADD $3, R8, R8 - SUB $64, R3, R3 - B loop0 - -step1: - // if length > 64 { etc } - CMP $64, R3 - BLE step2 - - // Emit a length 60 copy, encoded as 3 bytes. - MOVD $0xee, R2 - MOVB R2, 0(R8) - MOVW R11, 1(R8) - ADD $3, R8, R8 - SUB $60, R3, R3 - -step2: - // if length >= 12 || offset >= 2048 { goto step3 } - CMP $12, R3 - BGE step3 - CMPW $2048, R11 - BGE step3 - - // Emit the remaining copy, encoded as 2 bytes. - MOVB R11, 1(R8) - LSRW $3, R11, R11 - AND $0xe0, R11, R11 - SUB $4, R3, R3 - LSLW $2, R3 - AND $0xff, R3, R3 - ORRW R3, R11, R11 - ORRW $1, R11, R11 - MOVB R11, 0(R8) - ADD $2, R8, R8 - - // Return the number of bytes written. - SUB R7, R8, R8 - MOVD R8, ret+40(FP) - RET - -step3: - // Emit the remaining copy, encoded as 3 bytes. - SUB $1, R3, R3 - AND $0xff, R3, R3 - LSLW $2, R3, R3 - ORRW $2, R3, R3 - MOVB R3, 0(R8) - MOVW R11, 1(R8) - ADD $3, R8, R8 - - // Return the number of bytes written. - SUB R7, R8, R8 - MOVD R8, ret+40(FP) - RET - -// ---------------------------------------------------------------------------- - -// func extendMatch(src []byte, i, j int) int -// -// All local variables fit into registers. The register allocation: -// - R6 &src[0] -// - R7 &src[j] -// - R13 &src[len(src) - 8] -// - R14 &src[len(src)] -// - R15 &src[i] -// -// The unusual register allocation of local variables, such as R15 for a source -// pointer, matches the allocation used at the call site in encodeBlock, which -// makes it easier to manually inline this function. -TEXT ·extendMatch(SB), NOSPLIT, $0-48 - MOVD src_base+0(FP), R6 - MOVD src_len+8(FP), R14 - MOVD i+24(FP), R15 - MOVD j+32(FP), R7 - ADD R6, R14, R14 - ADD R6, R15, R15 - ADD R6, R7, R7 - MOVD R14, R13 - SUB $8, R13, R13 - -cmp8: - // As long as we are 8 or more bytes before the end of src, we can load and - // compare 8 bytes at a time. If those 8 bytes are equal, repeat. - CMP R13, R7 - BHI cmp1 - MOVD (R15), R3 - MOVD (R7), R4 - CMP R4, R3 - BNE bsf - ADD $8, R15, R15 - ADD $8, R7, R7 - B cmp8 - -bsf: - // If those 8 bytes were not equal, XOR the two 8 byte values, and return - // the index of the first byte that differs. - // RBIT reverses the bit order, then CLZ counts the leading zeros, the - // combination of which finds the least significant bit which is set. - // The arm64 architecture is little-endian, and the shift by 3 converts - // a bit index to a byte index. - EOR R3, R4, R4 - RBIT R4, R4 - CLZ R4, R4 - ADD R4>>3, R7, R7 - - // Convert from &src[ret] to ret. - SUB R6, R7, R7 - MOVD R7, ret+40(FP) - RET - -cmp1: - // In src's tail, compare 1 byte at a time. - CMP R7, R14 - BLS extendMatchEnd - MOVB (R15), R3 - MOVB (R7), R4 - CMP R4, R3 - BNE extendMatchEnd - ADD $1, R15, R15 - ADD $1, R7, R7 - B cmp1 - -extendMatchEnd: - // Convert from &src[ret] to ret. - SUB R6, R7, R7 - MOVD R7, ret+40(FP) - RET - -// ---------------------------------------------------------------------------- - -// func encodeBlock(dst, src []byte) (d int) -// -// All local variables fit into registers, other than "var table". The register -// allocation: -// - R3 . . -// - R4 . . -// - R5 64 shift -// - R6 72 &src[0], tableSize -// - R7 80 &src[s] -// - R8 88 &dst[d] -// - R9 96 sLimit -// - R10 . &src[nextEmit] -// - R11 104 prevHash, currHash, nextHash, offset -// - R12 112 &src[base], skip -// - R13 . &src[nextS], &src[len(src) - 8] -// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x -// - R15 120 candidate -// - R16 . hash constant, 0x1e35a7bd -// - R17 . &table -// - . 128 table -// -// The second column (64, 72, etc) is the stack offset to spill the registers -// when calling other functions. We could pack this slightly tighter, but it's -// simpler to have a dedicated spill map independent of the function called. -// -// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An -// extra 64 bytes, to call other functions, and an extra 64 bytes, to spill -// local variables (registers) during calls gives 32768 + 64 + 64 = 32896. -TEXT ·encodeBlock(SB), 0, $32896-56 - MOVD dst_base+0(FP), R8 - MOVD src_base+24(FP), R7 - MOVD src_len+32(FP), R14 - - // shift, tableSize := uint32(32-8), 1<<8 - MOVD $24, R5 - MOVD $256, R6 - MOVW $0xa7bd, R16 - MOVKW $(0x1e35<<16), R16 - -calcShift: - // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { - // shift-- - // } - MOVD $16384, R2 - CMP R2, R6 - BGE varTable - CMP R14, R6 - BGE varTable - SUB $1, R5, R5 - LSL $1, R6, R6 - B calcShift - -varTable: - // var table [maxTableSize]uint16 - // - // In the asm code, unlike the Go code, we can zero-initialize only the - // first tableSize elements. Each uint16 element is 2 bytes and each - // iterations writes 64 bytes, so we can do only tableSize/32 writes - // instead of the 2048 writes that would zero-initialize all of table's - // 32768 bytes. This clear could overrun the first tableSize elements, but - // it won't overrun the allocated stack size. - ADD $128, RSP, R17 - MOVD R17, R4 - - // !!! R6 = &src[tableSize] - ADD R6<<1, R17, R6 - -memclr: - STP.P (ZR, ZR), 64(R4) - STP (ZR, ZR), -48(R4) - STP (ZR, ZR), -32(R4) - STP (ZR, ZR), -16(R4) - CMP R4, R6 - BHI memclr - - // !!! R6 = &src[0] - MOVD R7, R6 - - // sLimit := len(src) - inputMargin - MOVD R14, R9 - SUB $15, R9, R9 - - // !!! Pre-emptively spill R5, R6 and R9 to the stack. Their values don't - // change for the rest of the function. - MOVD R5, 64(RSP) - MOVD R6, 72(RSP) - MOVD R9, 96(RSP) - - // nextEmit := 0 - MOVD R6, R10 - - // s := 1 - ADD $1, R7, R7 - - // nextHash := hash(load32(src, s), shift) - MOVW 0(R7), R11 - MULW R16, R11, R11 - LSRW R5, R11, R11 - -outer: - // for { etc } - - // skip := 32 - MOVD $32, R12 - - // nextS := s - MOVD R7, R13 - - // candidate := 0 - MOVD $0, R15 - -inner0: - // for { etc } - - // s := nextS - MOVD R13, R7 - - // bytesBetweenHashLookups := skip >> 5 - MOVD R12, R14 - LSR $5, R14, R14 - - // nextS = s + bytesBetweenHashLookups - ADD R14, R13, R13 - - // skip += bytesBetweenHashLookups - ADD R14, R12, R12 - - // if nextS > sLimit { goto emitRemainder } - MOVD R13, R3 - SUB R6, R3, R3 - CMP R9, R3 - BHI emitRemainder - - // candidate = int(table[nextHash]) - MOVHU 0(R17)(R11<<1), R15 - - // table[nextHash] = uint16(s) - MOVD R7, R3 - SUB R6, R3, R3 - - MOVH R3, 0(R17)(R11<<1) - - // nextHash = hash(load32(src, nextS), shift) - MOVW 0(R13), R11 - MULW R16, R11 - LSRW R5, R11, R11 - - // if load32(src, s) != load32(src, candidate) { continue } break - MOVW 0(R7), R3 - MOVW (R6)(R15), R4 - CMPW R4, R3 - BNE inner0 - -fourByteMatch: - // As per the encode_other.go code: - // - // A 4-byte match has been found. We'll later see etc. - - // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment - // on inputMargin in encode.go. - MOVD R7, R3 - SUB R10, R3, R3 - CMP $16, R3 - BLE emitLiteralFastPath - - // ---------------------------------------- - // Begin inline of the emitLiteral call. - // - // d += emitLiteral(dst[d:], src[nextEmit:s]) - - MOVW R3, R4 - SUBW $1, R4, R4 - - MOVW $60, R2 - CMPW R2, R4 - BLT inlineEmitLiteralOneByte - MOVW $256, R2 - CMPW R2, R4 - BLT inlineEmitLiteralTwoBytes - -inlineEmitLiteralThreeBytes: - MOVD $0xf4, R1 - MOVB R1, 0(R8) - MOVW R4, 1(R8) - ADD $3, R8, R8 - B inlineEmitLiteralMemmove - -inlineEmitLiteralTwoBytes: - MOVD $0xf0, R1 - MOVB R1, 0(R8) - MOVB R4, 1(R8) - ADD $2, R8, R8 - B inlineEmitLiteralMemmove - -inlineEmitLiteralOneByte: - LSLW $2, R4, R4 - MOVB R4, 0(R8) - ADD $1, R8, R8 - -inlineEmitLiteralMemmove: - // Spill local variables (registers) onto the stack; call; unspill. - // - // copy(dst[i:], lit) - // - // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push - // R8, R10 and R3 as arguments. - MOVD R8, 8(RSP) - MOVD R10, 16(RSP) - MOVD R3, 24(RSP) - - // Finish the "d +=" part of "d += emitLiteral(etc)". - ADD R3, R8, R8 - MOVD R7, 80(RSP) - MOVD R8, 88(RSP) - MOVD R15, 120(RSP) - CALL runtime·memmove(SB) - MOVD 64(RSP), R5 - MOVD 72(RSP), R6 - MOVD 80(RSP), R7 - MOVD 88(RSP), R8 - MOVD 96(RSP), R9 - MOVD 120(RSP), R15 - ADD $128, RSP, R17 - MOVW $0xa7bd, R16 - MOVKW $(0x1e35<<16), R16 - B inner1 - -inlineEmitLiteralEnd: - // End inline of the emitLiteral call. - // ---------------------------------------- - -emitLiteralFastPath: - // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". - MOVB R3, R4 - SUBW $1, R4, R4 - AND $0xff, R4, R4 - LSLW $2, R4, R4 - MOVB R4, (R8) - ADD $1, R8, R8 - - // !!! Implement the copy from lit to dst as a 16-byte load and store. - // (Encode's documentation says that dst and src must not overlap.) - // - // This always copies 16 bytes, instead of only len(lit) bytes, but that's - // OK. Subsequent iterations will fix up the overrun. - // - // Note that on arm64, it is legal and cheap to issue unaligned 8-byte or - // 16-byte loads and stores. This technique probably wouldn't be as - // effective on architectures that are fussier about alignment. - LDP 0(R10), (R0, R1) - STP (R0, R1), 0(R8) - ADD R3, R8, R8 - -inner1: - // for { etc } - - // base := s - MOVD R7, R12 - - // !!! offset := base - candidate - MOVD R12, R11 - SUB R15, R11, R11 - SUB R6, R11, R11 - - // ---------------------------------------- - // Begin inline of the extendMatch call. - // - // s = extendMatch(src, candidate+4, s+4) - - // !!! R14 = &src[len(src)] - MOVD src_len+32(FP), R14 - ADD R6, R14, R14 - - // !!! R13 = &src[len(src) - 8] - MOVD R14, R13 - SUB $8, R13, R13 - - // !!! R15 = &src[candidate + 4] - ADD $4, R15, R15 - ADD R6, R15, R15 - - // !!! s += 4 - ADD $4, R7, R7 - -inlineExtendMatchCmp8: - // As long as we are 8 or more bytes before the end of src, we can load and - // compare 8 bytes at a time. If those 8 bytes are equal, repeat. - CMP R13, R7 - BHI inlineExtendMatchCmp1 - MOVD (R15), R3 - MOVD (R7), R4 - CMP R4, R3 - BNE inlineExtendMatchBSF - ADD $8, R15, R15 - ADD $8, R7, R7 - B inlineExtendMatchCmp8 - -inlineExtendMatchBSF: - // If those 8 bytes were not equal, XOR the two 8 byte values, and return - // the index of the first byte that differs. - // RBIT reverses the bit order, then CLZ counts the leading zeros, the - // combination of which finds the least significant bit which is set. - // The arm64 architecture is little-endian, and the shift by 3 converts - // a bit index to a byte index. - EOR R3, R4, R4 - RBIT R4, R4 - CLZ R4, R4 - ADD R4>>3, R7, R7 - B inlineExtendMatchEnd - -inlineExtendMatchCmp1: - // In src's tail, compare 1 byte at a time. - CMP R7, R14 - BLS inlineExtendMatchEnd - MOVB (R15), R3 - MOVB (R7), R4 - CMP R4, R3 - BNE inlineExtendMatchEnd - ADD $1, R15, R15 - ADD $1, R7, R7 - B inlineExtendMatchCmp1 - -inlineExtendMatchEnd: - // End inline of the extendMatch call. - // ---------------------------------------- - - // ---------------------------------------- - // Begin inline of the emitCopy call. - // - // d += emitCopy(dst[d:], base-candidate, s-base) - - // !!! length := s - base - MOVD R7, R3 - SUB R12, R3, R3 - -inlineEmitCopyLoop0: - // for length >= 68 { etc } - MOVW $68, R2 - CMPW R2, R3 - BLT inlineEmitCopyStep1 - - // Emit a length 64 copy, encoded as 3 bytes. - MOVD $0xfe, R1 - MOVB R1, 0(R8) - MOVW R11, 1(R8) - ADD $3, R8, R8 - SUBW $64, R3, R3 - B inlineEmitCopyLoop0 - -inlineEmitCopyStep1: - // if length > 64 { etc } - MOVW $64, R2 - CMPW R2, R3 - BLE inlineEmitCopyStep2 - - // Emit a length 60 copy, encoded as 3 bytes. - MOVD $0xee, R1 - MOVB R1, 0(R8) - MOVW R11, 1(R8) - ADD $3, R8, R8 - SUBW $60, R3, R3 - -inlineEmitCopyStep2: - // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } - MOVW $12, R2 - CMPW R2, R3 - BGE inlineEmitCopyStep3 - MOVW $2048, R2 - CMPW R2, R11 - BGE inlineEmitCopyStep3 - - // Emit the remaining copy, encoded as 2 bytes. - MOVB R11, 1(R8) - LSRW $8, R11, R11 - LSLW $5, R11, R11 - SUBW $4, R3, R3 - AND $0xff, R3, R3 - LSLW $2, R3, R3 - ORRW R3, R11, R11 - ORRW $1, R11, R11 - MOVB R11, 0(R8) - ADD $2, R8, R8 - B inlineEmitCopyEnd - -inlineEmitCopyStep3: - // Emit the remaining copy, encoded as 3 bytes. - SUBW $1, R3, R3 - LSLW $2, R3, R3 - ORRW $2, R3, R3 - MOVB R3, 0(R8) - MOVW R11, 1(R8) - ADD $3, R8, R8 - -inlineEmitCopyEnd: - // End inline of the emitCopy call. - // ---------------------------------------- - - // nextEmit = s - MOVD R7, R10 - - // if s >= sLimit { goto emitRemainder } - MOVD R7, R3 - SUB R6, R3, R3 - CMP R3, R9 - BLS emitRemainder - - // As per the encode_other.go code: - // - // We could immediately etc. - - // x := load64(src, s-1) - MOVD -1(R7), R14 - - // prevHash := hash(uint32(x>>0), shift) - MOVW R14, R11 - MULW R16, R11, R11 - LSRW R5, R11, R11 - - // table[prevHash] = uint16(s-1) - MOVD R7, R3 - SUB R6, R3, R3 - SUB $1, R3, R3 - - MOVHU R3, 0(R17)(R11<<1) - - // currHash := hash(uint32(x>>8), shift) - LSR $8, R14, R14 - MOVW R14, R11 - MULW R16, R11, R11 - LSRW R5, R11, R11 - - // candidate = int(table[currHash]) - MOVHU 0(R17)(R11<<1), R15 - - // table[currHash] = uint16(s) - ADD $1, R3, R3 - MOVHU R3, 0(R17)(R11<<1) - - // if uint32(x>>8) == load32(src, candidate) { continue } - MOVW (R6)(R15), R4 - CMPW R4, R14 - BEQ inner1 - - // nextHash = hash(uint32(x>>16), shift) - LSR $8, R14, R14 - MOVW R14, R11 - MULW R16, R11, R11 - LSRW R5, R11, R11 - - // s++ - ADD $1, R7, R7 - - // break out of the inner1 for loop, i.e. continue the outer loop. - B outer - -emitRemainder: - // if nextEmit < len(src) { etc } - MOVD src_len+32(FP), R3 - ADD R6, R3, R3 - CMP R3, R10 - BEQ encodeBlockEnd - - // d += emitLiteral(dst[d:], src[nextEmit:]) - // - // Push args. - MOVD R8, 8(RSP) - MOVD $0, 16(RSP) // Unnecessary, as the callee ignores it, but conservative. - MOVD $0, 24(RSP) // Unnecessary, as the callee ignores it, but conservative. - MOVD R10, 32(RSP) - SUB R10, R3, R3 - MOVD R3, 40(RSP) - MOVD R3, 48(RSP) // Unnecessary, as the callee ignores it, but conservative. - - // Spill local variables (registers) onto the stack; call; unspill. - MOVD R8, 88(RSP) - CALL ·emitLiteral(SB) - MOVD 88(RSP), R8 - - // Finish the "d +=" part of "d += emitLiteral(etc)". - MOVD 56(RSP), R1 - ADD R1, R8, R8 - -encodeBlockEnd: - MOVD dst_base+0(FP), R3 - SUB R3, R8, R8 - MOVD R8, d+48(FP) - RET diff --git a/vendor/github.com/golang/snappy/encode_asm.go b/vendor/github.com/golang/snappy/encode_asm.go deleted file mode 100644 index 107c1e714..000000000 --- a/vendor/github.com/golang/snappy/encode_asm.go +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm -// +build amd64 arm64 - -package snappy - -// emitLiteral has the same semantics as in encode_other.go. -// -//go:noescape -func emitLiteral(dst, lit []byte) int - -// emitCopy has the same semantics as in encode_other.go. -// -//go:noescape -func emitCopy(dst []byte, offset, length int) int - -// extendMatch has the same semantics as in encode_other.go. -// -//go:noescape -func extendMatch(src []byte, i, j int) int - -// encodeBlock has the same semantics as in encode_other.go. -// -//go:noescape -func encodeBlock(dst, src []byte) (d int) diff --git a/vendor/github.com/golang/snappy/encode_other.go b/vendor/github.com/golang/snappy/encode_other.go deleted file mode 100644 index 296d7f0be..000000000 --- a/vendor/github.com/golang/snappy/encode_other.go +++ /dev/null @@ -1,238 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !amd64,!arm64 appengine !gc noasm - -package snappy - -func load32(b []byte, i int) uint32 { - b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. - return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 -} - -func load64(b []byte, i int) uint64 { - b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line. - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 -} - -// emitLiteral writes a literal chunk and returns the number of bytes written. -// -// It assumes that: -// dst is long enough to hold the encoded bytes -// 1 <= len(lit) && len(lit) <= 65536 -func emitLiteral(dst, lit []byte) int { - i, n := 0, uint(len(lit)-1) - switch { - case n < 60: - dst[0] = uint8(n)<<2 | tagLiteral - i = 1 - case n < 1<<8: - dst[0] = 60<<2 | tagLiteral - dst[1] = uint8(n) - i = 2 - default: - dst[0] = 61<<2 | tagLiteral - dst[1] = uint8(n) - dst[2] = uint8(n >> 8) - i = 3 - } - return i + copy(dst[i:], lit) -} - -// emitCopy writes a copy chunk and returns the number of bytes written. -// -// It assumes that: -// dst is long enough to hold the encoded bytes -// 1 <= offset && offset <= 65535 -// 4 <= length && length <= 65535 -func emitCopy(dst []byte, offset, length int) int { - i := 0 - // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The - // threshold for this loop is a little higher (at 68 = 64 + 4), and the - // length emitted down below is is a little lower (at 60 = 64 - 4), because - // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed - // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as - // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as - // 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a - // tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an - // encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1. - for length >= 68 { - // Emit a length 64 copy, encoded as 3 bytes. - dst[i+0] = 63<<2 | tagCopy2 - dst[i+1] = uint8(offset) - dst[i+2] = uint8(offset >> 8) - i += 3 - length -= 64 - } - if length > 64 { - // Emit a length 60 copy, encoded as 3 bytes. - dst[i+0] = 59<<2 | tagCopy2 - dst[i+1] = uint8(offset) - dst[i+2] = uint8(offset >> 8) - i += 3 - length -= 60 - } - if length >= 12 || offset >= 2048 { - // Emit the remaining copy, encoded as 3 bytes. - dst[i+0] = uint8(length-1)<<2 | tagCopy2 - dst[i+1] = uint8(offset) - dst[i+2] = uint8(offset >> 8) - return i + 3 - } - // Emit the remaining copy, encoded as 2 bytes. - dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 - dst[i+1] = uint8(offset) - return i + 2 -} - -// extendMatch returns the largest k such that k <= len(src) and that -// src[i:i+k-j] and src[j:k] have the same contents. -// -// It assumes that: -// 0 <= i && i < j && j <= len(src) -func extendMatch(src []byte, i, j int) int { - for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 { - } - return j -} - -func hash(u, shift uint32) uint32 { - return (u * 0x1e35a7bd) >> shift -} - -// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It -// assumes that the varint-encoded length of the decompressed bytes has already -// been written. -// -// It also assumes that: -// len(dst) >= MaxEncodedLen(len(src)) && -// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize -func encodeBlock(dst, src []byte) (d int) { - // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. - // The table element type is uint16, as s < sLimit and sLimit < len(src) - // and len(src) <= maxBlockSize and maxBlockSize == 65536. - const ( - maxTableSize = 1 << 14 - // tableMask is redundant, but helps the compiler eliminate bounds - // checks. - tableMask = maxTableSize - 1 - ) - shift := uint32(32 - 8) - for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { - shift-- - } - // In Go, all array elements are zero-initialized, so there is no advantage - // to a smaller tableSize per se. However, it matches the C++ algorithm, - // and in the asm versions of this code, we can get away with zeroing only - // the first tableSize elements. - var table [maxTableSize]uint16 - - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := len(src) - inputMargin - - // nextEmit is where in src the next emitLiteral should start from. - nextEmit := 0 - - // The encoded form must start with a literal, as there are no previous - // bytes to copy, so we start looking for hash matches at s == 1. - s := 1 - nextHash := hash(load32(src, s), shift) - - for { - // Copied from the C++ snappy implementation: - // - // Heuristic match skipping: If 32 bytes are scanned with no matches - // found, start looking only at every other byte. If 32 more bytes are - // scanned (or skipped), look at every third byte, etc.. When a match - // is found, immediately go back to looking at every byte. This is a - // small loss (~5% performance, ~0.1% density) for compressible data - // due to more bookkeeping, but for non-compressible data (such as - // JPEG) it's a huge win since the compressor quickly "realizes" the - // data is incompressible and doesn't bother looking for matches - // everywhere. - // - // The "skip" variable keeps track of how many bytes there are since - // the last match; dividing it by 32 (ie. right-shifting by five) gives - // the number of bytes to move ahead for each iteration. - skip := 32 - - nextS := s - candidate := 0 - for { - s = nextS - bytesBetweenHashLookups := skip >> 5 - nextS = s + bytesBetweenHashLookups - skip += bytesBetweenHashLookups - if nextS > sLimit { - goto emitRemainder - } - candidate = int(table[nextHash&tableMask]) - table[nextHash&tableMask] = uint16(s) - nextHash = hash(load32(src, nextS), shift) - if load32(src, s) == load32(src, candidate) { - break - } - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - d += emitLiteral(dst[d:], src[nextEmit:s]) - - // Call emitCopy, and then see if another emitCopy could be our next - // move. Repeat until we find no match for the input immediately after - // what was consumed by the last emitCopy call. - // - // If we exit this loop normally then we need to call emitLiteral next, - // though we don't yet know how big the literal will be. We handle that - // by proceeding to the next iteration of the main loop. We also can - // exit this loop via goto if we get close to exhausting the input. - for { - // Invariant: we have a 4-byte match at s, and no need to emit any - // literal bytes prior to s. - base := s - - // Extend the 4-byte match as long as possible. - // - // This is an inlined version of: - // s = extendMatch(src, candidate+4, s+4) - s += 4 - for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 { - } - - d += emitCopy(dst[d:], base-candidate, s-base) - nextEmit = s - if s >= sLimit { - goto emitRemainder - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-1 and at s. If - // another emitCopy is not our next move, also calculate nextHash - // at s+1. At least on GOARCH=amd64, these three hash calculations - // are faster as one load64 call (with some shifts) instead of - // three load32 calls. - x := load64(src, s-1) - prevHash := hash(uint32(x>>0), shift) - table[prevHash&tableMask] = uint16(s - 1) - currHash := hash(uint32(x>>8), shift) - candidate = int(table[currHash&tableMask]) - table[currHash&tableMask] = uint16(s) - if uint32(x>>8) != load32(src, candidate) { - nextHash = hash(uint32(x>>16), shift) - s++ - break - } - } - } - -emitRemainder: - if nextEmit < len(src) { - d += emitLiteral(dst[d:], src[nextEmit:]) - } - return d -} diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go new file mode 100644 index 000000000..f8435998e --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -0,0 +1,903 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright (c) 2015 Klaus Post +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "encoding/binary" + "fmt" + "io" + "math" +) + +const ( + NoCompression = 0 + BestSpeed = 1 + BestCompression = 9 + DefaultCompression = -1 + + // HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman + // entropy encoding. This mode is useful in compressing data that has + // already been compressed with an LZ style algorithm (e.g. Snappy or LZ4) + // that lacks an entropy encoder. Compression gains are achieved when + // certain bytes in the input stream occur more frequently than others. + // + // Note that HuffmanOnly produces a compressed output that is + // RFC 1951 compliant. That is, any valid DEFLATE decompressor will + // continue to be able to decompress this output. + HuffmanOnly = -2 + ConstantCompression = HuffmanOnly // compatibility alias. + + logWindowSize = 15 + windowSize = 1 << logWindowSize + windowMask = windowSize - 1 + logMaxOffsetSize = 15 // Standard DEFLATE + minMatchLength = 4 // The smallest match that the compressor looks for + maxMatchLength = 258 // The longest match for the compressor + minOffsetSize = 1 // The shortest offset that makes any sense + + // The maximum number of tokens we will encode at the time. + // Smaller sizes usually creates less optimal blocks. + // Bigger can make context switching slow. + // We use this for levels 7-9, so we make it big. + maxFlateBlockTokens = 1 << 15 + maxStoreBlockSize = 65535 + hashBits = 17 // After 17 performance degrades + hashSize = 1 << hashBits + hashMask = (1 << hashBits) - 1 + hashShift = (hashBits + minMatchLength - 1) / minMatchLength + maxHashOffset = 1 << 28 + + skipNever = math.MaxInt32 + + debugDeflate = false +) + +type compressionLevel struct { + good, lazy, nice, chain, fastSkipHashing, level int +} + +// Compression levels have been rebalanced from zlib deflate defaults +// to give a bigger spread in speed and compression. +// See https://blog.klauspost.com/rebalancing-deflate-compression-levels/ +var levels = []compressionLevel{ + {}, // 0 + // Level 1-6 uses specialized algorithm - values not used + {0, 0, 0, 0, 0, 1}, + {0, 0, 0, 0, 0, 2}, + {0, 0, 0, 0, 0, 3}, + {0, 0, 0, 0, 0, 4}, + {0, 0, 0, 0, 0, 5}, + {0, 0, 0, 0, 0, 6}, + // Levels 7-9 use increasingly more lazy matching + // and increasingly stringent conditions for "good enough". + {8, 12, 16, 24, skipNever, 7}, + {16, 30, 40, 64, skipNever, 8}, + {32, 258, 258, 1024, skipNever, 9}, +} + +// advancedState contains state for the advanced levels, with bigger hash tables, etc. +type advancedState struct { + // deflate state + length int + offset int + maxInsertIndex int + chainHead int + hashOffset int + + ii uint16 // position of last match, intended to overflow to reset. + + // input window: unprocessed data is window[index:windowEnd] + index int + estBitsPerByte int + hashMatch [maxMatchLength + minMatchLength]uint32 + + // Input hash chains + // hashHead[hashValue] contains the largest inputIndex with the specified hash value + // If hashHead[hashValue] is within the current window, then + // hashPrev[hashHead[hashValue] & windowMask] contains the previous index + // with the same hash value. + hashHead [hashSize]uint32 + hashPrev [windowSize]uint32 +} + +type compressor struct { + compressionLevel + + h *huffmanEncoder + w *huffmanBitWriter + + // compression algorithm + fill func(*compressor, []byte) int // copy data to window + step func(*compressor) // process window + + window []byte + windowEnd int + blockStart int // window index where current tokens start + err error + + // queued output tokens + tokens tokens + fast fastEnc + state *advancedState + + sync bool // requesting flush + byteAvailable bool // if true, still need to process window[index-1]. +} + +func (d *compressor) fillDeflate(b []byte) int { + s := d.state + if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) { + // shift the window by windowSize + copy(d.window[:], d.window[windowSize:2*windowSize]) + s.index -= windowSize + d.windowEnd -= windowSize + if d.blockStart >= windowSize { + d.blockStart -= windowSize + } else { + d.blockStart = math.MaxInt32 + } + s.hashOffset += windowSize + if s.hashOffset > maxHashOffset { + delta := s.hashOffset - 1 + s.hashOffset -= delta + s.chainHead -= delta + // Iterate over slices instead of arrays to avoid copying + // the entire table onto the stack (Issue #18625). + for i, v := range s.hashPrev[:] { + if int(v) > delta { + s.hashPrev[i] = uint32(int(v) - delta) + } else { + s.hashPrev[i] = 0 + } + } + for i, v := range s.hashHead[:] { + if int(v) > delta { + s.hashHead[i] = uint32(int(v) - delta) + } else { + s.hashHead[i] = 0 + } + } + } + } + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error { + if index > 0 || eof { + var window []byte + if d.blockStart <= index { + window = d.window[d.blockStart:index] + } + d.blockStart = index + //d.w.writeBlock(tok, eof, window) + d.w.writeBlockDynamic(tok, eof, window, d.sync) + return d.w.err + } + return nil +} + +// writeBlockSkip writes the current block and uses the number of tokens +// to determine if the block should be stored on no matches, or +// only huffman encoded. +func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error { + if index > 0 || eof { + if d.blockStart <= index { + window := d.window[d.blockStart:index] + // If we removed less than a 64th of all literals + // we huffman compress the block. + if int(tok.n) > len(window)-int(tok.n>>6) { + d.w.writeBlockHuff(eof, window, d.sync) + } else { + // Write a dynamic huffman block. + d.w.writeBlockDynamic(tok, eof, window, d.sync) + } + } else { + d.w.writeBlock(tok, eof, nil) + } + d.blockStart = index + return d.w.err + } + return nil +} + +// fillWindow will fill the current window with the supplied +// dictionary and calculate all hashes. +// This is much faster than doing a full encode. +// Should only be used after a start/reset. +func (d *compressor) fillWindow(b []byte) { + // Do not fill window if we are in store-only or huffman mode. + if d.level <= 0 { + return + } + if d.fast != nil { + // encode the last data, but discard the result + if len(b) > maxMatchOffset { + b = b[len(b)-maxMatchOffset:] + } + d.fast.Encode(&d.tokens, b) + d.tokens.Reset() + return + } + s := d.state + // If we are given too much, cut it. + if len(b) > windowSize { + b = b[len(b)-windowSize:] + } + // Add all to window. + n := copy(d.window[d.windowEnd:], b) + + // Calculate 256 hashes at the time (more L1 cache hits) + loops := (n + 256 - minMatchLength) / 256 + for j := 0; j < loops; j++ { + startindex := j * 256 + end := startindex + 256 + minMatchLength - 1 + if end > n { + end = n + } + tocheck := d.window[startindex:end] + dstSize := len(tocheck) - minMatchLength + 1 + + if dstSize <= 0 { + continue + } + + dst := s.hashMatch[:dstSize] + bulkHash4(tocheck, dst) + var newH uint32 + for i, val := range dst { + di := i + startindex + newH = val & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + s.hashPrev[di&windowMask] = s.hashHead[newH] + // Set the head of the hash chain to us. + s.hashHead[newH] = uint32(di + s.hashOffset) + } + } + // Update window information. + d.windowEnd += n + s.index = n +} + +// Try to find a match starting at index whose length is greater than prevSize. +// We only look at chainCount possibilities before giving up. +// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead +func (d *compressor) findMatch(pos int, prevHead int, lookahead int) (length, offset int, ok bool) { + minMatchLook := maxMatchLength + if lookahead < minMatchLook { + minMatchLook = lookahead + } + + win := d.window[0 : pos+minMatchLook] + + // We quit when we get a match that's at least nice long + nice := len(win) - pos + if d.nice < nice { + nice = d.nice + } + + // If we've got a match that's good enough, only look in 1/4 the chain. + tries := d.chain + length = minMatchLength - 1 + + wEnd := win[pos+length] + wPos := win[pos:] + minIndex := pos - windowSize + if minIndex < 0 { + minIndex = 0 + } + offset = 0 + + cGain := 0 + if d.chain < 100 { + for i := prevHead; tries > 0; tries-- { + if wEnd == win[i+length] { + n := matchLen(win[i:i+minMatchLook], wPos) + if n > length { + length = n + offset = pos - i + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] + } + } + if i <= minIndex { + // hashPrev[i & windowMask] has already been overwritten, so stop now. + break + } + i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset + if i < minIndex { + break + } + } + return + } + + // Some like it higher (CSV), some like it lower (JSON) + const baseCost = 6 + // Base is 4 bytes at with an additional cost. + // Matches must be better than this. + for i := prevHead; tries > 0; tries-- { + if wEnd == win[i+length] { + n := matchLen(win[i:i+minMatchLook], wPos) + if n > length { + // Calculate gain. Estimate + newGain := d.h.bitLengthRaw(wPos[:n]) - int(offsetExtraBits[offsetCode(uint32(pos-i))]) - baseCost - int(lengthExtraBits[lengthCodes[(n-3)&255]]) + + //fmt.Println(n, "gain:", newGain, "prev:", cGain, "raw:", d.h.bitLengthRaw(wPos[:n])) + if newGain > cGain { + length = n + offset = pos - i + cGain = newGain + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] + } + } + } + if i <= minIndex { + // hashPrev[i & windowMask] has already been overwritten, so stop now. + break + } + i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset + if i < minIndex { + break + } + } + return +} + +func (d *compressor) writeStoredBlock(buf []byte) error { + if d.w.writeStoredHeader(len(buf), false); d.w.err != nil { + return d.w.err + } + d.w.writeBytes(buf) + return d.w.err +} + +// hash4 returns a hash representation of the first 4 bytes +// of the supplied slice. +// The caller must ensure that len(b) >= 4. +func hash4(b []byte) uint32 { + return hash4u(binary.LittleEndian.Uint32(b), hashBits) +} + +// bulkHash4 will compute hashes using the same +// algorithm as hash4 +func bulkHash4(b []byte, dst []uint32) { + if len(b) < 4 { + return + } + hb := binary.LittleEndian.Uint32(b) + + dst[0] = hash4u(hb, hashBits) + end := len(b) - 4 + 1 + for i := 1; i < end; i++ { + hb = (hb >> 8) | uint32(b[i+3])<<24 + dst[i] = hash4u(hb, hashBits) + } +} + +func (d *compressor) initDeflate() { + d.window = make([]byte, 2*windowSize) + d.byteAvailable = false + d.err = nil + if d.state == nil { + return + } + s := d.state + s.index = 0 + s.hashOffset = 1 + s.length = minMatchLength - 1 + s.offset = 0 + s.chainHead = -1 +} + +// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever, +// meaning it always has lazy matching on. +func (d *compressor) deflateLazy() { + s := d.state + // Sanity enables additional runtime tests. + // It's intended to be used during development + // to supplement the currently ad-hoc unit tests. + const sanity = debugDeflate + + if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync { + return + } + if d.windowEnd != s.index && d.chain > 100 { + // Get literal huffman coder. + if d.h == nil { + d.h = newHuffmanEncoder(maxFlateBlockTokens) + } + var tmp [256]uint16 + for _, v := range d.window[s.index:d.windowEnd] { + tmp[v]++ + } + d.h.generate(tmp[:], 15) + } + + s.maxInsertIndex = d.windowEnd - (minMatchLength - 1) + + for { + if sanity && s.index > d.windowEnd { + panic("index > windowEnd") + } + lookahead := d.windowEnd - s.index + if lookahead < minMatchLength+maxMatchLength { + if !d.sync { + return + } + if sanity && s.index > d.windowEnd { + panic("index > windowEnd") + } + if lookahead == 0 { + // Flush current output block if any. + if d.byteAvailable { + // There is still one pending token that needs to be flushed + d.tokens.AddLiteral(d.window[s.index-1]) + d.byteAvailable = false + } + if d.tokens.n > 0 { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + return + } + } + if s.index < s.maxInsertIndex { + // Update the hash + hash := hash4(d.window[s.index:]) + ch := s.hashHead[hash] + s.chainHead = int(ch) + s.hashPrev[s.index&windowMask] = ch + s.hashHead[hash] = uint32(s.index + s.hashOffset) + } + prevLength := s.length + prevOffset := s.offset + s.length = minMatchLength - 1 + s.offset = 0 + minIndex := s.index - windowSize + if minIndex < 0 { + minIndex = 0 + } + + if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy { + if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, lookahead); ok { + s.length = newLength + s.offset = newOffset + } + } + + if prevLength >= minMatchLength && s.length <= prevLength { + // Check for better match at end... + // + // checkOff must be >=2 since we otherwise risk checking s.index + // Offset of 2 seems to yield best results. + const checkOff = 2 + prevIndex := s.index - 1 + if prevIndex+prevLength+checkOff < s.maxInsertIndex { + end := lookahead + if lookahead > maxMatchLength { + end = maxMatchLength + } + end += prevIndex + idx := prevIndex + prevLength - (4 - checkOff) + h := hash4(d.window[idx:]) + ch2 := int(s.hashHead[h]) - s.hashOffset - prevLength + (4 - checkOff) + if ch2 > minIndex { + length := matchLen(d.window[prevIndex:end], d.window[ch2:]) + // It seems like a pure length metric is best. + if length > prevLength { + prevLength = length + prevOffset = prevIndex - ch2 + } + } + } + // There was a match at the previous step, and the current match is + // not better. Output the previous match. + d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)) + + // Insert in the hash table all strings up to the end of the match. + // index and index-1 are already inserted. If there is not enough + // lookahead, the last two strings are not inserted into the hash + // table. + newIndex := s.index + prevLength - 1 + // Calculate missing hashes + end := newIndex + if end > s.maxInsertIndex { + end = s.maxInsertIndex + } + end += minMatchLength - 1 + startindex := s.index + 1 + if startindex > s.maxInsertIndex { + startindex = s.maxInsertIndex + } + tocheck := d.window[startindex:end] + dstSize := len(tocheck) - minMatchLength + 1 + if dstSize > 0 { + dst := s.hashMatch[:dstSize] + bulkHash4(tocheck, dst) + var newH uint32 + for i, val := range dst { + di := i + startindex + newH = val & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + s.hashPrev[di&windowMask] = s.hashHead[newH] + // Set the head of the hash chain to us. + s.hashHead[newH] = uint32(di + s.hashOffset) + } + } + + s.index = newIndex + d.byteAvailable = false + s.length = minMatchLength - 1 + if d.tokens.n == maxFlateBlockTokens { + // The block includes the current character + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + s.ii = 0 + } else { + // Reset, if we got a match this run. + if s.length >= minMatchLength { + s.ii = 0 + } + // We have a byte waiting. Emit it. + if d.byteAvailable { + s.ii++ + d.tokens.AddLiteral(d.window[s.index-1]) + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + s.index++ + + // If we have a long run of no matches, skip additional bytes + // Resets when s.ii overflows after 64KB. + if n := int(s.ii) - d.chain; n > 0 { + n = 1 + int(n>>6) + for j := 0; j < n; j++ { + if s.index >= d.windowEnd-1 { + break + } + d.tokens.AddLiteral(d.window[s.index-1]) + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + // Index... + if s.index < s.maxInsertIndex { + h := hash4(d.window[s.index:]) + ch := s.hashHead[h] + s.chainHead = int(ch) + s.hashPrev[s.index&windowMask] = ch + s.hashHead[h] = uint32(s.index + s.hashOffset) + } + s.index++ + } + // Flush last byte + d.tokens.AddLiteral(d.window[s.index-1]) + d.byteAvailable = false + // s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + } + } else { + s.index++ + d.byteAvailable = true + } + } + } +} + +func (d *compressor) store() { + if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + d.windowEnd = 0 + } +} + +// fillWindow will fill the buffer with data for huffman-only compression. +// The number of bytes copied is returned. +func (d *compressor) fillBlock(b []byte) int { + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +// storeHuff will compress and store the currently added data, +// if enough has been accumulated or we at the end of the stream. +// Any error that occurred will be in d.err +func (d *compressor) storeHuff() { + if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 { + return + } + d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync) + d.err = d.w.err + d.windowEnd = 0 +} + +// storeFast will compress and store the currently added data, +// if enough has been accumulated or we at the end of the stream. +// Any error that occurred will be in d.err +func (d *compressor) storeFast() { + // We only compress if we have maxStoreBlockSize. + if d.windowEnd < len(d.window) { + if !d.sync { + return + } + // Handle extremely small sizes. + if d.windowEnd < 128 { + if d.windowEnd == 0 { + return + } + if d.windowEnd <= 32 { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + } else { + d.w.writeBlockHuff(false, d.window[:d.windowEnd], true) + d.err = d.w.err + } + d.tokens.Reset() + d.windowEnd = 0 + d.fast.Reset() + return + } + } + + d.fast.Encode(&d.tokens, d.window[:d.windowEnd]) + // If we made zero matches, store the block as is. + if d.tokens.n == 0 { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + // If we removed less than 1/16th, huffman compress the block. + } else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) { + d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync) + d.err = d.w.err + } else { + d.w.writeBlockDynamic(&d.tokens, false, d.window[:d.windowEnd], d.sync) + d.err = d.w.err + } + d.tokens.Reset() + d.windowEnd = 0 +} + +// write will add input byte to the stream. +// Unless an error occurs all bytes will be consumed. +func (d *compressor) write(b []byte) (n int, err error) { + if d.err != nil { + return 0, d.err + } + n = len(b) + for len(b) > 0 { + if d.windowEnd == len(d.window) || d.sync { + d.step(d) + } + b = b[d.fill(d, b):] + if d.err != nil { + return 0, d.err + } + } + return n, d.err +} + +func (d *compressor) syncFlush() error { + d.sync = true + if d.err != nil { + return d.err + } + d.step(d) + if d.err == nil { + d.w.writeStoredHeader(0, false) + d.w.flush() + d.err = d.w.err + } + d.sync = false + return d.err +} + +func (d *compressor) init(w io.Writer, level int) (err error) { + d.w = newHuffmanBitWriter(w) + + switch { + case level == NoCompression: + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).store + case level == ConstantCompression: + d.w.logNewTablePenalty = 10 + d.window = make([]byte, 32<<10) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeHuff + case level == DefaultCompression: + level = 5 + fallthrough + case level >= 1 && level <= 6: + d.w.logNewTablePenalty = 7 + d.fast = newFastEnc(level) + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeFast + case 7 <= level && level <= 9: + d.w.logNewTablePenalty = 8 + d.state = &advancedState{} + d.compressionLevel = levels[level] + d.initDeflate() + d.fill = (*compressor).fillDeflate + d.step = (*compressor).deflateLazy + default: + return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level) + } + d.level = level + return nil +} + +// reset the state of the compressor. +func (d *compressor) reset(w io.Writer) { + d.w.reset(w) + d.sync = false + d.err = nil + // We only need to reset a few things for Snappy. + if d.fast != nil { + d.fast.Reset() + d.windowEnd = 0 + d.tokens.Reset() + return + } + switch d.compressionLevel.chain { + case 0: + // level was NoCompression or ConstantCompresssion. + d.windowEnd = 0 + default: + s := d.state + s.chainHead = -1 + for i := range s.hashHead { + s.hashHead[i] = 0 + } + for i := range s.hashPrev { + s.hashPrev[i] = 0 + } + s.hashOffset = 1 + s.index, d.windowEnd = 0, 0 + d.blockStart, d.byteAvailable = 0, false + d.tokens.Reset() + s.length = minMatchLength - 1 + s.offset = 0 + s.ii = 0 + s.maxInsertIndex = 0 + } +} + +func (d *compressor) close() error { + if d.err != nil { + return d.err + } + d.sync = true + d.step(d) + if d.err != nil { + return d.err + } + if d.w.writeStoredHeader(0, true); d.w.err != nil { + return d.w.err + } + d.w.flush() + d.w.reset(nil) + return d.w.err +} + +// NewWriter returns a new Writer compressing data at the given level. +// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression); +// higher levels typically run slower but compress more. +// Level 0 (NoCompression) does not attempt any compression; it only adds the +// necessary DEFLATE framing. +// Level -1 (DefaultCompression) uses the default compression level. +// Level -2 (ConstantCompression) will use Huffman compression only, giving +// a very fast compression for all types of input, but sacrificing considerable +// compression efficiency. +// +// If level is in the range [-2, 9] then the error returned will be nil. +// Otherwise the error returned will be non-nil. +func NewWriter(w io.Writer, level int) (*Writer, error) { + var dw Writer + if err := dw.d.init(w, level); err != nil { + return nil, err + } + return &dw, nil +} + +// NewWriterDict is like NewWriter but initializes the new +// Writer with a preset dictionary. The returned Writer behaves +// as if the dictionary had been written to it without producing +// any compressed output. The compressed data written to w +// can only be decompressed by a Reader initialized with the +// same dictionary. +func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { + zw, err := NewWriter(w, level) + if err != nil { + return nil, err + } + zw.d.fillWindow(dict) + zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method. + return zw, err +} + +// A Writer takes data written to it and writes the compressed +// form of that data to an underlying writer (see NewWriter). +type Writer struct { + d compressor + dict []byte +} + +// Write writes data to w, which will eventually write the +// compressed form of data to its underlying writer. +func (w *Writer) Write(data []byte) (n int, err error) { + return w.d.write(data) +} + +// Flush flushes any pending data to the underlying writer. +// It is useful mainly in compressed network protocols, to ensure that +// a remote reader has enough data to reconstruct a packet. +// Flush does not return until the data has been written. +// Calling Flush when there is no pending data still causes the Writer +// to emit a sync marker of at least 4 bytes. +// If the underlying writer returns an error, Flush returns that error. +// +// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. +func (w *Writer) Flush() error { + // For more about flushing: + // http://www.bolet.org/~pornin/deflate-flush.html + return w.d.syncFlush() +} + +// Close flushes and closes the writer. +func (w *Writer) Close() error { + return w.d.close() +} + +// Reset discards the writer's state and makes it equivalent to +// the result of NewWriter or NewWriterDict called with dst +// and w's level and dictionary. +func (w *Writer) Reset(dst io.Writer) { + if len(w.dict) > 0 { + // w was created with NewWriterDict + w.d.reset(dst) + if dst != nil { + w.d.fillWindow(w.dict) + } + } else { + // w was created with NewWriter + w.d.reset(dst) + } +} + +// ResetDict discards the writer's state and makes it equivalent to +// the result of NewWriter or NewWriterDict called with dst +// and w's level, but sets a specific dictionary. +func (w *Writer) ResetDict(dst io.Writer, dict []byte) { + w.dict = dict + w.d.reset(dst) + w.d.fillWindow(w.dict) +} diff --git a/vendor/github.com/klauspost/compress/flate/dict_decoder.go b/vendor/github.com/klauspost/compress/flate/dict_decoder.go new file mode 100644 index 000000000..71c75a065 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/dict_decoder.go @@ -0,0 +1,184 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// dictDecoder implements the LZ77 sliding dictionary as used in decompression. +// LZ77 decompresses data through sequences of two forms of commands: +// +// * Literal insertions: Runs of one or more symbols are inserted into the data +// stream as is. This is accomplished through the writeByte method for a +// single symbol, or combinations of writeSlice/writeMark for multiple symbols. +// Any valid stream must start with a literal insertion if no preset dictionary +// is used. +// +// * Backward copies: Runs of one or more symbols are copied from previously +// emitted data. Backward copies come as the tuple (dist, length) where dist +// determines how far back in the stream to copy from and length determines how +// many bytes to copy. Note that it is valid for the length to be greater than +// the distance. Since LZ77 uses forward copies, that situation is used to +// perform a form of run-length encoding on repeated runs of symbols. +// The writeCopy and tryWriteCopy are used to implement this command. +// +// For performance reasons, this implementation performs little to no sanity +// checks about the arguments. As such, the invariants documented for each +// method call must be respected. +type dictDecoder struct { + hist []byte // Sliding window history + + // Invariant: 0 <= rdPos <= wrPos <= len(hist) + wrPos int // Current output position in buffer + rdPos int // Have emitted hist[:rdPos] already + full bool // Has a full window length been written yet? +} + +// init initializes dictDecoder to have a sliding window dictionary of the given +// size. If a preset dict is provided, it will initialize the dictionary with +// the contents of dict. +func (dd *dictDecoder) init(size int, dict []byte) { + *dd = dictDecoder{hist: dd.hist} + + if cap(dd.hist) < size { + dd.hist = make([]byte, size) + } + dd.hist = dd.hist[:size] + + if len(dict) > len(dd.hist) { + dict = dict[len(dict)-len(dd.hist):] + } + dd.wrPos = copy(dd.hist, dict) + if dd.wrPos == len(dd.hist) { + dd.wrPos = 0 + dd.full = true + } + dd.rdPos = dd.wrPos +} + +// histSize reports the total amount of historical data in the dictionary. +func (dd *dictDecoder) histSize() int { + if dd.full { + return len(dd.hist) + } + return dd.wrPos +} + +// availRead reports the number of bytes that can be flushed by readFlush. +func (dd *dictDecoder) availRead() int { + return dd.wrPos - dd.rdPos +} + +// availWrite reports the available amount of output buffer space. +func (dd *dictDecoder) availWrite() int { + return len(dd.hist) - dd.wrPos +} + +// writeSlice returns a slice of the available buffer to write data to. +// +// This invariant will be kept: len(s) <= availWrite() +func (dd *dictDecoder) writeSlice() []byte { + return dd.hist[dd.wrPos:] +} + +// writeMark advances the writer pointer by cnt. +// +// This invariant must be kept: 0 <= cnt <= availWrite() +func (dd *dictDecoder) writeMark(cnt int) { + dd.wrPos += cnt +} + +// writeByte writes a single byte to the dictionary. +// +// This invariant must be kept: 0 < availWrite() +func (dd *dictDecoder) writeByte(c byte) { + dd.hist[dd.wrPos] = c + dd.wrPos++ +} + +// writeCopy copies a string at a given (dist, length) to the output. +// This returns the number of bytes copied and may be less than the requested +// length if the available space in the output buffer is too small. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) writeCopy(dist, length int) int { + dstBase := dd.wrPos + dstPos := dstBase + srcPos := dstPos - dist + endPos := dstPos + length + if endPos > len(dd.hist) { + endPos = len(dd.hist) + } + + // Copy non-overlapping section after destination position. + // + // This section is non-overlapping in that the copy length for this section + // is always less than or equal to the backwards distance. This can occur + // if a distance refers to data that wraps-around in the buffer. + // Thus, a backwards copy is performed here; that is, the exact bytes in + // the source prior to the copy is placed in the destination. + if srcPos < 0 { + srcPos += len(dd.hist) + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) + srcPos = 0 + } + + // Copy possibly overlapping section before destination position. + // + // This section can overlap if the copy length for this section is larger + // than the backwards distance. This is allowed by LZ77 so that repeated + // strings can be succinctly represented using (dist, length) pairs. + // Thus, a forwards copy is performed here; that is, the bytes copied is + // possibly dependent on the resulting bytes in the destination as the copy + // progresses along. This is functionally equivalent to the following: + // + // for i := 0; i < endPos-dstPos; i++ { + // dd.hist[dstPos+i] = dd.hist[srcPos+i] + // } + // dstPos = endPos + // + for dstPos < endPos { + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// tryWriteCopy tries to copy a string at a given (distance, length) to the +// output. This specialized version is optimized for short distances. +// +// This method is designed to be inlined for performance reasons. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) tryWriteCopy(dist, length int) int { + dstPos := dd.wrPos + endPos := dstPos + length + if dstPos < dist || endPos > len(dd.hist) { + return 0 + } + dstBase := dstPos + srcPos := dstPos - dist + + // Copy possibly overlapping section before destination position. +loop: + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + if dstPos < endPos { + goto loop // Avoid for-loop so that this function can be inlined + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// readFlush returns a slice of the historical buffer that is ready to be +// emitted to the user. The data returned by readFlush must be fully consumed +// before calling any other dictDecoder methods. +func (dd *dictDecoder) readFlush() []byte { + toRead := dd.hist[dd.rdPos:dd.wrPos] + dd.rdPos = dd.wrPos + if dd.wrPos == len(dd.hist) { + dd.wrPos, dd.rdPos = 0, 0 + dd.full = true + } + return toRead +} diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go new file mode 100644 index 000000000..f781aaa62 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go @@ -0,0 +1,233 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Modified for deflate by Klaus Post (c) 2015. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "encoding/binary" + "fmt" + "math/bits" +) + +type fastEnc interface { + Encode(dst *tokens, src []byte) + Reset() +} + +func newFastEnc(level int) fastEnc { + switch level { + case 1: + return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}} + case 2: + return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}} + case 3: + return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}} + case 4: + return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}} + case 5: + return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}} + case 6: + return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}} + default: + panic("invalid level specified") + } +} + +const ( + tableBits = 15 // Bits used in the table + tableSize = 1 << tableBits // Size of the table + tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32. + baseMatchOffset = 1 // The smallest match offset + baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5 + maxMatchOffset = 1 << 15 // The largest match offset + + bTableBits = 17 // Bits used in the big tables + bTableSize = 1 << bTableBits // Size of the table + allocHistory = maxStoreBlockSize * 5 // Size to preallocate for history. + bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this. +) + +const ( + prime3bytes = 506832829 + prime4bytes = 2654435761 + prime5bytes = 889523592379 + prime6bytes = 227718039650203 + prime7bytes = 58295818150454627 + prime8bytes = 0xcf1bbcdcb7a56463 +) + +func load32(b []byte, i int) uint32 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:4] + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load64(b []byte, i int) uint64 { + return binary.LittleEndian.Uint64(b[i:]) +} + +func load3232(b []byte, i int32) uint32 { + return binary.LittleEndian.Uint32(b[i:]) +} + +func load6432(b []byte, i int32) uint64 { + return binary.LittleEndian.Uint64(b[i:]) +} + +func hash(u uint32) uint32 { + return (u * 0x1e35a7bd) >> tableShift +} + +type tableEntry struct { + offset int32 +} + +// fastGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type fastGen struct { + hist []byte + cur int32 +} + +func (e *fastGen) addBlock(src []byte) int32 { + // check if we have space already + if len(e.hist)+len(src) > cap(e.hist) { + if cap(e.hist) == 0 { + e.hist = make([]byte, 0, allocHistory) + } else { + if cap(e.hist) < maxMatchOffset*2 { + panic("unexpected buffer size") + } + // Move down + offset := int32(len(e.hist)) - maxMatchOffset + copy(e.hist[0:maxMatchOffset], e.hist[offset:]) + e.cur += offset + e.hist = e.hist[:maxMatchOffset] + } + } + s := int32(len(e.hist)) + e.hist = append(e.hist, src...) + return s +} + +// hash4 returns the hash of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <32. +func hash4u(u uint32, h uint8) uint32 { + return (u * prime4bytes) >> (32 - h) +} + +type tableEntryPrev struct { + Cur tableEntry + Prev tableEntry +} + +// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <32. +func hash4x64(u uint64, h uint8) uint32 { + return (uint32(u) * prime4bytes) >> ((32 - h) & reg8SizeMask32) +} + +// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash7(u uint64, h uint8) uint32 { + return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64)) +} + +// hash8 returns the hash of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash8(u uint64, h uint8) uint32 { + return uint32((u * prime8bytes) >> ((64 - h) & reg8SizeMask64)) +} + +// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash6(u uint64, h uint8) uint32 { + return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & reg8SizeMask64)) +} + +// matchlen will return the match length between offsets and t in src. +// The maximum length returned is maxMatchLength - 4. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastGen) matchlen(s, t int32, src []byte) int32 { + if debugDecode { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > maxMatchOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + s1 := int(s) + maxMatchLength - 4 + if s1 > len(src) { + s1 = len(src) + } + + // Extend the match to be as long as possible. + return int32(matchLen(src[s:s1], src[t:])) +} + +// matchlenLong will return the match length between offsets and t in src. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 { + if debugDeflate { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > maxMatchOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + // Extend the match to be as long as possible. + return int32(matchLen(src[s:], src[t:])) +} + +// Reset the encoding table. +func (e *fastGen) Reset() { + if cap(e.hist) < allocHistory { + e.hist = make([]byte, 0, allocHistory) + } + // We offset current position so everything will be out of reach. + // If we are above the buffer reset it will be cleared anyway since len(hist) == 0. + if e.cur <= bufferReset { + e.cur += maxMatchOffset + int32(len(e.hist)) + } + e.hist = e.hist[:0] +} + +// matchLen returns the maximum length. +// 'a' must be the shortest of the two. +func matchLen(a, b []byte) int { + var checked int + + for len(a) >= 8 { + if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 { + return checked + (bits.TrailingZeros64(diff) >> 3) + } + checked += 8 + a = a[8:] + b = b[8:] + } + b = b[:len(a)] + for i := range a { + if a[i] != b[i] { + return i + checked + } + } + return len(a) + checked +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go new file mode 100644 index 000000000..40ef45c2f --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go @@ -0,0 +1,1185 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "encoding/binary" + "fmt" + "io" + "math" +) + +const ( + // The largest offset code. + offsetCodeCount = 30 + + // The special code used to mark the end of a block. + endBlockMarker = 256 + + // The first length code. + lengthCodesStart = 257 + + // The number of codegen codes. + codegenCodeCount = 19 + badCode = 255 + + // maxPredefinedTokens is the maximum number of tokens + // where we check if fixed size is smaller. + maxPredefinedTokens = 250 + + // bufferFlushSize indicates the buffer size + // after which bytes are flushed to the writer. + // Should preferably be a multiple of 6, since + // we accumulate 6 bytes between writes to the buffer. + bufferFlushSize = 246 + + // bufferSize is the actual output byte buffer size. + // It must have additional headroom for a flush + // which can contain up to 8 bytes. + bufferSize = bufferFlushSize + 8 +) + +// Minimum length code that emits bits. +const lengthExtraBitsMinCode = 8 + +// The number of extra bits needed by length code X - LENGTH_CODES_START. +var lengthExtraBits = [32]uint8{ + /* 257 */ 0, 0, 0, + /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, + /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, + /* 280 */ 4, 5, 5, 5, 5, 0, +} + +// The length indicated by length code X - LENGTH_CODES_START. +var lengthBase = [32]uint8{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, + 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, + 64, 80, 96, 112, 128, 160, 192, 224, 255, +} + +// Minimum offset code that emits bits. +const offsetExtraBitsMinCode = 4 + +// offset code word extra bits. +var offsetExtraBits = [32]int8{ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, + /* extended window */ + 14, 14, +} + +var offsetCombined = [32]uint32{} + +func init() { + var offsetBase = [32]uint32{ + /* normal deflate */ + 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, + 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, + 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, + 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, + 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, + 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, + + /* extended window */ + 0x008000, 0x00c000, + } + + for i := range offsetCombined[:] { + // Don't use extended window values... + if offsetExtraBits[i] == 0 || offsetBase[i] > 0x006000 { + continue + } + offsetCombined[i] = uint32(offsetExtraBits[i]) | (offsetBase[i] << 8) + } +} + +// The odd order in which the codegen code sizes are written. +var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +type huffmanBitWriter struct { + // writer is the underlying writer. + // Do not use it directly; use the write method, which ensures + // that Write errors are sticky. + writer io.Writer + + // Data waiting to be written is bytes[0:nbytes] + // and then the low nbits of bits. + bits uint64 + nbits uint8 + nbytes uint8 + lastHuffMan bool + literalEncoding *huffmanEncoder + tmpLitEncoding *huffmanEncoder + offsetEncoding *huffmanEncoder + codegenEncoding *huffmanEncoder + err error + lastHeader int + // Set between 0 (reused block can be up to 2x the size) + logNewTablePenalty uint + bytes [256 + 8]byte + literalFreq [lengthCodesStart + 32]uint16 + offsetFreq [32]uint16 + codegenFreq [codegenCodeCount]uint16 + + // codegen must have an extra space for the final symbol. + codegen [literalCount + offsetCodeCount + 1]uint8 +} + +// Huffman reuse. +// +// The huffmanBitWriter supports reusing huffman tables and thereby combining block sections. +// +// This is controlled by several variables: +// +// If lastHeader is non-zero the Huffman table can be reused. +// This also indicates that a Huffman table has been generated that can output all +// possible symbols. +// It also indicates that an EOB has not yet been emitted, so if a new tabel is generated +// an EOB with the previous table must be written. +// +// If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid. +// +// An incoming block estimates the output size of a new table using a 'fresh' by calculating the +// optimal size and adding a penalty in 'logNewTablePenalty'. +// A Huffman table is not optimal, which is why we add a penalty, and generating a new table +// is slower both for compression and decompression. + +func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { + return &huffmanBitWriter{ + writer: w, + literalEncoding: newHuffmanEncoder(literalCount), + tmpLitEncoding: newHuffmanEncoder(literalCount), + codegenEncoding: newHuffmanEncoder(codegenCodeCount), + offsetEncoding: newHuffmanEncoder(offsetCodeCount), + } +} + +func (w *huffmanBitWriter) reset(writer io.Writer) { + w.writer = writer + w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil + w.lastHeader = 0 + w.lastHuffMan = false +} + +func (w *huffmanBitWriter) canReuse(t *tokens) (ok bool) { + a := t.offHist[:offsetCodeCount] + b := w.offsetEncoding.codes + b = b[:len(a)] + for i, v := range a { + if v != 0 && b[i].zero() { + return false + } + } + + a = t.extraHist[:literalCount-256] + b = w.literalEncoding.codes[256:literalCount] + b = b[:len(a)] + for i, v := range a { + if v != 0 && b[i].zero() { + return false + } + } + + a = t.litHist[:256] + b = w.literalEncoding.codes[:len(a)] + for i, v := range a { + if v != 0 && b[i].zero() { + return false + } + } + return true +} + +func (w *huffmanBitWriter) flush() { + if w.err != nil { + w.nbits = 0 + return + } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + n := w.nbytes + for w.nbits != 0 { + w.bytes[n] = byte(w.bits) + w.bits >>= 8 + if w.nbits > 8 { // Avoid underflow + w.nbits -= 8 + } else { + w.nbits = 0 + } + n++ + } + w.bits = 0 + w.write(w.bytes[:n]) + w.nbytes = 0 +} + +func (w *huffmanBitWriter) write(b []byte) { + if w.err != nil { + return + } + _, w.err = w.writer.Write(b) +} + +func (w *huffmanBitWriter) writeBits(b int32, nb uint8) { + w.bits |= uint64(b) << (w.nbits & 63) + w.nbits += nb + if w.nbits >= 48 { + w.writeOutBits() + } +} + +func (w *huffmanBitWriter) writeBytes(bytes []byte) { + if w.err != nil { + return + } + n := w.nbytes + if w.nbits&7 != 0 { + w.err = InternalError("writeBytes with unfinished bits") + return + } + for w.nbits != 0 { + w.bytes[n] = byte(w.bits) + w.bits >>= 8 + w.nbits -= 8 + n++ + } + if n != 0 { + w.write(w.bytes[:n]) + } + w.nbytes = 0 + w.write(bytes) +} + +// RFC 1951 3.2.7 specifies a special run-length encoding for specifying +// the literal and offset lengths arrays (which are concatenated into a single +// array). This method generates that run-length encoding. +// +// The result is written into the codegen array, and the frequencies +// of each code is written into the codegenFreq array. +// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional +// information. Code badCode is an end marker +// +// numLiterals The number of literals in literalEncoding +// numOffsets The number of offsets in offsetEncoding +// litenc, offenc The literal and offset encoder to use +func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) { + for i := range w.codegenFreq { + w.codegenFreq[i] = 0 + } + // Note that we are using codegen both as a temporary variable for holding + // a copy of the frequencies, and as the place where we put the result. + // This is fine because the output is always shorter than the input used + // so far. + codegen := w.codegen[:] // cache + // Copy the concatenated code sizes to codegen. Put a marker at the end. + cgnl := codegen[:numLiterals] + for i := range cgnl { + cgnl[i] = litEnc.codes[i].len() + } + + cgnl = codegen[numLiterals : numLiterals+numOffsets] + for i := range cgnl { + cgnl[i] = offEnc.codes[i].len() + } + codegen[numLiterals+numOffsets] = badCode + + size := codegen[0] + count := 1 + outIndex := 0 + for inIndex := 1; size != badCode; inIndex++ { + // INVARIANT: We have seen "count" copies of size that have not yet + // had output generated for them. + nextSize := codegen[inIndex] + if nextSize == size { + count++ + continue + } + // We need to generate codegen indicating "count" of size. + if size != 0 { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + count-- + for count >= 3 { + n := 6 + if n > count { + n = count + } + codegen[outIndex] = 16 + outIndex++ + codegen[outIndex] = uint8(n - 3) + outIndex++ + w.codegenFreq[16]++ + count -= n + } + } else { + for count >= 11 { + n := 138 + if n > count { + n = count + } + codegen[outIndex] = 18 + outIndex++ + codegen[outIndex] = uint8(n - 11) + outIndex++ + w.codegenFreq[18]++ + count -= n + } + if count >= 3 { + // count >= 3 && count <= 10 + codegen[outIndex] = 17 + outIndex++ + codegen[outIndex] = uint8(count - 3) + outIndex++ + w.codegenFreq[17]++ + count = 0 + } + } + count-- + for ; count >= 0; count-- { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + } + // Set up invariant for next time through the loop. + size = nextSize + count = 1 + } + // Marker indicating the end of the codegen. + codegen[outIndex] = badCode +} + +func (w *huffmanBitWriter) codegens() int { + numCodegens := len(w.codegenFreq) + for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { + numCodegens-- + } + return numCodegens +} + +func (w *huffmanBitWriter) headerSize() (size, numCodegens int) { + numCodegens = len(w.codegenFreq) + for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { + numCodegens-- + } + return 3 + 5 + 5 + 4 + (3 * numCodegens) + + w.codegenEncoding.bitLength(w.codegenFreq[:]) + + int(w.codegenFreq[16])*2 + + int(w.codegenFreq[17])*3 + + int(w.codegenFreq[18])*7, numCodegens +} + +// dynamicSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) { + size = litEnc.bitLength(w.literalFreq[:]) + + offEnc.bitLength(w.offsetFreq[:]) + return size +} + +// dynamicSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) { + header, numCodegens := w.headerSize() + size = header + + litEnc.bitLength(w.literalFreq[:]) + + offEnc.bitLength(w.offsetFreq[:]) + + extraBits + return size, numCodegens +} + +// extraBitSize will return the number of bits that will be written +// as "extra" bits on matches. +func (w *huffmanBitWriter) extraBitSize() int { + total := 0 + for i, n := range w.literalFreq[257:literalCount] { + total += int(n) * int(lengthExtraBits[i&31]) + } + for i, n := range w.offsetFreq[:offsetCodeCount] { + total += int(n) * int(offsetExtraBits[i&31]) + } + return total +} + +// fixedSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) fixedSize(extraBits int) int { + return 3 + + fixedLiteralEncoding.bitLength(w.literalFreq[:]) + + fixedOffsetEncoding.bitLength(w.offsetFreq[:]) + + extraBits +} + +// storedSize calculates the stored size, including header. +// The function returns the size in bits and whether the block +// fits inside a single block. +func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) { + if in == nil { + return 0, false + } + if len(in) <= maxStoreBlockSize { + return (len(in) + 5) * 8, true + } + return 0, false +} + +func (w *huffmanBitWriter) writeCode(c hcode) { + // The function does not get inlined if we "& 63" the shift. + w.bits |= c.code64() << (w.nbits & 63) + w.nbits += c.len() + if w.nbits >= 48 { + w.writeOutBits() + } +} + +// writeOutBits will write bits to the buffer. +func (w *huffmanBitWriter) writeOutBits() { + bits := w.bits + w.bits >>= 48 + w.nbits -= 48 + n := w.nbytes + + // We over-write, but faster... + binary.LittleEndian.PutUint64(w.bytes[n:], bits) + n += 6 + + if n >= bufferFlushSize { + if w.err != nil { + n = 0 + return + } + w.write(w.bytes[:n]) + n = 0 + } + + w.nbytes = n +} + +// Write the header of a dynamic Huffman block to the output stream. +// +// numLiterals The number of literals specified in codegen +// numOffsets The number of offsets specified in codegen +// numCodegens The number of codegens used in codegen +func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { + if w.err != nil { + return + } + var firstBits int32 = 4 + if isEof { + firstBits = 5 + } + w.writeBits(firstBits, 3) + w.writeBits(int32(numLiterals-257), 5) + w.writeBits(int32(numOffsets-1), 5) + w.writeBits(int32(numCodegens-4), 4) + + for i := 0; i < numCodegens; i++ { + value := uint(w.codegenEncoding.codes[codegenOrder[i]].len()) + w.writeBits(int32(value), 3) + } + + i := 0 + for { + var codeWord = uint32(w.codegen[i]) + i++ + if codeWord == badCode { + break + } + w.writeCode(w.codegenEncoding.codes[codeWord]) + + switch codeWord { + case 16: + w.writeBits(int32(w.codegen[i]), 2) + i++ + case 17: + w.writeBits(int32(w.codegen[i]), 3) + i++ + case 18: + w.writeBits(int32(w.codegen[i]), 7) + i++ + } + } +} + +// writeStoredHeader will write a stored header. +// If the stored block is only used for EOF, +// it is replaced with a fixed huffman block. +func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { + if w.err != nil { + return + } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + + // To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes. + if length == 0 && isEof { + w.writeFixedHeader(isEof) + // EOB: 7 bits, value: 0 + w.writeBits(0, 7) + w.flush() + return + } + + var flag int32 + if isEof { + flag = 1 + } + w.writeBits(flag, 3) + w.flush() + w.writeBits(int32(length), 16) + w.writeBits(int32(^uint16(length)), 16) +} + +func (w *huffmanBitWriter) writeFixedHeader(isEof bool) { + if w.err != nil { + return + } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + + // Indicate that we are a fixed Huffman block + var value int32 = 2 + if isEof { + value = 3 + } + w.writeBits(value, 3) +} + +// writeBlock will write a block of tokens with the smallest encoding. +// The original input can be supplied, and if the huffman encoded data +// is larger than the original bytes, the data will be written as a +// stored block. +// If the input is nil, the tokens will always be Huffman encoded. +func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) { + if w.err != nil { + return + } + + tokens.AddEOB() + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + numLiterals, numOffsets := w.indexTokens(tokens, false) + w.generate() + var extraBits int + storedSize, storable := w.storedSize(input) + if storable { + extraBits = w.extraBitSize() + } + + // Figure out smallest code. + // Fixed Huffman baseline. + var literalEncoding = fixedLiteralEncoding + var offsetEncoding = fixedOffsetEncoding + var size = math.MaxInt32 + if tokens.n < maxPredefinedTokens { + size = w.fixedSize(extraBits) + } + + // Dynamic Huffman? + var numCodegens int + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) + + if dynamicSize < size { + size = dynamicSize + literalEncoding = w.literalEncoding + offsetEncoding = w.offsetEncoding + } + + // Stored bytes? + if storable && storedSize <= size { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + // Huffman. + if literalEncoding == fixedLiteralEncoding { + w.writeFixedHeader(eof) + } else { + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + } + + // Write the tokens. + w.writeTokens(tokens.Slice(), literalEncoding.codes, offsetEncoding.codes) +} + +// writeBlockDynamic encodes a block using a dynamic Huffman table. +// This should be used if the symbols used have a disproportionate +// histogram distribution. +// If input is supplied and the compression savings are below 1/16th of the +// input size the block is stored. +func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []byte, sync bool) { + if w.err != nil { + return + } + + sync = sync || eof + if sync { + tokens.AddEOB() + } + + // We cannot reuse pure huffman table, and must mark as EOF. + if (w.lastHuffMan || eof) && w.lastHeader > 0 { + // We will not try to reuse. + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + w.lastHuffMan = false + } + + // fillReuse enables filling of empty values. + // This will make encodings always reusable without testing. + // However, this does not appear to benefit on most cases. + const fillReuse = false + + // Check if we can reuse... + if !fillReuse && w.lastHeader > 0 && !w.canReuse(tokens) { + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + + numLiterals, numOffsets := w.indexTokens(tokens, !sync) + extraBits := 0 + ssize, storable := w.storedSize(input) + + const usePrefs = true + if storable || w.lastHeader > 0 { + extraBits = w.extraBitSize() + } + + var size int + + // Check if we should reuse. + if w.lastHeader > 0 { + // Estimate size for using a new table. + // Use the previous header size as the best estimate. + newSize := w.lastHeader + tokens.EstimatedBits() + newSize += int(w.literalEncoding.codes[endBlockMarker].len()) + newSize>>w.logNewTablePenalty + + // The estimated size is calculated as an optimal table. + // We add a penalty to make it more realistic and re-use a bit more. + reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + extraBits + + // Check if a new table is better. + if newSize < reuseSize { + // Write the EOB we owe. + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + size = newSize + w.lastHeader = 0 + } else { + size = reuseSize + } + + if tokens.n < maxPredefinedTokens { + if preSize := w.fixedSize(extraBits) + 7; usePrefs && preSize < size { + // Check if we get a reasonable size decrease. + if storable && ssize <= size { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + w.writeFixedHeader(eof) + if !sync { + tokens.AddEOB() + } + w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes) + return + } + } + // Check if we get a reasonable size decrease. + if storable && ssize <= size { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + } + + // We want a new block/table + if w.lastHeader == 0 { + if fillReuse && !sync { + w.fillTokens() + numLiterals, numOffsets = maxNumLit, maxNumDist + } else { + w.literalFreq[endBlockMarker] = 1 + } + + w.generate() + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + + var numCodegens int + if fillReuse && !sync { + // Reindex for accurate size... + w.indexTokens(tokens, true) + } + size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) + + // Store predefined, if we don't get a reasonable improvement. + if tokens.n < maxPredefinedTokens { + if preSize := w.fixedSize(extraBits); usePrefs && preSize <= size { + // Store bytes, if we don't get an improvement. + if storable && ssize <= preSize { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + w.writeFixedHeader(eof) + if !sync { + tokens.AddEOB() + } + w.writeTokens(tokens.Slice(), fixedLiteralEncoding.codes, fixedOffsetEncoding.codes) + return + } + } + + if storable && ssize <= size { + // Store bytes, if we don't get an improvement. + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + // Write Huffman table. + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + if !sync { + w.lastHeader, _ = w.headerSize() + } + w.lastHuffMan = false + } + + if sync { + w.lastHeader = 0 + } + // Write the tokens. + w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes) +} + +func (w *huffmanBitWriter) fillTokens() { + for i, v := range w.literalFreq[:literalCount] { + if v == 0 { + w.literalFreq[i] = 1 + } + } + for i, v := range w.offsetFreq[:offsetCodeCount] { + if v == 0 { + w.offsetFreq[i] = 1 + } + } +} + +// indexTokens indexes a slice of tokens, and updates +// literalFreq and offsetFreq, and generates literalEncoding +// and offsetEncoding. +// The number of literal and offset tokens is returned. +func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) { + copy(w.literalFreq[:], t.litHist[:]) + copy(w.literalFreq[256:], t.extraHist[:]) + copy(w.offsetFreq[:], t.offHist[:offsetCodeCount]) + + if t.n == 0 { + return + } + if filled { + return maxNumLit, maxNumDist + } + // get the number of literals + numLiterals = len(w.literalFreq) + for w.literalFreq[numLiterals-1] == 0 { + numLiterals-- + } + // get the number of offsets + numOffsets = len(w.offsetFreq) + for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 { + numOffsets-- + } + if numOffsets == 0 { + // We haven't found a single match. If we want to go with the dynamic encoding, + // we should count at least one offset to be sure that the offset huffman tree could be encoded. + w.offsetFreq[0] = 1 + numOffsets = 1 + } + return +} + +func (w *huffmanBitWriter) generate() { + w.literalEncoding.generate(w.literalFreq[:literalCount], 15) + w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15) +} + +// writeTokens writes a slice of tokens to the output. +// codes for literal and offset encoding must be supplied. +func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) { + if w.err != nil { + return + } + if len(tokens) == 0 { + return + } + + // Only last token should be endBlockMarker. + var deferEOB bool + if tokens[len(tokens)-1] == endBlockMarker { + tokens = tokens[:len(tokens)-1] + deferEOB = true + } + + // Create slices up to the next power of two to avoid bounds checks. + lits := leCodes[:256] + offs := oeCodes[:32] + lengths := leCodes[lengthCodesStart:] + lengths = lengths[:32] + + // Go 1.16 LOVES having these on stack. + bits, nbits, nbytes := w.bits, w.nbits, w.nbytes + + for _, t := range tokens { + if t < 256 { + //w.writeCode(lits[t.literal()]) + c := lits[t] + bits |= c.code64() << (nbits & 63) + nbits += c.len() + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + continue + } + + // Write the length + length := t.length() + lengthCode := lengthCode(length) & 31 + if false { + w.writeCode(lengths[lengthCode]) + } else { + // inlined + c := lengths[lengthCode] + bits |= c.code64() << (nbits & 63) + nbits += c.len() + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + } + + if lengthCode >= lengthExtraBitsMinCode { + extraLengthBits := lengthExtraBits[lengthCode] + //w.writeBits(extraLength, extraLengthBits) + extraLength := int32(length - lengthBase[lengthCode]) + bits |= uint64(extraLength) << (nbits & 63) + nbits += extraLengthBits + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + } + // Write the offset + offset := t.offset() + offsetCode := (offset >> 16) & 31 + if false { + w.writeCode(offs[offsetCode]) + } else { + // inlined + c := offs[offsetCode] + bits |= c.code64() << (nbits & 63) + nbits += c.len() + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + } + + if offsetCode >= offsetExtraBitsMinCode { + offsetComb := offsetCombined[offsetCode] + //w.writeBits(extraOffset, extraOffsetBits) + bits |= uint64((offset-(offsetComb>>8))&matchOffsetOnlyMask) << (nbits & 63) + nbits += uint8(offsetComb) + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + } + } + // Restore... + w.bits, w.nbits, w.nbytes = bits, nbits, nbytes + + if deferEOB { + w.writeCode(leCodes[endBlockMarker]) + } +} + +// huffOffset is a static offset encoder used for huffman only encoding. +// It can be reused since we will not be encoding offset values. +var huffOffset *huffmanEncoder + +func init() { + w := newHuffmanBitWriter(nil) + w.offsetFreq[0] = 1 + huffOffset = newHuffmanEncoder(offsetCodeCount) + huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15) +} + +// writeBlockHuff encodes a block of bytes as either +// Huffman encoded literals or uncompressed bytes if the +// results only gains very little from compression. +func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { + if w.err != nil { + return + } + + // Clear histogram + for i := range w.literalFreq[:] { + w.literalFreq[i] = 0 + } + if !w.lastHuffMan { + for i := range w.offsetFreq[:] { + w.offsetFreq[i] = 0 + } + } + + const numLiterals = endBlockMarker + 1 + const numOffsets = 1 + + // Add everything as literals + // We have to estimate the header size. + // Assume header is around 70 bytes: + // https://stackoverflow.com/a/25454430 + const guessHeaderSizeBits = 70 * 8 + histogram(input, w.literalFreq[:numLiterals]) + ssize, storable := w.storedSize(input) + if storable && len(input) > 1024 { + // Quick check for incompressible content. + abs := float64(0) + avg := float64(len(input)) / 256 + max := float64(len(input) * 2) + for _, v := range w.literalFreq[:256] { + diff := float64(v) - avg + abs += diff * diff + if abs > max { + break + } + } + if abs < max { + if debugDeflate { + fmt.Println("stored", abs, "<", max) + } + // No chance we can compress this... + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + } + w.literalFreq[endBlockMarker] = 1 + w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15) + estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals]) + if estBits < math.MaxInt32 { + estBits += w.lastHeader + if w.lastHeader == 0 { + estBits += guessHeaderSizeBits + } + estBits += estBits >> w.logNewTablePenalty + } + + // Store bytes, if we don't get a reasonable improvement. + if storable && ssize <= estBits { + if debugDeflate { + fmt.Println("stored,", ssize, "<=", estBits) + } + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + if w.lastHeader > 0 { + reuseSize := w.literalEncoding.canReuseBits(w.literalFreq[:256]) + + if estBits < reuseSize { + if debugDeflate { + fmt.Println("NOT reusing, reuse:", reuseSize/8, "> new:", estBits/8, "header est:", w.lastHeader/8, "bytes") + } + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } else if debugDeflate { + fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8) + } + } + + count := 0 + if w.lastHeader == 0 { + // Use the temp encoding, so swap. + w.literalEncoding, w.tmpLitEncoding = w.tmpLitEncoding, w.literalEncoding + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + numCodegens := w.codegens() + + // Huffman. + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + w.lastHuffMan = true + w.lastHeader, _ = w.headerSize() + if debugDeflate { + count += w.lastHeader + fmt.Println("header:", count/8) + } + } + + encoding := w.literalEncoding.codes[:256] + // Go 1.16 LOVES having these on stack. At least 1.5x the speed. + bits, nbits, nbytes := w.bits, w.nbits, w.nbytes + + if debugDeflate { + count -= int(nbytes)*8 + int(nbits) + } + // Unroll, write 3 codes/loop. + // Fastest number of unrolls. + for len(input) > 3 { + // We must have at least 48 bits free. + if nbits >= 8 { + n := nbits >> 3 + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + bits >>= (n * 8) & 63 + nbits -= n * 8 + nbytes += n + } + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + if debugDeflate { + count += int(nbytes) * 8 + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + a, b := encoding[input[0]], encoding[input[1]] + bits |= a.code64() << (nbits & 63) + bits |= b.code64() << ((nbits + a.len()) & 63) + c := encoding[input[2]] + nbits += b.len() + a.len() + bits |= c.code64() << (nbits & 63) + nbits += c.len() + input = input[3:] + } + + // Remaining... + for _, t := range input { + if nbits >= 48 { + binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits) + //*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits + bits >>= 48 + nbits -= 48 + nbytes += 6 + if nbytes >= bufferFlushSize { + if w.err != nil { + nbytes = 0 + return + } + if debugDeflate { + count += int(nbytes) * 8 + } + _, w.err = w.writer.Write(w.bytes[:nbytes]) + nbytes = 0 + } + } + // Bitwriting inlined, ~30% speedup + c := encoding[t] + bits |= c.code64() << (nbits & 63) + + nbits += c.len() + if debugDeflate { + count += int(c.len()) + } + } + // Restore... + w.bits, w.nbits, w.nbytes = bits, nbits, nbytes + + if debugDeflate { + nb := count + int(nbytes)*8 + int(nbits) + fmt.Println("wrote", nb, "bits,", nb/8, "bytes.") + } + // Flush if needed to have space. + if w.nbits >= 48 { + w.writeOutBits() + } + + if eof || sync { + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + w.lastHuffMan = false + } +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go new file mode 100644 index 000000000..5ac144f28 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go @@ -0,0 +1,412 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "math" + "math/bits" +) + +const ( + maxBitsLimit = 16 + // number of valid literals + literalCount = 286 +) + +// hcode is a huffman code with a bit code and bit length. +type hcode uint32 + +func (h hcode) len() uint8 { + return uint8(h) +} + +func (h hcode) code64() uint64 { + return uint64(h >> 8) +} + +func (h hcode) zero() bool { + return h == 0 +} + +type huffmanEncoder struct { + codes []hcode + bitCount [17]int32 + + // Allocate a reusable buffer with the longest possible frequency table. + // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount. + // The largest of these is literalCount, so we allocate for that case. + freqcache [literalCount + 1]literalNode +} + +type literalNode struct { + literal uint16 + freq uint16 +} + +// A levelInfo describes the state of the constructed tree for a given depth. +type levelInfo struct { + // Our level. for better printing + level int32 + + // The frequency of the last node at this level + lastFreq int32 + + // The frequency of the next character to add to this level + nextCharFreq int32 + + // The frequency of the next pair (from level below) to add to this level. + // Only valid if the "needed" value of the next lower level is 0. + nextPairFreq int32 + + // The number of chains remaining to generate for this level before moving + // up to the next level + needed int32 +} + +// set sets the code and length of an hcode. +func (h *hcode) set(code uint16, length uint8) { + *h = hcode(length) | (hcode(code) << 8) +} + +func newhcode(code uint16, length uint8) hcode { + return hcode(length) | (hcode(code) << 8) +} + +func reverseBits(number uint16, bitLength byte) uint16 { + return bits.Reverse16(number << ((16 - bitLength) & 15)) +} + +func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} } + +func newHuffmanEncoder(size int) *huffmanEncoder { + // Make capacity to next power of two. + c := uint(bits.Len32(uint32(size - 1))) + return &huffmanEncoder{codes: make([]hcode, size, 1<<c)} +} + +// Generates a HuffmanCode corresponding to the fixed literal table +func generateFixedLiteralEncoding() *huffmanEncoder { + h := newHuffmanEncoder(literalCount) + codes := h.codes + var ch uint16 + for ch = 0; ch < literalCount; ch++ { + var bits uint16 + var size uint8 + switch { + case ch < 144: + // size 8, 000110000 .. 10111111 + bits = ch + 48 + size = 8 + case ch < 256: + // size 9, 110010000 .. 111111111 + bits = ch + 400 - 144 + size = 9 + case ch < 280: + // size 7, 0000000 .. 0010111 + bits = ch - 256 + size = 7 + default: + // size 8, 11000000 .. 11000111 + bits = ch + 192 - 280 + size = 8 + } + codes[ch] = newhcode(reverseBits(bits, size), size) + } + return h +} + +func generateFixedOffsetEncoding() *huffmanEncoder { + h := newHuffmanEncoder(30) + codes := h.codes + for ch := range codes { + codes[ch] = newhcode(reverseBits(uint16(ch), 5), 5) + } + return h +} + +var fixedLiteralEncoding = generateFixedLiteralEncoding() +var fixedOffsetEncoding = generateFixedOffsetEncoding() + +func (h *huffmanEncoder) bitLength(freq []uint16) int { + var total int + for i, f := range freq { + if f != 0 { + total += int(f) * int(h.codes[i].len()) + } + } + return total +} + +func (h *huffmanEncoder) bitLengthRaw(b []byte) int { + var total int + for _, f := range b { + total += int(h.codes[f].len()) + } + return total +} + +// canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused. +func (h *huffmanEncoder) canReuseBits(freq []uint16) int { + var total int + for i, f := range freq { + if f != 0 { + code := h.codes[i] + if code.zero() { + return math.MaxInt32 + } + total += int(f) * int(code.len()) + } + } + return total +} + +// Return the number of literals assigned to each bit size in the Huffman encoding +// +// This method is only called when list.length >= 3 +// The cases of 0, 1, and 2 literals are handled by special case code. +// +// list An array of the literals with non-zero frequencies +// and their associated frequencies. The array is in order of increasing +// frequency, and has as its last element a special element with frequency +// MaxInt32 +// maxBits The maximum number of bits that should be used to encode any literal. +// Must be less than 16. +// return An integer array in which array[i] indicates the number of literals +// that should be encoded in i bits. +func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { + if maxBits >= maxBitsLimit { + panic("flate: maxBits too large") + } + n := int32(len(list)) + list = list[0 : n+1] + list[n] = maxNode() + + // The tree can't have greater depth than n - 1, no matter what. This + // saves a little bit of work in some small cases + if maxBits > n-1 { + maxBits = n - 1 + } + + // Create information about each of the levels. + // A bogus "Level 0" whose sole purpose is so that + // level1.prev.needed==0. This makes level1.nextPairFreq + // be a legitimate value that never gets chosen. + var levels [maxBitsLimit]levelInfo + // leafCounts[i] counts the number of literals at the left + // of ancestors of the rightmost node at level i. + // leafCounts[i][j] is the number of literals at the left + // of the level j ancestor. + var leafCounts [maxBitsLimit][maxBitsLimit]int32 + + // Descending to only have 1 bounds check. + l2f := int32(list[2].freq) + l1f := int32(list[1].freq) + l0f := int32(list[0].freq) + int32(list[1].freq) + + for level := int32(1); level <= maxBits; level++ { + // For every level, the first two items are the first two characters. + // We initialize the levels as if we had already figured this out. + levels[level] = levelInfo{ + level: level, + lastFreq: l1f, + nextCharFreq: l2f, + nextPairFreq: l0f, + } + leafCounts[level][level] = 2 + if level == 1 { + levels[level].nextPairFreq = math.MaxInt32 + } + } + + // We need a total of 2*n - 2 items at top level and have already generated 2. + levels[maxBits].needed = 2*n - 4 + + level := uint32(maxBits) + for level < 16 { + l := &levels[level] + if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { + // We've run out of both leafs and pairs. + // End all calculations for this level. + // To make sure we never come back to this level or any lower level, + // set nextPairFreq impossibly large. + l.needed = 0 + levels[level+1].nextPairFreq = math.MaxInt32 + level++ + continue + } + + prevFreq := l.lastFreq + if l.nextCharFreq < l.nextPairFreq { + // The next item on this row is a leaf node. + n := leafCounts[level][level] + 1 + l.lastFreq = l.nextCharFreq + // Lower leafCounts are the same of the previous node. + leafCounts[level][level] = n + e := list[n] + if e.literal < math.MaxUint16 { + l.nextCharFreq = int32(e.freq) + } else { + l.nextCharFreq = math.MaxInt32 + } + } else { + // The next item on this row is a pair from the previous row. + // nextPairFreq isn't valid until we generate two + // more values in the level below + l.lastFreq = l.nextPairFreq + // Take leaf counts from the lower level, except counts[level] remains the same. + if true { + save := leafCounts[level][level] + leafCounts[level] = leafCounts[level-1] + leafCounts[level][level] = save + } else { + copy(leafCounts[level][:level], leafCounts[level-1][:level]) + } + levels[l.level-1].needed = 2 + } + + if l.needed--; l.needed == 0 { + // We've done everything we need to do for this level. + // Continue calculating one level up. Fill in nextPairFreq + // of that level with the sum of the two nodes we've just calculated on + // this level. + if l.level == maxBits { + // All done! + break + } + levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq + level++ + } else { + // If we stole from below, move down temporarily to replenish it. + for levels[level-1].needed > 0 { + level-- + } + } + } + + // Somethings is wrong if at the end, the top level is null or hasn't used + // all of the leaves. + if leafCounts[maxBits][maxBits] != n { + panic("leafCounts[maxBits][maxBits] != n") + } + + bitCount := h.bitCount[:maxBits+1] + bits := 1 + counts := &leafCounts[maxBits] + for level := maxBits; level > 0; level-- { + // chain.leafCount gives the number of literals requiring at least "bits" + // bits to encode. + bitCount[bits] = counts[level] - counts[level-1] + bits++ + } + return bitCount +} + +// Look at the leaves and assign them a bit count and an encoding as specified +// in RFC 1951 3.2.2 +func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) { + code := uint16(0) + for n, bits := range bitCount { + code <<= 1 + if n == 0 || bits == 0 { + continue + } + // The literals list[len(list)-bits] .. list[len(list)-bits] + // are encoded using "bits" bits, and get the values + // code, code + 1, .... The code values are + // assigned in literal order (not frequency order). + chunk := list[len(list)-int(bits):] + + sortByLiteral(chunk) + for _, node := range chunk { + h.codes[node.literal] = newhcode(reverseBits(code, uint8(n)), uint8(n)) + code++ + } + list = list[0 : len(list)-int(bits)] + } +} + +// Update this Huffman Code object to be the minimum code for the specified frequency count. +// +// freq An array of frequencies, in which frequency[i] gives the frequency of literal i. +// maxBits The maximum number of bits to use for any literal. +func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { + list := h.freqcache[:len(freq)+1] + codes := h.codes[:len(freq)] + // Number of non-zero literals + count := 0 + // Set list to be the set of all non-zero literals and their frequencies + for i, f := range freq { + if f != 0 { + list[count] = literalNode{uint16(i), f} + count++ + } else { + codes[i] = 0 + } + } + list[count] = literalNode{} + + list = list[:count] + if count <= 2 { + // Handle the small cases here, because they are awkward for the general case code. With + // two or fewer literals, everything has bit length 1. + for i, node := range list { + // "list" is in order of increasing literal value. + h.codes[node.literal].set(uint16(i), 1) + } + return + } + sortByFreq(list) + + // Get the number of literals for each bit count + bitCount := h.bitCounts(list, maxBits) + // And do the assignment + h.assignEncodingAndSize(bitCount, list) +} + +// atLeastOne clamps the result between 1 and 15. +func atLeastOne(v float32) float32 { + if v < 1 { + return 1 + } + if v > 15 { + return 15 + } + return v +} + +func histogram(b []byte, h []uint16) { + if true && len(b) >= 8<<10 { + // Split for bigger inputs + histogramSplit(b, h) + } else { + h = h[:256] + for _, t := range b { + h[t]++ + } + } +} + +func histogramSplit(b []byte, h []uint16) { + // Tested, and slightly faster than 2-way. + // Writing to separate arrays and combining is also slightly slower. + h = h[:256] + for len(b)&3 != 0 { + h[b[0]]++ + b = b[1:] + } + n := len(b) / 4 + x, y, z, w := b[:n], b[n:], b[n+n:], b[n+n+n:] + y, z, w = y[:len(x)], z[:len(x)], w[:len(x)] + for i, t := range x { + v0 := &h[t] + v1 := &h[y[i]] + v3 := &h[w[i]] + v2 := &h[z[i]] + *v0++ + *v1++ + *v2++ + *v3++ + } +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go new file mode 100644 index 000000000..207780299 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go @@ -0,0 +1,178 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByFreq(data []literalNode) { + n := len(data) + quickSortByFreq(data, 0, n, maxDepth(n)) +} + +func quickSortByFreq(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivotByFreq(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSortByFreq(data, a, mlo, maxDepth) + a = mhi // i.e., quickSortByFreq(data, mhi, b) + } else { + quickSortByFreq(data, mhi, b, maxDepth) + b = mlo // i.e., quickSortByFreq(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSortByFreq(data, a, b) + } +} + +// siftDownByFreq implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDownByFreq(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) { + child++ + } + if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s) + medianOfThreeSortByFreq(data, m, m-s, m+s) + medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThreeSortByFreq(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { + } + b := a + for { + for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot + } + for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. + protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot + } + for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSortByFreq(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// quickSortByFreq, loosely following Bentley and McIlroy, +// ``Engineering a Sort Function,'' SP&E November 1993. + +// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go new file mode 100644 index 000000000..93f1aea10 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go @@ -0,0 +1,201 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByLiteral(data []literalNode) { + n := len(data) + quickSort(data, 0, n, maxDepth(n)) +} + +func quickSort(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivot(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSort(data, a, mlo, maxDepth) + a = mhi // i.e., quickSort(data, mhi, b) + } else { + quickSort(data, mhi, b, maxDepth) + b = mlo // i.e., quickSort(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].literal < data[i-6].literal { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSort(data, a, b) + } +} +func heapSort(data []literalNode, a, b int) { + first := a + lo := 0 + hi := b - a + + // Build heap with greatest element at top. + for i := (hi - 1) / 2; i >= 0; i-- { + siftDown(data, i, hi, first) + } + + // Pop elements, largest first, into end of data. + for i := hi - 1; i >= 0; i-- { + data[first], data[first+i] = data[first+i], data[first] + siftDown(data, lo, i, first) + } +} + +// siftDown implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDown(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && data[first+child].literal < data[first+child+1].literal { + child++ + } + if data[first+root].literal > data[first+child].literal { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThree(data, lo, lo+s, lo+2*s) + medianOfThree(data, m, m-s, m+s) + medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThree(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && data[a].literal < data[pivot].literal; a++ { + } + b := a + for { + for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot + } + for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. + protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].literal > data[pivot].literal { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot + } + for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSort(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && data[j].literal < data[j-1].literal; j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// maxDepth returns a threshold at which quicksort should switch +// to heapsort. It returns 2*ceil(lg(n+1)). +func maxDepth(n int) int { + var depth int + for i := n; i > 0; i >>= 1 { + depth++ + } + return depth * 2 +} + +// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThree(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].literal < data[m1].literal { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go new file mode 100644 index 000000000..414c0bea9 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -0,0 +1,793 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package flate implements the DEFLATE compressed data format, described in +// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file +// formats. +package flate + +import ( + "bufio" + "compress/flate" + "fmt" + "io" + "math/bits" + "sync" +) + +const ( + maxCodeLen = 16 // max length of Huffman code + maxCodeLenMask = 15 // mask for max length of Huffman code + // The next three numbers come from the RFC section 3.2.7, with the + // additional proviso in section 3.2.5 which implies that distance codes + // 30 and 31 should never occur in compressed data. + maxNumLit = 286 + maxNumDist = 30 + numCodes = 19 // number of codes in Huffman meta-code + + debugDecode = false +) + +// Value of length - 3 and extra bits. +type lengthExtra struct { + length, extra uint8 +} + +var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}} + +var bitMask32 = [32]uint32{ + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, + 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, + 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF, + 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF, +} // up to 32 bits + +// Initialize the fixedHuffmanDecoder only once upon first use. +var fixedOnce sync.Once +var fixedHuffmanDecoder huffmanDecoder + +// A CorruptInputError reports the presence of corrupt input at a given offset. +type CorruptInputError = flate.CorruptInputError + +// An InternalError reports an error in the flate code itself. +type InternalError string + +func (e InternalError) Error() string { return "flate: internal error: " + string(e) } + +// A ReadError reports an error encountered while reading input. +// +// Deprecated: No longer returned. +type ReadError = flate.ReadError + +// A WriteError reports an error encountered while writing output. +// +// Deprecated: No longer returned. +type WriteError = flate.WriteError + +// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to +// to switch to a new underlying Reader. This permits reusing a ReadCloser +// instead of allocating a new one. +type Resetter interface { + // Reset discards any buffered data and resets the Resetter as if it was + // newly initialized with the given reader. + Reset(r io.Reader, dict []byte) error +} + +// The data structure for decoding Huffman tables is based on that of +// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), +// For codes smaller than the table width, there are multiple entries +// (each combination of trailing bits has the same value). For codes +// larger than the table width, the table contains a link to an overflow +// table. The width of each entry in the link table is the maximum code +// size minus the chunk width. +// +// Note that you can do a lookup in the table even without all bits +// filled. Since the extra bits are zero, and the DEFLATE Huffman codes +// have the property that shorter codes come before longer ones, the +// bit length estimate in the result is a lower bound on the actual +// number of bits. +// +// See the following: +// http://www.gzip.org/algorithm.txt + +// chunk & 15 is number of bits +// chunk >> 4 is value, including table link + +const ( + huffmanChunkBits = 9 + huffmanNumChunks = 1 << huffmanChunkBits + huffmanCountMask = 15 + huffmanValueShift = 4 +) + +type huffmanDecoder struct { + maxRead int // the maximum number of bits we can read and not overread + chunks *[huffmanNumChunks]uint16 // chunks as described above + links [][]uint16 // overflow links + linkMask uint32 // mask the width of the link table +} + +// Initialize Huffman decoding tables from array of code lengths. +// Following this function, h is guaranteed to be initialized into a complete +// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a +// degenerate case where the tree has only a single symbol with length 1. Empty +// trees are permitted. +func (h *huffmanDecoder) init(lengths []int) bool { + // Sanity enables additional runtime tests during Huffman + // table construction. It's intended to be used during + // development to supplement the currently ad-hoc unit tests. + const sanity = false + + if h.chunks == nil { + h.chunks = &[huffmanNumChunks]uint16{} + } + if h.maxRead != 0 { + *h = huffmanDecoder{chunks: h.chunks, links: h.links} + } + + // Count number of codes of each length, + // compute maxRead and max length. + var count [maxCodeLen]int + var min, max int + for _, n := range lengths { + if n == 0 { + continue + } + if min == 0 || n < min { + min = n + } + if n > max { + max = n + } + count[n&maxCodeLenMask]++ + } + + // Empty tree. The decompressor.huffSym function will fail later if the tree + // is used. Technically, an empty tree is only valid for the HDIST tree and + // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree + // is guaranteed to fail since it will attempt to use the tree to decode the + // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is + // guaranteed to fail later since the compressed data section must be + // composed of at least one symbol (the end-of-block marker). + if max == 0 { + return true + } + + code := 0 + var nextcode [maxCodeLen]int + for i := min; i <= max; i++ { + code <<= 1 + nextcode[i&maxCodeLenMask] = code + code += count[i&maxCodeLenMask] + } + + // Check that the coding is complete (i.e., that we've + // assigned all 2-to-the-max possible bit sequences). + // Exception: To be compatible with zlib, we also need to + // accept degenerate single-code codings. See also + // TestDegenerateHuffmanCoding. + if code != 1<<uint(max) && !(code == 1 && max == 1) { + if debugDecode { + fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)") + } + return false + } + + h.maxRead = min + chunks := h.chunks[:] + for i := range chunks { + chunks[i] = 0 + } + + if max > huffmanChunkBits { + numLinks := 1 << (uint(max) - huffmanChunkBits) + h.linkMask = uint32(numLinks - 1) + + // create link tables + link := nextcode[huffmanChunkBits+1] >> 1 + if cap(h.links) < huffmanNumChunks-link { + h.links = make([][]uint16, huffmanNumChunks-link) + } else { + h.links = h.links[:huffmanNumChunks-link] + } + for j := uint(link); j < huffmanNumChunks; j++ { + reverse := int(bits.Reverse16(uint16(j))) + reverse >>= uint(16 - huffmanChunkBits) + off := j - uint(link) + if sanity && h.chunks[reverse] != 0 { + panic("impossible: overwriting existing chunk") + } + h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1)) + if cap(h.links[off]) < numLinks { + h.links[off] = make([]uint16, numLinks) + } else { + links := h.links[off][:0] + h.links[off] = links[:numLinks] + } + } + } else { + h.links = h.links[:0] + } + + for i, n := range lengths { + if n == 0 { + continue + } + code := nextcode[n] + nextcode[n]++ + chunk := uint16(i<<huffmanValueShift | n) + reverse := int(bits.Reverse16(uint16(code))) + reverse >>= uint(16 - n) + if n <= huffmanChunkBits { + for off := reverse; off < len(h.chunks); off += 1 << uint(n) { + // We should never need to overwrite + // an existing chunk. Also, 0 is + // never a valid chunk, because the + // lower 4 "count" bits should be + // between 1 and 15. + if sanity && h.chunks[off] != 0 { + panic("impossible: overwriting existing chunk") + } + h.chunks[off] = chunk + } + } else { + j := reverse & (huffmanNumChunks - 1) + if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { + // Longer codes should have been + // associated with a link table above. + panic("impossible: not an indirect chunk") + } + value := h.chunks[j] >> huffmanValueShift + linktab := h.links[value] + reverse >>= huffmanChunkBits + for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { + if sanity && linktab[off] != 0 { + panic("impossible: overwriting existing chunk") + } + linktab[off] = chunk + } + } + } + + if sanity { + // Above we've sanity checked that we never overwrote + // an existing entry. Here we additionally check that + // we filled the tables completely. + for i, chunk := range h.chunks { + if chunk == 0 { + // As an exception, in the degenerate + // single-code case, we allow odd + // chunks to be missing. + if code == 1 && i%2 == 1 { + continue + } + panic("impossible: missing chunk") + } + } + for _, linktab := range h.links { + for _, chunk := range linktab { + if chunk == 0 { + panic("impossible: missing chunk") + } + } + } + } + + return true +} + +// The actual read interface needed by NewReader. +// If the passed in io.Reader does not also have ReadByte, +// the NewReader will introduce its own buffering. +type Reader interface { + io.Reader + io.ByteReader +} + +// Decompress state. +type decompressor struct { + // Input source. + r Reader + roffset int64 + + // Huffman decoders for literal/length, distance. + h1, h2 huffmanDecoder + + // Length arrays used to define Huffman codes. + bits *[maxNumLit + maxNumDist]int + codebits *[numCodes]int + + // Output history, buffer. + dict dictDecoder + + // Next step in the decompression, + // and decompression state. + step func(*decompressor) + stepState int + err error + toRead []byte + hl, hd *huffmanDecoder + copyLen int + copyDist int + + // Temporary buffer (avoids repeated allocation). + buf [4]byte + + // Input bits, in top of b. + b uint32 + + nb uint + final bool +} + +func (f *decompressor) nextBlock() { + for f.nb < 1+2 { + if f.err = f.moreBits(); f.err != nil { + return + } + } + f.final = f.b&1 == 1 + f.b >>= 1 + typ := f.b & 3 + f.b >>= 2 + f.nb -= 1 + 2 + switch typ { + case 0: + f.dataBlock() + if debugDecode { + fmt.Println("stored block") + } + case 1: + // compressed, fixed Huffman tables + f.hl = &fixedHuffmanDecoder + f.hd = nil + f.huffmanBlockDecoder()() + if debugDecode { + fmt.Println("predefinied huffman block") + } + case 2: + // compressed, dynamic Huffman tables + if f.err = f.readHuffman(); f.err != nil { + break + } + f.hl = &f.h1 + f.hd = &f.h2 + f.huffmanBlockDecoder()() + if debugDecode { + fmt.Println("dynamic huffman block") + } + default: + // 3 is reserved. + if debugDecode { + fmt.Println("reserved data block encountered") + } + f.err = CorruptInputError(f.roffset) + } +} + +func (f *decompressor) Read(b []byte) (int, error) { + for { + if len(f.toRead) > 0 { + n := copy(b, f.toRead) + f.toRead = f.toRead[n:] + if len(f.toRead) == 0 { + return n, f.err + } + return n, nil + } + if f.err != nil { + return 0, f.err + } + f.step(f) + if f.err != nil && len(f.toRead) == 0 { + f.toRead = f.dict.readFlush() // Flush what's left in case of error + } + } +} + +// Support the io.WriteTo interface for io.Copy and friends. +func (f *decompressor) WriteTo(w io.Writer) (int64, error) { + total := int64(0) + flushed := false + for { + if len(f.toRead) > 0 { + n, err := w.Write(f.toRead) + total += int64(n) + if err != nil { + f.err = err + return total, err + } + if n != len(f.toRead) { + return total, io.ErrShortWrite + } + f.toRead = f.toRead[:0] + } + if f.err != nil && flushed { + if f.err == io.EOF { + return total, nil + } + return total, f.err + } + if f.err == nil { + f.step(f) + } + if len(f.toRead) == 0 && f.err != nil && !flushed { + f.toRead = f.dict.readFlush() // Flush what's left in case of error + flushed = true + } + } +} + +func (f *decompressor) Close() error { + if f.err == io.EOF { + return nil + } + return f.err +} + +// RFC 1951 section 3.2.7. +// Compression with dynamic Huffman codes + +var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +func (f *decompressor) readHuffman() error { + // HLIT[5], HDIST[5], HCLEN[4]. + for f.nb < 5+5+4 { + if err := f.moreBits(); err != nil { + return err + } + } + nlit := int(f.b&0x1F) + 257 + if nlit > maxNumLit { + if debugDecode { + fmt.Println("nlit > maxNumLit", nlit) + } + return CorruptInputError(f.roffset) + } + f.b >>= 5 + ndist := int(f.b&0x1F) + 1 + if ndist > maxNumDist { + if debugDecode { + fmt.Println("ndist > maxNumDist", ndist) + } + return CorruptInputError(f.roffset) + } + f.b >>= 5 + nclen := int(f.b&0xF) + 4 + // numCodes is 19, so nclen is always valid. + f.b >>= 4 + f.nb -= 5 + 5 + 4 + + // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. + for i := 0; i < nclen; i++ { + for f.nb < 3 { + if err := f.moreBits(); err != nil { + return err + } + } + f.codebits[codeOrder[i]] = int(f.b & 0x7) + f.b >>= 3 + f.nb -= 3 + } + for i := nclen; i < len(codeOrder); i++ { + f.codebits[codeOrder[i]] = 0 + } + if !f.h1.init(f.codebits[0:]) { + if debugDecode { + fmt.Println("init codebits failed") + } + return CorruptInputError(f.roffset) + } + + // HLIT + 257 code lengths, HDIST + 1 code lengths, + // using the code length Huffman code. + for i, n := 0, nlit+ndist; i < n; { + x, err := f.huffSym(&f.h1) + if err != nil { + return err + } + if x < 16 { + // Actual length. + f.bits[i] = x + i++ + continue + } + // Repeat previous length or zero. + var rep int + var nb uint + var b int + switch x { + default: + return InternalError("unexpected length code") + case 16: + rep = 3 + nb = 2 + if i == 0 { + if debugDecode { + fmt.Println("i==0") + } + return CorruptInputError(f.roffset) + } + b = f.bits[i-1] + case 17: + rep = 3 + nb = 3 + b = 0 + case 18: + rep = 11 + nb = 7 + b = 0 + } + for f.nb < nb { + if err := f.moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits:", err) + } + return err + } + } + rep += int(f.b & uint32(1<<(nb®SizeMaskUint32)-1)) + f.b >>= nb & regSizeMaskUint32 + f.nb -= nb + if i+rep > n { + if debugDecode { + fmt.Println("i+rep > n", i, rep, n) + } + return CorruptInputError(f.roffset) + } + for j := 0; j < rep; j++ { + f.bits[i] = b + i++ + } + } + + if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { + if debugDecode { + fmt.Println("init2 failed") + } + return CorruptInputError(f.roffset) + } + + // As an optimization, we can initialize the maxRead bits to read at a time + // for the HLIT tree to the length of the EOB marker since we know that + // every block must terminate with one. This preserves the property that + // we never read any extra bytes after the end of the DEFLATE stream. + if f.h1.maxRead < f.bits[endBlockMarker] { + f.h1.maxRead = f.bits[endBlockMarker] + } + if !f.final { + // If not the final block, the smallest block possible is + // a predefined table, BTYPE=01, with a single EOB marker. + // This will take up 3 + 7 bits. + f.h1.maxRead += 10 + } + + return nil +} + +// Copy a single uncompressed data block from input to output. +func (f *decompressor) dataBlock() { + // Uncompressed. + // Discard current half-byte. + left := (f.nb) & 7 + f.nb -= left + f.b >>= left + + offBytes := f.nb >> 3 + // Unfilled values will be overwritten. + f.buf[0] = uint8(f.b) + f.buf[1] = uint8(f.b >> 8) + f.buf[2] = uint8(f.b >> 16) + f.buf[3] = uint8(f.b >> 24) + + f.roffset += int64(offBytes) + f.nb, f.b = 0, 0 + + // Length then ones-complement of length. + nr, err := io.ReadFull(f.r, f.buf[offBytes:4]) + f.roffset += int64(nr) + if err != nil { + f.err = noEOF(err) + return + } + n := uint16(f.buf[0]) | uint16(f.buf[1])<<8 + nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8 + if nn != ^n { + if debugDecode { + ncomp := ^n + fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp) + } + f.err = CorruptInputError(f.roffset) + return + } + + if n == 0 { + f.toRead = f.dict.readFlush() + f.finishBlock() + return + } + + f.copyLen = int(n) + f.copyData() +} + +// copyData copies f.copyLen bytes from the underlying reader into f.hist. +// It pauses for reads when f.hist is full. +func (f *decompressor) copyData() { + buf := f.dict.writeSlice() + if len(buf) > f.copyLen { + buf = buf[:f.copyLen] + } + + cnt, err := io.ReadFull(f.r, buf) + f.roffset += int64(cnt) + f.copyLen -= cnt + f.dict.writeMark(cnt) + if err != nil { + f.err = noEOF(err) + return + } + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).copyData + return + } + f.finishBlock() +} + +func (f *decompressor) finishBlock() { + if f.final { + if f.dict.availRead() > 0 { + f.toRead = f.dict.readFlush() + } + f.err = io.EOF + } + f.step = (*decompressor).nextBlock +} + +// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. +func noEOF(e error) error { + if e == io.EOF { + return io.ErrUnexpectedEOF + } + return e +} + +func (f *decompressor) moreBits() error { + c, err := f.r.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << (f.nb & regSizeMaskUint32) + f.nb += 8 + return nil +} + +// Read the next Huffman-encoded symbol from f according to h. +func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(h.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := f.r.ReadByte() + if err != nil { + f.b = b + f.nb = nb + return 0, noEOF(err) + } + f.roffset++ + b |= uint32(c) << (nb & regSizeMaskUint32) + nb += 8 + } + chunk := h.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return 0, f.err + } + f.b = b >> (n & regSizeMaskUint32) + f.nb = nb - n + return int(chunk >> huffmanValueShift), nil + } + } +} + +func makeReader(r io.Reader) Reader { + if rr, ok := r.(Reader); ok { + return rr + } + return bufio.NewReader(r) +} + +func fixedHuffmanDecoderInit() { + fixedOnce.Do(func() { + // These come from the RFC section 3.2.6. + var bits [288]int + for i := 0; i < 144; i++ { + bits[i] = 8 + } + for i := 144; i < 256; i++ { + bits[i] = 9 + } + for i := 256; i < 280; i++ { + bits[i] = 7 + } + for i := 280; i < 288; i++ { + bits[i] = 8 + } + fixedHuffmanDecoder.init(bits[:]) + }) +} + +func (f *decompressor) Reset(r io.Reader, dict []byte) error { + *f = decompressor{ + r: makeReader(r), + bits: f.bits, + codebits: f.codebits, + h1: f.h1, + h2: f.h2, + dict: f.dict, + step: (*decompressor).nextBlock, + } + f.dict.init(maxMatchOffset, dict) + return nil +} + +// NewReader returns a new ReadCloser that can be used +// to read the uncompressed version of r. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser +// when finished reading. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReader(r io.Reader) io.ReadCloser { + fixedHuffmanDecoderInit() + + var f decompressor + f.r = makeReader(r) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) + f.step = (*decompressor).nextBlock + f.dict.init(maxMatchOffset, nil) + return &f +} + +// NewReaderDict is like NewReader but initializes the reader +// with a preset dictionary. The returned Reader behaves as if +// the uncompressed data stream started with the given dictionary, +// which has already been read. NewReaderDict is typically used +// to read data compressed by NewWriterDict. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { + fixedHuffmanDecoderInit() + + var f decompressor + f.r = makeReader(r) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) + f.step = (*decompressor).nextBlock + f.dict.init(maxMatchOffset, dict) + return &f +} diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go new file mode 100644 index 000000000..61342b6b8 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go @@ -0,0 +1,1283 @@ +// Code generated by go generate gen_inflate.go. DO NOT EDIT. + +package flate + +import ( + "bufio" + "bytes" + "fmt" + "math/bits" + "strings" +) + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBytesBuffer() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bytes.Buffer) + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanBytesBuffer + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra + default: + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, int(dist) + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work + f.stepState = stateDict + f.b, f.nb = fb, fnb + return + } + goto readLiteral + } + // Not reached +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBytesReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bytes.Reader) + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanBytesReader + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra + default: + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, int(dist) + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanBytesReader // We need to continue this work + f.stepState = stateDict + f.b, f.nb = fb, fnb + return + } + goto readLiteral + } + // Not reached +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBufioReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bufio.Reader) + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanBufioReader + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra + default: + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, int(dist) + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanBufioReader // We need to continue this work + f.stepState = stateDict + f.b, f.nb = fb, fnb + return + } + goto readLiteral + } + // Not reached +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanStringsReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*strings.Reader) + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanStringsReader + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra + default: + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, int(dist) + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanStringsReader // We need to continue this work + f.stepState = stateDict + f.b, f.nb = fb, fnb + return + } + goto readLiteral + } + // Not reached +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanGenericReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(Reader) + + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + fnb, fb, dict := f.nb, f.b, &f.dict + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hl.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var length int + switch { + case v < 256: + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanGenericReader + f.stepState = stateInit + f.b, f.nb = fb, fnb + return + } + goto readLiteral + case v == 256: + f.b, f.nb = fb, fnb + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + case v < maxNumLit: + val := decCodeToLen[(v - 257)] + length = int(val.length) + 3 + n := uint(val.extra) + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + length += int(fb & bitMask32[n]) + fb >>= n & regSizeMaskUint32 + fnb -= n + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + f.b, f.nb = fb, fnb + return + } + + var dist uint32 + if f.hd == nil { + for fnb < 5 { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + dist = uint32(bits.Reverse8(uint8(fb & 0x1F << 3))) + fb >>= 5 + fnb -= 5 + } else { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hd.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + for { + for fnb < n { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + f.err = noEOF(err) + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + chunk := f.hd.chunks[fb&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hd.links[chunk>>huffmanValueShift][(fb>>huffmanChunkBits)&f.hd.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= fnb { + if n == 0 { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + fb = fb >> (n & regSizeMaskUint32) + fnb = fnb - n + dist = uint32(chunk >> huffmanValueShift) + break + } + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << (nb & regSizeMaskUint32) + for fnb < nb { + c, err := fr.ReadByte() + if err != nil { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("morebits f.nb<nb:", err) + } + f.err = err + return + } + f.roffset++ + fb |= uint32(c) << (fnb & regSizeMaskUint32) + fnb += 8 + } + extra |= fb & bitMask32[nb] + fb >>= nb & regSizeMaskUint32 + fnb -= nb + dist = 1<<((nb+1)®SizeMaskUint32) + 1 + extra + // slower: dist = bitMask32[nb+1] + 2 + extra + default: + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > uint32(dict.histSize()) { + f.b, f.nb = fb, fnb + if debugDecode { + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, int(dist) + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() + f.step = (*decompressor).huffmanGenericReader // We need to continue this work + f.stepState = stateDict + f.b, f.nb = fb, fnb + return + } + goto readLiteral + } + // Not reached +} + +func (f *decompressor) huffmanBlockDecoder() func() { + switch f.r.(type) { + case *bytes.Buffer: + return f.huffmanBytesBuffer + case *bytes.Reader: + return f.huffmanBytesReader + case *bufio.Reader: + return f.huffmanBufioReader + case *strings.Reader: + return f.huffmanStringsReader + case Reader: + return f.huffmanGenericReader + default: + return f.huffmanGenericReader + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go new file mode 100644 index 000000000..0f14f8d63 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level1.go @@ -0,0 +1,240 @@ +package flate + +import ( + "encoding/binary" + "fmt" + "math/bits" +) + +// fastGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type fastEncL1 struct { + fastGen + table [tableSize]tableEntry +} + +// EncodeL1 uses a similar algorithm to level 1 +func (e *fastEncL1) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3232(src, s) + + for { + const skipLog = 5 + const doEvery = 2 + + nextS := s + var candidate tableEntry + for { + nextHash := hash(cv) + candidate = e.table[nextHash] + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + + now := load6432(src, nextS) + e.table[nextHash] = tableEntry{offset: s + e.cur} + nextHash = hash(uint32(now)) + + offset := s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} + break + } + + // Do one right away... + cv = uint32(now) + s = nextS + nextS++ + candidate = e.table[nextHash] + now >>= 8 + e.table[nextHash] = tableEntry{offset: s + e.cur} + + offset = s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} + break + } + cv = uint32(now) + s = nextS + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + t := candidate.offset - e.cur + var l = int32(4) + if false { + l = e.matchlenLong(s+4, t+4, src) + 4 + } else { + // inlined: + a := src[s+4:] + b := src[t+4:] + for len(a) >= 8 { + if diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b); diff != 0 { + l += int32(bits.TrailingZeros64(diff) >> 3) + break + } + l += 8 + a = a[8:] + b = b[8:] + } + if len(a) < 8 { + b = b[:len(a)] + for i := range a { + if a[i] != b[i] { + break + } + l++ + } + } + } + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + + // Save the match found + if false { + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + } else { + // Inlined... + xoffset := uint32(s - t - baseMatchOffset) + xlength := l + oc := offsetCode(xoffset) + xoffset |= oc << 16 + for xlength > 0 { + xl := xlength + if xl > 258 { + if xl > 258+baseMatchLength { + xl = 258 + } else { + xl = 258 - baseMatchLength + } + } + xlength -= xl + xl -= baseMatchLength + dst.extraHist[lengthCodes1[uint8(xl)]]++ + dst.offHist[oc]++ + dst.tokens[dst.n] = token(matchType | uint32(xl)<<lengthShift | xoffset) + dst.n++ + } + } + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + if s >= sLimit { + // Index first pair after match end. + if int(s+l+4) < len(src) { + cv := load3232(src, s) + e.table[hash(cv)] = tableEntry{offset: s + e.cur} + } + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6432(src, s-2) + o := e.cur + s - 2 + prevHash := hash(uint32(x)) + e.table[prevHash] = tableEntry{offset: o} + x >>= 16 + currHash := hash(uint32(x)) + candidate = e.table[currHash] + e.table[currHash] = tableEntry{offset: o + 2} + + offset := s - (candidate.offset - e.cur) + if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) { + cv = uint32(x >> 8) + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go new file mode 100644 index 000000000..8603fbd55 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level2.go @@ -0,0 +1,213 @@ +package flate + +import "fmt" + +// fastGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type fastEncL2 struct { + fastGen + table [bTableSize]tableEntry +} + +// EncodeL2 uses a similar algorithm to level 1, but is capable +// of matching across blocks giving better compression at a small slowdown. +func (e *fastEncL2) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3232(src, s) + for { + // When should we start skipping if we haven't found matches in a long while. + const skipLog = 5 + const doEvery = 2 + + nextS := s + var candidate tableEntry + for { + nextHash := hash4u(cv, bTableBits) + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + candidate = e.table[nextHash] + now := load6432(src, nextS) + e.table[nextHash] = tableEntry{offset: s + e.cur} + nextHash = hash4u(uint32(now), bTableBits) + + offset := s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} + break + } + + // Do one right away... + cv = uint32(now) + s = nextS + nextS++ + candidate = e.table[nextHash] + now >>= 8 + e.table[nextHash] = tableEntry{offset: s + e.cur} + + offset = s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + break + } + cv = uint32(now) + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + t := candidate.offset - e.cur + l := e.matchlenLong(s+4, t+4, src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + // Index first pair after match end. + if int(s+l+4) < len(src) { + cv := load3232(src, s) + e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur} + } + goto emitRemainder + } + + // Store every second hash in-between, but offset by 1. + for i := s - l + 2; i < s-5; i += 7 { + x := load6432(src, i) + nextHash := hash4u(uint32(x), bTableBits) + e.table[nextHash] = tableEntry{offset: e.cur + i} + // Skip one + x >>= 16 + nextHash = hash4u(uint32(x), bTableBits) + e.table[nextHash] = tableEntry{offset: e.cur + i + 2} + // Skip one + x >>= 16 + nextHash = hash4u(uint32(x), bTableBits) + e.table[nextHash] = tableEntry{offset: e.cur + i + 4} + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 to s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6432(src, s-2) + o := e.cur + s - 2 + prevHash := hash4u(uint32(x), bTableBits) + prevHash2 := hash4u(uint32(x>>8), bTableBits) + e.table[prevHash] = tableEntry{offset: o} + e.table[prevHash2] = tableEntry{offset: o + 1} + currHash := hash4u(uint32(x>>16), bTableBits) + candidate = e.table[currHash] + e.table[currHash] = tableEntry{offset: o + 2} + + offset := s - (candidate.offset - e.cur) + if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) { + cv = uint32(x >> 24) + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go new file mode 100644 index 000000000..039639f89 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level3.go @@ -0,0 +1,240 @@ +package flate + +import "fmt" + +// fastEncL3 +type fastEncL3 struct { + fastGen + table [1 << 16]tableEntryPrev +} + +// Encode uses a similar algorithm to level 2, will check up to two candidates. +func (e *fastEncL3) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 8 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + tableBits = 16 + tableSize = 1 << tableBits + ) + + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + } + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + e.table[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // Skip if too small. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3232(src, s) + for { + const skipLog = 6 + nextS := s + var candidate tableEntry + for { + nextHash := hash4u(cv, tableBits) + s = nextS + nextS = s + 1 + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + candidates := e.table[nextHash] + now := load3232(src, nextS) + + // Safe offset distance until s + 4... + minOffset := e.cur + s - (maxMatchOffset - 4) + e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}} + + // Check both candidates + candidate = candidates.Cur + if candidate.offset < minOffset { + cv = now + // Previous will also be invalid, we have nothing. + continue + } + + if cv == load3232(src, candidate.offset-e.cur) { + if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) { + break + } + // Both match and are valid, pick longest. + offset := s - (candidate.offset - e.cur) + o2 := s - (candidates.Prev.offset - e.cur) + l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:]) + if l2 > l1 { + candidate = candidates.Prev + } + break + } else { + // We only check if value mismatches. + // Offset will always be invalid in other cases. + candidate = candidates.Prev + if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { + break + } + } + cv = now + } + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + // + t := candidate.offset - e.cur + l := e.matchlenLong(s+4, t+4, src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + t += l + // Index first pair after match end. + if int(t+4) < len(src) && t > 0 { + cv := load3232(src, t) + nextHash := hash4u(cv, tableBits) + e.table[nextHash] = tableEntryPrev{ + Prev: e.table[nextHash].Cur, + Cur: tableEntry{offset: e.cur + t}, + } + } + goto emitRemainder + } + + // Store every 5th hash in-between. + for i := s - l + 2; i < s-5; i += 5 { + nextHash := hash4u(load3232(src, i), tableBits) + e.table[nextHash] = tableEntryPrev{ + Prev: e.table[nextHash].Cur, + Cur: tableEntry{offset: e.cur + i}} + } + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 to s. + x := load6432(src, s-2) + prevHash := hash4u(uint32(x), tableBits) + + e.table[prevHash] = tableEntryPrev{ + Prev: e.table[prevHash].Cur, + Cur: tableEntry{offset: e.cur + s - 2}, + } + x >>= 8 + prevHash = hash4u(uint32(x), tableBits) + + e.table[prevHash] = tableEntryPrev{ + Prev: e.table[prevHash].Cur, + Cur: tableEntry{offset: e.cur + s - 1}, + } + x >>= 8 + currHash := hash4u(uint32(x), tableBits) + candidates := e.table[currHash] + cv = uint32(x) + e.table[currHash] = tableEntryPrev{ + Prev: candidates.Cur, + Cur: tableEntry{offset: s + e.cur}, + } + + // Check both candidates + candidate = candidates.Cur + minOffset := e.cur + s - (maxMatchOffset - 4) + + if candidate.offset > minOffset { + if cv == load3232(src, candidate.offset-e.cur) { + // Found a match... + continue + } + candidate = candidates.Prev + if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { + // Match at prev... + continue + } + } + cv = uint32(x >> 8) + s++ + break + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go new file mode 100644 index 000000000..1cbffa1ae --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level4.go @@ -0,0 +1,220 @@ +package flate + +import "fmt" + +type fastEncL4 struct { + fastGen + table [tableSize]tableEntry + bTable [tableSize]tableEntry +} + +func (e *fastEncL4) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntry{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.bTable[i].offset = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + for { + const skipLog = 6 + const doEvery = 1 + + nextS := s + var t int32 + for { + nextHashS := hash4x64(cv, tableBits) + nextHashL := hash7(cv, tableBits) + + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + e.bTable[nextHashL] = entry + + t = lCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.offset-e.cur) { + // We got a long match. Use that. + break + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { + // Found a 4 match... + lCandidate = e.bTable[hash7(next, tableBits)] + + // If the next long is a candidate, check if we should use that instead... + lOff := nextS - (lCandidate.offset - e.cur) + if lOff < maxMatchOffset && load3232(src, lCandidate.offset-e.cur) == uint32(next) { + l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:]) + if l2 > l1 { + s = nextS + t = lCandidate.offset - e.cur + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Extend the 4-byte match as long as possible. + l := e.matchlenLong(s+4, t+4, src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + if debugDeflate { + if t >= s { + panic("s-t") + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + // Index first pair after match end. + if int(s+8) < len(src) { + cv := load6432(src, s) + e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur} + e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur} + } + goto emitRemainder + } + + // Store every 3rd hash in-between + if true { + i := nextS + if i < s-1 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + e.bTable[hash7(cv, tableBits)] = t + e.bTable[hash7(cv>>8, tableBits)] = t2 + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 + + i += 3 + for ; i < s-1; i += 3 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + e.bTable[hash7(cv, tableBits)] = t + e.bTable[hash7(cv>>8, tableBits)] = t2 + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 + } + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + x := load6432(src, s-1) + o := e.cur + s - 1 + prevHashS := hash4x64(x, tableBits) + prevHashL := hash7(x, tableBits) + e.table[prevHashS] = tableEntry{offset: o} + e.bTable[prevHashL] = tableEntry{offset: o} + cv = x >> 8 + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go new file mode 100644 index 000000000..4b97576bd --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level5.go @@ -0,0 +1,302 @@ +package flate + +import "fmt" + +type fastEncL5 struct { + fastGen + table [tableSize]tableEntry + bTable [tableSize]tableEntryPrev +} + +func (e *fastEncL5) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + v.Prev.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + } + e.bTable[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + for { + const skipLog = 6 + const doEvery = 1 + + nextS := s + var l int32 + var t int32 + for { + nextHashS := hash4x64(cv, tableBits) + nextHashL := hash7(cv, tableBits) + + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = entry, eLong.Cur + + nextHashS = hash4x64(next, tableBits) + nextHashL = hash7(next, tableBits) + + t = lCandidate.Cur.offset - e.cur + if s-t < maxMatchOffset { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + t2 := lCandidate.Prev.offset - e.cur + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + l = e.matchlen(s+4, t+4, src) + 4 + ml1 := e.matchlen(s+4, t2+4, src) + 4 + if ml1 > l { + t = t2 + l = ml1 + break + } + } + break + } + t = lCandidate.Prev.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + break + } + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { + // Found a 4 match... + l = e.matchlen(s+4, t+4, src) + 4 + lCandidate = e.bTable[nextHashL] + // Store the next match + + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // If the next long is a candidate, use that... + t2 := lCandidate.Cur.offset - e.cur + if nextS-t2 < maxMatchOffset { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + // If the previous long is a candidate, use that... + t2 = lCandidate.Prev.offset - e.cur + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + if l == 0 { + // Extend the 4-byte match as long as possible. + l = e.matchlenLong(s+4, t+4, src) + 4 + } else if l == maxMatchLength { + l += e.matchlenLong(s+l, t+l, src) + } + + // Try to locate a better match by checking the end of best match... + if sAt := s + l; l < 30 && sAt < sLimit { + eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset + // Test current + t2 := eLong - e.cur - l + off := s - t2 + if t2 >= 0 && off < maxMatchOffset && off > 0 { + if l2 := e.matchlenLong(s, t2, src); l2 > l { + t = t2 + l = l2 + } + } + } + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + if debugDeflate { + if t >= s { + panic(fmt.Sprintln("s-t", s, t)) + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", s-t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + goto emitRemainder + } + + // Store every 3rd hash in-between. + if true { + const hashEvery = 3 + i := s - l + 1 + if i < s-1 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + e.table[hash4x64(cv, tableBits)] = t + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // Do an long at i+1 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + eLong = &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // We only have enough bits for a short entry at i+2 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + e.table[hash4x64(cv, tableBits)] = t + + // Skip one - otherwise we risk hitting 's' + i += 4 + for ; i < s-1; i += hashEvery { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 + } + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + x := load6432(src, s-1) + o := e.cur + s - 1 + prevHashS := hash4x64(x, tableBits) + prevHashL := hash7(x, tableBits) + e.table[prevHashS] = tableEntry{offset: o} + eLong := &e.bTable[prevHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur + cv = x >> 8 + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go new file mode 100644 index 000000000..62888edf3 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level6.go @@ -0,0 +1,315 @@ +package flate + +import "fmt" + +type fastEncL6 struct { + fastGen + table [tableSize]tableEntry + bTable [tableSize]tableEntryPrev +} + +func (e *fastEncL6) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + v.Prev.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + } + e.bTable[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + // Repeat MUST be > 1 and within range + repeat := int32(1) + for { + const skipLog = 7 + const doEvery = 1 + + nextS := s + var l int32 + var t int32 + for { + nextHashS := hash4x64(cv, tableBits) + nextHashL := hash7(cv, tableBits) + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = entry, eLong.Cur + + // Calculate hashes of 'next' + nextHashS = hash4x64(next, tableBits) + nextHashL = hash7(next, tableBits) + + t = lCandidate.Cur.offset - e.cur + if s-t < maxMatchOffset { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { + // Long candidate matches at least 4 bytes. + + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // Check the previous long candidate as well. + t2 := lCandidate.Prev.offset - e.cur + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + l = e.matchlen(s+4, t+4, src) + 4 + ml1 := e.matchlen(s+4, t2+4, src) + 4 + if ml1 > l { + t = t2 + l = ml1 + break + } + } + break + } + // Current value did not match, but check if previous long value does. + t = lCandidate.Prev.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + break + } + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { + // Found a 4 match... + l = e.matchlen(s+4, t+4, src) + 4 + + // Look up next long candidate (at nextS) + lCandidate = e.bTable[nextHashL] + + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // Check repeat at s + repOff + const repOff = 1 + t2 := s - repeat + repOff + if load3232(src, t2) == uint32(cv>>(8*repOff)) { + ml := e.matchlen(s+4+repOff, t2+4, src) + 4 + if ml > l { + t = t2 + l = ml + s += repOff + // Not worth checking more. + break + } + } + + // If the next long is a candidate, use that... + t2 = lCandidate.Cur.offset - e.cur + if nextS-t2 < maxMatchOffset { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + // This is ok, but check previous as well. + } + } + // If the previous long is a candidate, use that... + t2 = lCandidate.Prev.offset - e.cur + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Extend the 4-byte match as long as possible. + if l == 0 { + l = e.matchlenLong(s+4, t+4, src) + 4 + } else if l == maxMatchLength { + l += e.matchlenLong(s+l, t+l, src) + } + + // Try to locate a better match by checking the end-of-match... + if sAt := s + l; sAt < sLimit { + eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)] + // Test current + t2 := eLong.Cur.offset - e.cur - l + off := s - t2 + if off < maxMatchOffset { + if off > 0 && t2 >= 0 { + if l2 := e.matchlenLong(s, t2, src); l2 > l { + t = t2 + l = l2 + } + } + // Test next: + t2 = eLong.Prev.offset - e.cur - l + off := s - t2 + if off > 0 && off < maxMatchOffset && t2 >= 0 { + if l2 := e.matchlenLong(s, t2, src); l2 > l { + t = t2 + l = l2 + } + } + } + } + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + if false { + if t >= s { + panic(fmt.Sprintln("s-t", s, t)) + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", s-t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + repeat = s - t + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + // Index after match end. + for i := nextS + 1; i < int32(len(src))-8; i += 2 { + cv := load6432(src, i) + e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur + } + goto emitRemainder + } + + // Store every long hash in-between and every second short. + if true { + for i := nextS + 1; i < s-1; i += 2 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong2 := &e.bTable[hash7(cv>>8, tableBits)] + e.table[hash4x64(cv, tableBits)] = t + eLong.Cur, eLong.Prev = t, eLong.Cur + eLong2.Cur, eLong2.Prev = t2, eLong2.Cur + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + cv = load6432(src, s) + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/regmask_amd64.go b/vendor/github.com/klauspost/compress/flate/regmask_amd64.go new file mode 100644 index 000000000..6ed28061b --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/regmask_amd64.go @@ -0,0 +1,37 @@ +package flate + +const ( + // Masks for shifts with register sizes of the shift value. + // This can be used to work around the x86 design of shifting by mod register size. + // It can be used when a variable shift is always smaller than the register size. + + // reg8SizeMaskX - shift value is 8 bits, shifted is X + reg8SizeMask8 = 7 + reg8SizeMask16 = 15 + reg8SizeMask32 = 31 + reg8SizeMask64 = 63 + + // reg16SizeMaskX - shift value is 16 bits, shifted is X + reg16SizeMask8 = reg8SizeMask8 + reg16SizeMask16 = reg8SizeMask16 + reg16SizeMask32 = reg8SizeMask32 + reg16SizeMask64 = reg8SizeMask64 + + // reg32SizeMaskX - shift value is 32 bits, shifted is X + reg32SizeMask8 = reg8SizeMask8 + reg32SizeMask16 = reg8SizeMask16 + reg32SizeMask32 = reg8SizeMask32 + reg32SizeMask64 = reg8SizeMask64 + + // reg64SizeMaskX - shift value is 64 bits, shifted is X + reg64SizeMask8 = reg8SizeMask8 + reg64SizeMask16 = reg8SizeMask16 + reg64SizeMask32 = reg8SizeMask32 + reg64SizeMask64 = reg8SizeMask64 + + // regSizeMaskUintX - shift value is uint, shifted is X + regSizeMaskUint8 = reg8SizeMask8 + regSizeMaskUint16 = reg8SizeMask16 + regSizeMaskUint32 = reg8SizeMask32 + regSizeMaskUint64 = reg8SizeMask64 +) diff --git a/vendor/github.com/klauspost/compress/flate/regmask_other.go b/vendor/github.com/klauspost/compress/flate/regmask_other.go new file mode 100644 index 000000000..1b7a2cbd7 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/regmask_other.go @@ -0,0 +1,40 @@ +//go:build !amd64 +// +build !amd64 + +package flate + +const ( + // Masks for shifts with register sizes of the shift value. + // This can be used to work around the x86 design of shifting by mod register size. + // It can be used when a variable shift is always smaller than the register size. + + // reg8SizeMaskX - shift value is 8 bits, shifted is X + reg8SizeMask8 = 0xff + reg8SizeMask16 = 0xff + reg8SizeMask32 = 0xff + reg8SizeMask64 = 0xff + + // reg16SizeMaskX - shift value is 16 bits, shifted is X + reg16SizeMask8 = 0xffff + reg16SizeMask16 = 0xffff + reg16SizeMask32 = 0xffff + reg16SizeMask64 = 0xffff + + // reg32SizeMaskX - shift value is 32 bits, shifted is X + reg32SizeMask8 = 0xffffffff + reg32SizeMask16 = 0xffffffff + reg32SizeMask32 = 0xffffffff + reg32SizeMask64 = 0xffffffff + + // reg64SizeMaskX - shift value is 64 bits, shifted is X + reg64SizeMask8 = 0xffffffffffffffff + reg64SizeMask16 = 0xffffffffffffffff + reg64SizeMask32 = 0xffffffffffffffff + reg64SizeMask64 = 0xffffffffffffffff + + // regSizeMaskUintX - shift value is uint, shifted is X + regSizeMaskUint8 = ^uint(0) + regSizeMaskUint16 = ^uint(0) + regSizeMaskUint32 = ^uint(0) + regSizeMaskUint64 = ^uint(0) +) diff --git a/vendor/github.com/klauspost/compress/flate/stateless.go b/vendor/github.com/klauspost/compress/flate/stateless.go new file mode 100644 index 000000000..93a1d1503 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/stateless.go @@ -0,0 +1,305 @@ +package flate + +import ( + "io" + "math" + "sync" +) + +const ( + maxStatelessBlock = math.MaxInt16 + // dictionary will be taken from maxStatelessBlock, so limit it. + maxStatelessDict = 8 << 10 + + slTableBits = 13 + slTableSize = 1 << slTableBits + slTableShift = 32 - slTableBits +) + +type statelessWriter struct { + dst io.Writer + closed bool +} + +func (s *statelessWriter) Close() error { + if s.closed { + return nil + } + s.closed = true + // Emit EOF block + return StatelessDeflate(s.dst, nil, true, nil) +} + +func (s *statelessWriter) Write(p []byte) (n int, err error) { + err = StatelessDeflate(s.dst, p, false, nil) + if err != nil { + return 0, err + } + return len(p), nil +} + +func (s *statelessWriter) Reset(w io.Writer) { + s.dst = w + s.closed = false +} + +// NewStatelessWriter will do compression but without maintaining any state +// between Write calls. +// There will be no memory kept between Write calls, +// but compression and speed will be suboptimal. +// Because of this, the size of actual Write calls will affect output size. +func NewStatelessWriter(dst io.Writer) io.WriteCloser { + return &statelessWriter{dst: dst} +} + +// bitWriterPool contains bit writers that can be reused. +var bitWriterPool = sync.Pool{ + New: func() interface{} { + return newHuffmanBitWriter(nil) + }, +} + +// StatelessDeflate allows compressing directly to a Writer without retaining state. +// When returning everything will be flushed. +// Up to 8KB of an optional dictionary can be given which is presumed to precede the block. +// Longer dictionaries will be truncated and will still produce valid output. +// Sending nil dictionary is perfectly fine. +func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { + var dst tokens + bw := bitWriterPool.Get().(*huffmanBitWriter) + bw.reset(out) + defer func() { + // don't keep a reference to our output + bw.reset(nil) + bitWriterPool.Put(bw) + }() + if eof && len(in) == 0 { + // Just write an EOF block. + // Could be faster... + bw.writeStoredHeader(0, true) + bw.flush() + return bw.err + } + + // Truncate dict + if len(dict) > maxStatelessDict { + dict = dict[len(dict)-maxStatelessDict:] + } + + for len(in) > 0 { + todo := in + if len(todo) > maxStatelessBlock-len(dict) { + todo = todo[:maxStatelessBlock-len(dict)] + } + in = in[len(todo):] + uncompressed := todo + if len(dict) > 0 { + // combine dict and source + bufLen := len(todo) + len(dict) + combined := make([]byte, bufLen) + copy(combined, dict) + copy(combined[len(dict):], todo) + todo = combined + } + // Compress + statelessEnc(&dst, todo, int16(len(dict))) + isEof := eof && len(in) == 0 + + if dst.n == 0 { + bw.writeStoredHeader(len(uncompressed), isEof) + if bw.err != nil { + return bw.err + } + bw.writeBytes(uncompressed) + } else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 { + // If we removed less than 1/16th, huffman compress the block. + bw.writeBlockHuff(isEof, uncompressed, len(in) == 0) + } else { + bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0) + } + if len(in) > 0 { + // Retain a dict if we have more + dict = todo[len(todo)-maxStatelessDict:] + dst.Reset() + } + if bw.err != nil { + return bw.err + } + } + if !eof { + // Align, only a stored block can do that. + bw.writeStoredHeader(0, false) + } + bw.flush() + return bw.err +} + +func hashSL(u uint32) uint32 { + return (u * 0x1e35a7bd) >> slTableShift +} + +func load3216(b []byte, i int16) uint32 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:4] + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load6416(b []byte, i int16) uint64 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:8] + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +func statelessEnc(dst *tokens, src []byte, startAt int16) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + + type tableEntry struct { + offset int16 + } + + var table [slTableSize]tableEntry + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src)-int(startAt) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = 0 + return + } + // Index until startAt + if startAt > 0 { + cv := load3232(src, 0) + for i := int16(0); i < startAt; i++ { + table[hashSL(cv)] = tableEntry{offset: i} + cv = (cv >> 8) | (uint32(src[i+4]) << 24) + } + } + + s := startAt + 1 + nextEmit := startAt + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int16(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3216(src, s) + + for { + const skipLog = 5 + const doEvery = 2 + + nextS := s + var candidate tableEntry + for { + nextHash := hashSL(cv) + candidate = table[nextHash] + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit || nextS <= 0 { + goto emitRemainder + } + + now := load6416(src, nextS) + table[nextHash] = tableEntry{offset: s} + nextHash = hashSL(uint32(now)) + + if cv == load3216(src, candidate.offset) { + table[nextHash] = tableEntry{offset: nextS} + break + } + + // Do one right away... + cv = uint32(now) + s = nextS + nextS++ + candidate = table[nextHash] + now >>= 8 + table[nextHash] = tableEntry{offset: s} + + if cv == load3216(src, candidate.offset) { + table[nextHash] = tableEntry{offset: nextS} + break + } + cv = uint32(now) + s = nextS + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + t := candidate.offset + l := int16(matchLen(src[s+4:], src[t+4:]) + 4) + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + if false { + emitLiteral(dst, src[nextEmit:s]) + } else { + for _, v := range src[nextEmit:s] { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } + } + } + + // Save the match found + dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + if s >= sLimit { + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6416(src, s-2) + o := s - 2 + prevHash := hashSL(uint32(x)) + table[prevHash] = tableEntry{offset: o} + x >>= 16 + currHash := hashSL(uint32(x)) + candidate = table[currHash] + table[currHash] = tableEntry{offset: o + 2} + + if uint32(x) != load3216(src, candidate.offset) { + cv = uint32(x >> 8) + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go new file mode 100644 index 000000000..d818790c1 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/token.go @@ -0,0 +1,379 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "math" +) + +const ( + // bits 0-16 xoffset = offset - MIN_OFFSET_SIZE, or literal - 16 bits + // bits 16-22 offsetcode - 5 bits + // bits 22-30 xlength = length - MIN_MATCH_LENGTH - 8 bits + // bits 30-32 type 0 = literal 1=EOF 2=Match 3=Unused - 2 bits + lengthShift = 22 + offsetMask = 1<<lengthShift - 1 + typeMask = 3 << 30 + literalType = 0 << 30 + matchType = 1 << 30 + matchOffsetOnlyMask = 0xffff +) + +// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH) +// is lengthCodes[length - MIN_MATCH_LENGTH] +var lengthCodes = [256]uint8{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, + 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, + 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, + 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, + 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 28, +} + +// lengthCodes1 is length codes, but starting at 1. +var lengthCodes1 = [256]uint8{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, + 10, 10, 11, 11, 12, 12, 13, 13, 13, 13, + 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, + 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, + 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, + 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, + 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 29, +} + +var offsetCodes = [256]uint32{ + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +} + +// offsetCodes14 are offsetCodes, but with 14 added. +var offsetCodes14 = [256]uint32{ + 14, 15, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +} + +type token uint32 + +type tokens struct { + extraHist [32]uint16 // codes 256->maxnumlit + offHist [32]uint16 // offset codes + litHist [256]uint16 // codes 0->255 + nFilled int + n uint16 // Must be able to contain maxStoreBlockSize + tokens [maxStoreBlockSize + 1]token +} + +func (t *tokens) Reset() { + if t.n == 0 { + return + } + t.n = 0 + t.nFilled = 0 + for i := range t.litHist[:] { + t.litHist[i] = 0 + } + for i := range t.extraHist[:] { + t.extraHist[i] = 0 + } + for i := range t.offHist[:] { + t.offHist[i] = 0 + } +} + +func (t *tokens) Fill() { + if t.n == 0 { + return + } + for i, v := range t.litHist[:] { + if v == 0 { + t.litHist[i] = 1 + t.nFilled++ + } + } + for i, v := range t.extraHist[:literalCount-256] { + if v == 0 { + t.nFilled++ + t.extraHist[i] = 1 + } + } + for i, v := range t.offHist[:offsetCodeCount] { + if v == 0 { + t.offHist[i] = 1 + } + } +} + +func indexTokens(in []token) tokens { + var t tokens + t.indexTokens(in) + return t +} + +func (t *tokens) indexTokens(in []token) { + t.Reset() + for _, tok := range in { + if tok < matchType { + t.AddLiteral(tok.literal()) + continue + } + t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask) + } +} + +// emitLiteral writes a literal chunk and returns the number of bytes written. +func emitLiteral(dst *tokens, lit []byte) { + for _, v := range lit { + dst.tokens[dst.n] = token(v) + dst.litHist[v]++ + dst.n++ + } +} + +func (t *tokens) AddLiteral(lit byte) { + t.tokens[t.n] = token(lit) + t.litHist[lit]++ + t.n++ +} + +// from https://stackoverflow.com/a/28730362 +func mFastLog2(val float32) float32 { + ux := int32(math.Float32bits(val)) + log2 := (float32)(((ux >> 23) & 255) - 128) + ux &= -0x7f800001 + ux += 127 << 23 + uval := math.Float32frombits(uint32(ux)) + log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759 + return log2 +} + +// EstimatedBits will return an minimum size estimated by an *optimal* +// compression of the block. +// The size of the block +func (t *tokens) EstimatedBits() int { + shannon := float32(0) + bits := int(0) + nMatches := 0 + total := int(t.n) + t.nFilled + if total > 0 { + invTotal := 1.0 / float32(total) + for _, v := range t.litHist[:] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + } + } + // Just add 15 for EOB + shannon += 15 + for i, v := range t.extraHist[1 : literalCount-256] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + bits += int(lengthExtraBits[i&31]) * int(v) + nMatches += int(v) + } + } + } + if nMatches > 0 { + invTotal := 1.0 / float32(nMatches) + for i, v := range t.offHist[:offsetCodeCount] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + bits += int(offsetExtraBits[i&31]) * int(v) + } + } + } + return int(shannon) + bits +} + +// AddMatch adds a match to the tokens. +// This function is very sensitive to inlining and right on the border. +func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { + if debugDeflate { + if xlength >= maxMatchLength+baseMatchLength { + panic(fmt.Errorf("invalid length: %v", xlength)) + } + if xoffset >= maxMatchOffset+baseMatchOffset { + panic(fmt.Errorf("invalid offset: %v", xoffset)) + } + } + oCode := offsetCode(xoffset) + xoffset |= oCode << 16 + + t.extraHist[lengthCodes1[uint8(xlength)]]++ + t.offHist[oCode&31]++ + t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset) + t.n++ +} + +// AddMatchLong adds a match to the tokens, potentially longer than max match length. +// Length should NOT have the base subtracted, only offset should. +func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) { + if debugDeflate { + if xoffset >= maxMatchOffset+baseMatchOffset { + panic(fmt.Errorf("invalid offset: %v", xoffset)) + } + } + oc := offsetCode(xoffset) + xoffset |= oc << 16 + for xlength > 0 { + xl := xlength + if xl > 258 { + // We need to have at least baseMatchLength left over for next loop. + if xl > 258+baseMatchLength { + xl = 258 + } else { + xl = 258 - baseMatchLength + } + } + xlength -= xl + xl -= baseMatchLength + t.extraHist[lengthCodes1[uint8(xl)]]++ + t.offHist[oc&31]++ + t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset) + t.n++ + } +} + +func (t *tokens) AddEOB() { + t.tokens[t.n] = token(endBlockMarker) + t.extraHist[0]++ + t.n++ +} + +func (t *tokens) Slice() []token { + return t.tokens[:t.n] +} + +// VarInt returns the tokens as varint encoded bytes. +func (t *tokens) VarInt() []byte { + var b = make([]byte, binary.MaxVarintLen32*int(t.n)) + var off int + for _, v := range t.tokens[:t.n] { + off += binary.PutUvarint(b[off:], uint64(v)) + } + return b[:off] +} + +// FromVarInt restores t to the varint encoded tokens provided. +// Any data in t is removed. +func (t *tokens) FromVarInt(b []byte) error { + var buf = bytes.NewReader(b) + var toks []token + for { + r, err := binary.ReadUvarint(buf) + if err == io.EOF { + break + } + if err != nil { + return err + } + toks = append(toks, token(r)) + } + t.indexTokens(toks) + return nil +} + +// Returns the type of a token +func (t token) typ() uint32 { return uint32(t) & typeMask } + +// Returns the literal of a literal token +func (t token) literal() uint8 { return uint8(t) } + +// Returns the extra offset of a match token +func (t token) offset() uint32 { return uint32(t) & offsetMask } + +func (t token) length() uint8 { return uint8(t >> lengthShift) } + +// Convert length to code. +func lengthCode(len uint8) uint8 { return lengthCodes[len] } + +// Returns the offset code corresponding to a specific offset +func offsetCode(off uint32) uint32 { + if false { + if off < uint32(len(offsetCodes)) { + return offsetCodes[off&255] + } else if off>>7 < uint32(len(offsetCodes)) { + return offsetCodes[(off>>7)&255] + 14 + } else { + return offsetCodes[(off>>14)&255] + 28 + } + } + if off < uint32(len(offsetCodes)) { + return offsetCodes[uint8(off)] + } + return offsetCodes14[uint8(off>>7)] +} diff --git a/vendor/github.com/klauspost/compress/gzip/gunzip.go b/vendor/github.com/klauspost/compress/gzip/gunzip.go new file mode 100644 index 000000000..66fe5ddf7 --- /dev/null +++ b/vendor/github.com/klauspost/compress/gzip/gunzip.go @@ -0,0 +1,349 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package gzip implements reading and writing of gzip format compressed files, +// as specified in RFC 1952. +package gzip + +import ( + "bufio" + "compress/gzip" + "encoding/binary" + "hash/crc32" + "io" + "time" + + "github.com/klauspost/compress/flate" +) + +const ( + gzipID1 = 0x1f + gzipID2 = 0x8b + gzipDeflate = 8 + flagText = 1 << 0 + flagHdrCrc = 1 << 1 + flagExtra = 1 << 2 + flagName = 1 << 3 + flagComment = 1 << 4 +) + +var ( + // ErrChecksum is returned when reading GZIP data that has an invalid checksum. + ErrChecksum = gzip.ErrChecksum + // ErrHeader is returned when reading GZIP data that has an invalid header. + ErrHeader = gzip.ErrHeader +) + +var le = binary.LittleEndian + +// noEOF converts io.EOF to io.ErrUnexpectedEOF. +func noEOF(err error) error { + if err == io.EOF { + return io.ErrUnexpectedEOF + } + return err +} + +// The gzip file stores a header giving metadata about the compressed file. +// That header is exposed as the fields of the Writer and Reader structs. +// +// Strings must be UTF-8 encoded and may only contain Unicode code points +// U+0001 through U+00FF, due to limitations of the GZIP file format. +type Header struct { + Comment string // comment + Extra []byte // "extra data" + ModTime time.Time // modification time + Name string // file name + OS byte // operating system type +} + +// A Reader is an io.Reader that can be read to retrieve +// uncompressed data from a gzip-format compressed file. +// +// In general, a gzip file can be a concatenation of gzip files, +// each with its own header. Reads from the Reader +// return the concatenation of the uncompressed data of each. +// Only the first header is recorded in the Reader fields. +// +// Gzip files store a length and checksum of the uncompressed data. +// The Reader will return a ErrChecksum when Read +// reaches the end of the uncompressed data if it does not +// have the expected length or checksum. Clients should treat data +// returned by Read as tentative until they receive the io.EOF +// marking the end of the data. +type Reader struct { + Header // valid after NewReader or Reader.Reset + r flate.Reader + br *bufio.Reader + decompressor io.ReadCloser + digest uint32 // CRC-32, IEEE polynomial (section 8) + size uint32 // Uncompressed size (section 2.3.1) + buf [512]byte + err error + multistream bool +} + +// NewReader creates a new Reader reading the given reader. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// +// It is the caller's responsibility to call Close on the Reader when done. +// +// The Reader.Header fields will be valid in the Reader returned. +func NewReader(r io.Reader) (*Reader, error) { + z := new(Reader) + if err := z.Reset(r); err != nil { + return nil, err + } + return z, nil +} + +// Reset discards the Reader z's state and makes it equivalent to the +// result of its original state from NewReader, but reading from r instead. +// This permits reusing a Reader rather than allocating a new one. +func (z *Reader) Reset(r io.Reader) error { + *z = Reader{ + decompressor: z.decompressor, + multistream: true, + } + if rr, ok := r.(flate.Reader); ok { + z.r = rr + } else { + // Reuse if we can. + if z.br != nil { + z.br.Reset(r) + } else { + z.br = bufio.NewReader(r) + } + z.r = z.br + } + z.Header, z.err = z.readHeader() + return z.err +} + +// Multistream controls whether the reader supports multistream files. +// +// If enabled (the default), the Reader expects the input to be a sequence +// of individually gzipped data streams, each with its own header and +// trailer, ending at EOF. The effect is that the concatenation of a sequence +// of gzipped files is treated as equivalent to the gzip of the concatenation +// of the sequence. This is standard behavior for gzip readers. +// +// Calling Multistream(false) disables this behavior; disabling the behavior +// can be useful when reading file formats that distinguish individual gzip +// data streams or mix gzip data streams with other data streams. +// In this mode, when the Reader reaches the end of the data stream, +// Read returns io.EOF. If the underlying reader implements io.ByteReader, +// it will be left positioned just after the gzip stream. +// To start the next stream, call z.Reset(r) followed by z.Multistream(false). +// If there is no next stream, z.Reset(r) will return io.EOF. +func (z *Reader) Multistream(ok bool) { + z.multistream = ok +} + +// readString reads a NUL-terminated string from z.r. +// It treats the bytes read as being encoded as ISO 8859-1 (Latin-1) and +// will output a string encoded using UTF-8. +// This method always updates z.digest with the data read. +func (z *Reader) readString() (string, error) { + var err error + needConv := false + for i := 0; ; i++ { + if i >= len(z.buf) { + return "", ErrHeader + } + z.buf[i], err = z.r.ReadByte() + if err != nil { + return "", err + } + if z.buf[i] > 0x7f { + needConv = true + } + if z.buf[i] == 0 { + // Digest covers the NUL terminator. + z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:i+1]) + + // Strings are ISO 8859-1, Latin-1 (RFC 1952, section 2.3.1). + if needConv { + s := make([]rune, 0, i) + for _, v := range z.buf[:i] { + s = append(s, rune(v)) + } + return string(s), nil + } + return string(z.buf[:i]), nil + } + } +} + +// readHeader reads the GZIP header according to section 2.3.1. +// This method does not set z.err. +func (z *Reader) readHeader() (hdr Header, err error) { + if _, err = io.ReadFull(z.r, z.buf[:10]); err != nil { + // RFC 1952, section 2.2, says the following: + // A gzip file consists of a series of "members" (compressed data sets). + // + // Other than this, the specification does not clarify whether a + // "series" is defined as "one or more" or "zero or more". To err on the + // side of caution, Go interprets this to mean "zero or more". + // Thus, it is okay to return io.EOF here. + return hdr, err + } + if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate { + return hdr, ErrHeader + } + flg := z.buf[3] + hdr.ModTime = time.Unix(int64(le.Uint32(z.buf[4:8])), 0) + // z.buf[8] is XFL and is currently ignored. + hdr.OS = z.buf[9] + z.digest = crc32.ChecksumIEEE(z.buf[:10]) + + if flg&flagExtra != 0 { + if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil { + return hdr, noEOF(err) + } + z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:2]) + data := make([]byte, le.Uint16(z.buf[:2])) + if _, err = io.ReadFull(z.r, data); err != nil { + return hdr, noEOF(err) + } + z.digest = crc32.Update(z.digest, crc32.IEEETable, data) + hdr.Extra = data + } + + var s string + if flg&flagName != 0 { + if s, err = z.readString(); err != nil { + return hdr, err + } + hdr.Name = s + } + + if flg&flagComment != 0 { + if s, err = z.readString(); err != nil { + return hdr, err + } + hdr.Comment = s + } + + if flg&flagHdrCrc != 0 { + if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil { + return hdr, noEOF(err) + } + digest := le.Uint16(z.buf[:2]) + if digest != uint16(z.digest) { + return hdr, ErrHeader + } + } + + z.digest = 0 + if z.decompressor == nil { + z.decompressor = flate.NewReader(z.r) + } else { + z.decompressor.(flate.Resetter).Reset(z.r, nil) + } + return hdr, nil +} + +// Read implements io.Reader, reading uncompressed bytes from its underlying Reader. +func (z *Reader) Read(p []byte) (n int, err error) { + if z.err != nil { + return 0, z.err + } + + for n == 0 { + n, z.err = z.decompressor.Read(p) + z.digest = crc32.Update(z.digest, crc32.IEEETable, p[:n]) + z.size += uint32(n) + if z.err != io.EOF { + // In the normal case we return here. + return n, z.err + } + + // Finished file; check checksum and size. + if _, err := io.ReadFull(z.r, z.buf[:8]); err != nil { + z.err = noEOF(err) + return n, z.err + } + digest := le.Uint32(z.buf[:4]) + size := le.Uint32(z.buf[4:8]) + if digest != z.digest || size != z.size { + z.err = ErrChecksum + return n, z.err + } + z.digest, z.size = 0, 0 + + // File is ok; check if there is another. + if !z.multistream { + return n, io.EOF + } + z.err = nil // Remove io.EOF + + if _, z.err = z.readHeader(); z.err != nil { + return n, z.err + } + } + + return n, nil +} + +// Support the io.WriteTo interface for io.Copy and friends. +func (z *Reader) WriteTo(w io.Writer) (int64, error) { + total := int64(0) + crcWriter := crc32.NewIEEE() + for { + if z.err != nil { + if z.err == io.EOF { + return total, nil + } + return total, z.err + } + + // We write both to output and digest. + mw := io.MultiWriter(w, crcWriter) + n, err := z.decompressor.(io.WriterTo).WriteTo(mw) + total += n + z.size += uint32(n) + if err != nil { + z.err = err + return total, z.err + } + + // Finished file; check checksum + size. + if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + z.err = err + return total, err + } + z.digest = crcWriter.Sum32() + digest := le.Uint32(z.buf[:4]) + size := le.Uint32(z.buf[4:8]) + if digest != z.digest || size != z.size { + z.err = ErrChecksum + return total, z.err + } + z.digest, z.size = 0, 0 + + // File is ok; check if there is another. + if !z.multistream { + return total, nil + } + crcWriter.Reset() + z.err = nil // Remove io.EOF + + if _, z.err = z.readHeader(); z.err != nil { + if z.err == io.EOF { + return total, nil + } + return total, z.err + } + } +} + +// Close closes the Reader. It does not close the underlying io.Reader. +// In order for the GZIP checksum to be verified, the reader must be +// fully consumed until the io.EOF. +func (z *Reader) Close() error { return z.decompressor.Close() } diff --git a/vendor/github.com/klauspost/compress/gzip/gzip.go b/vendor/github.com/klauspost/compress/gzip/gzip.go new file mode 100644 index 000000000..26203851b --- /dev/null +++ b/vendor/github.com/klauspost/compress/gzip/gzip.go @@ -0,0 +1,269 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gzip + +import ( + "errors" + "fmt" + "hash/crc32" + "io" + + "github.com/klauspost/compress/flate" +) + +// These constants are copied from the flate package, so that code that imports +// "compress/gzip" does not also have to import "compress/flate". +const ( + NoCompression = flate.NoCompression + BestSpeed = flate.BestSpeed + BestCompression = flate.BestCompression + DefaultCompression = flate.DefaultCompression + ConstantCompression = flate.ConstantCompression + HuffmanOnly = flate.HuffmanOnly + + // StatelessCompression will do compression but without maintaining any state + // between Write calls. + // There will be no memory kept between Write calls, + // but compression and speed will be suboptimal. + // Because of this, the size of actual Write calls will affect output size. + StatelessCompression = -3 +) + +// A Writer is an io.WriteCloser. +// Writes to a Writer are compressed and written to w. +type Writer struct { + Header // written at first call to Write, Flush, or Close + w io.Writer + level int + err error + compressor *flate.Writer + digest uint32 // CRC-32, IEEE polynomial (section 8) + size uint32 // Uncompressed size (section 2.3.1) + wroteHeader bool + closed bool + buf [10]byte +} + +// NewWriter returns a new Writer. +// Writes to the returned writer are compressed and written to w. +// +// It is the caller's responsibility to call Close on the WriteCloser when done. +// Writes may be buffered and not flushed until Close. +// +// Callers that wish to set the fields in Writer.Header must do so before +// the first call to Write, Flush, or Close. +func NewWriter(w io.Writer) *Writer { + z, _ := NewWriterLevel(w, DefaultCompression) + return z +} + +// NewWriterLevel is like NewWriter but specifies the compression level instead +// of assuming DefaultCompression. +// +// The compression level can be DefaultCompression, NoCompression, or any +// integer value between BestSpeed and BestCompression inclusive. The error +// returned will be nil if the level is valid. +func NewWriterLevel(w io.Writer, level int) (*Writer, error) { + if level < StatelessCompression || level > BestCompression { + return nil, fmt.Errorf("gzip: invalid compression level: %d", level) + } + z := new(Writer) + z.init(w, level) + return z, nil +} + +func (z *Writer) init(w io.Writer, level int) { + compressor := z.compressor + if level != StatelessCompression { + if compressor != nil { + compressor.Reset(w) + } + } + + *z = Writer{ + Header: Header{ + OS: 255, // unknown + }, + w: w, + level: level, + compressor: compressor, + } +} + +// Reset discards the Writer z's state and makes it equivalent to the +// result of its original state from NewWriter or NewWriterLevel, but +// writing to w instead. This permits reusing a Writer rather than +// allocating a new one. +func (z *Writer) Reset(w io.Writer) { + z.init(w, z.level) +} + +// writeBytes writes a length-prefixed byte slice to z.w. +func (z *Writer) writeBytes(b []byte) error { + if len(b) > 0xffff { + return errors.New("gzip.Write: Extra data is too large") + } + le.PutUint16(z.buf[:2], uint16(len(b))) + _, err := z.w.Write(z.buf[:2]) + if err != nil { + return err + } + _, err = z.w.Write(b) + return err +} + +// writeString writes a UTF-8 string s in GZIP's format to z.w. +// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). +func (z *Writer) writeString(s string) (err error) { + // GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII. + needconv := false + for _, v := range s { + if v == 0 || v > 0xff { + return errors.New("gzip.Write: non-Latin-1 header string") + } + if v > 0x7f { + needconv = true + } + } + if needconv { + b := make([]byte, 0, len(s)) + for _, v := range s { + b = append(b, byte(v)) + } + _, err = z.w.Write(b) + } else { + _, err = io.WriteString(z.w, s) + } + if err != nil { + return err + } + // GZIP strings are NUL-terminated. + z.buf[0] = 0 + _, err = z.w.Write(z.buf[:1]) + return err +} + +// Write writes a compressed form of p to the underlying io.Writer. The +// compressed bytes are not necessarily flushed until the Writer is closed. +func (z *Writer) Write(p []byte) (int, error) { + if z.err != nil { + return 0, z.err + } + var n int + // Write the GZIP header lazily. + if !z.wroteHeader { + z.wroteHeader = true + z.buf[0] = gzipID1 + z.buf[1] = gzipID2 + z.buf[2] = gzipDeflate + z.buf[3] = 0 + if z.Extra != nil { + z.buf[3] |= 0x04 + } + if z.Name != "" { + z.buf[3] |= 0x08 + } + if z.Comment != "" { + z.buf[3] |= 0x10 + } + le.PutUint32(z.buf[4:8], uint32(z.ModTime.Unix())) + if z.level == BestCompression { + z.buf[8] = 2 + } else if z.level == BestSpeed { + z.buf[8] = 4 + } else { + z.buf[8] = 0 + } + z.buf[9] = z.OS + n, z.err = z.w.Write(z.buf[:10]) + if z.err != nil { + return n, z.err + } + if z.Extra != nil { + z.err = z.writeBytes(z.Extra) + if z.err != nil { + return n, z.err + } + } + if z.Name != "" { + z.err = z.writeString(z.Name) + if z.err != nil { + return n, z.err + } + } + if z.Comment != "" { + z.err = z.writeString(z.Comment) + if z.err != nil { + return n, z.err + } + } + + if z.compressor == nil && z.level != StatelessCompression { + z.compressor, _ = flate.NewWriter(z.w, z.level) + } + } + z.size += uint32(len(p)) + z.digest = crc32.Update(z.digest, crc32.IEEETable, p) + if z.level == StatelessCompression { + return len(p), flate.StatelessDeflate(z.w, p, false, nil) + } + n, z.err = z.compressor.Write(p) + return n, z.err +} + +// Flush flushes any pending compressed data to the underlying writer. +// +// It is useful mainly in compressed network protocols, to ensure that +// a remote reader has enough data to reconstruct a packet. Flush does +// not return until the data has been written. If the underlying +// writer returns an error, Flush returns that error. +// +// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. +func (z *Writer) Flush() error { + if z.err != nil { + return z.err + } + if z.closed || z.level == StatelessCompression { + return nil + } + if !z.wroteHeader { + z.Write(nil) + if z.err != nil { + return z.err + } + } + z.err = z.compressor.Flush() + return z.err +} + +// Close closes the Writer, flushing any unwritten data to the underlying +// io.Writer, but does not close the underlying io.Writer. +func (z *Writer) Close() error { + if z.err != nil { + return z.err + } + if z.closed { + return nil + } + z.closed = true + if !z.wroteHeader { + z.Write(nil) + if z.err != nil { + return z.err + } + } + if z.level == StatelessCompression { + z.err = flate.StatelessDeflate(z.w, nil, true, nil) + } else { + z.err = z.compressor.Close() + } + if z.err != nil { + return z.err + } + le.PutUint32(z.buf[:4], z.digest) + le.PutUint32(z.buf[4:8], z.size) + _, z.err = z.w.Write(z.buf[:8]) + return z.err +} diff --git a/vendor/github.com/golang/snappy/.gitignore b/vendor/github.com/klauspost/compress/snappy/.gitignore index 042091d9b..042091d9b 100644 --- a/vendor/github.com/golang/snappy/.gitignore +++ b/vendor/github.com/klauspost/compress/snappy/.gitignore diff --git a/vendor/github.com/golang/snappy/AUTHORS b/vendor/github.com/klauspost/compress/snappy/AUTHORS index 52ccb5a93..52ccb5a93 100644 --- a/vendor/github.com/golang/snappy/AUTHORS +++ b/vendor/github.com/klauspost/compress/snappy/AUTHORS diff --git a/vendor/github.com/golang/snappy/CONTRIBUTORS b/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS index ea6524ddd..ea6524ddd 100644 --- a/vendor/github.com/golang/snappy/CONTRIBUTORS +++ b/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS diff --git a/vendor/github.com/golang/snappy/LICENSE b/vendor/github.com/klauspost/compress/snappy/LICENSE index 6050c10f4..6050c10f4 100644 --- a/vendor/github.com/golang/snappy/LICENSE +++ b/vendor/github.com/klauspost/compress/snappy/LICENSE diff --git a/vendor/github.com/klauspost/compress/snappy/README.md b/vendor/github.com/klauspost/compress/snappy/README.md new file mode 100644 index 000000000..8271bbd09 --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/README.md @@ -0,0 +1,17 @@ +# snappy + +The Snappy compression format in the Go programming language. + +This is a drop-in replacement for `github.com/golang/snappy`. + +It provides a full, compatible replacement of the Snappy package by simply changing imports. + +See [Snappy Compatibility](https://github.com/klauspost/compress/tree/master/s2#snappy-compatibility) in the S2 documentation. + +"Better" compression mode is used. For buffered streams concurrent compression is used. + +For more options use the [s2 package](https://pkg.go.dev/github.com/klauspost/compress/s2). + +# usage + +Replace imports `github.com/golang/snappy` with `github.com/klauspost/compress/snappy`. diff --git a/vendor/github.com/klauspost/compress/snappy/decode.go b/vendor/github.com/klauspost/compress/snappy/decode.go new file mode 100644 index 000000000..89f1fa234 --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/decode.go @@ -0,0 +1,60 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "io" + + "github.com/klauspost/compress/s2" +) + +var ( + // ErrCorrupt reports that the input is invalid. + ErrCorrupt = s2.ErrCorrupt + // ErrTooLarge reports that the uncompressed length is too large. + ErrTooLarge = s2.ErrTooLarge + // ErrUnsupported reports that the input isn't supported. + ErrUnsupported = s2.ErrUnsupported +) + +const ( + // maxBlockSize is the maximum size of the input to encodeBlock. It is not + // part of the wire format per se, but some parts of the encoder assume + // that an offset fits into a uint16. + // + // Also, for the framing format (Writer type instead of Encode function), + // https://github.com/google/snappy/blob/master/framing_format.txt says + // that "the uncompressed data in a chunk must be no longer than 65536 + // bytes". + maxBlockSize = 65536 +) + +// DecodedLen returns the length of the decoded block. +func DecodedLen(src []byte) (int, error) { + return s2.DecodedLen(src) +} + +// Decode returns the decoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire decoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +// +// Decode handles the Snappy block format, not the Snappy stream format. +func Decode(dst, src []byte) ([]byte, error) { + return s2.Decode(dst, src) +} + +// NewReader returns a new Reader that decompresses from r, using the framing +// format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +func NewReader(r io.Reader) *Reader { + return s2.NewReader(r, s2.ReaderMaxBlockSize(maxBlockSize)) +} + +// Reader is an io.Reader that can read Snappy-compressed bytes. +// +// Reader handles the Snappy stream format, not the Snappy block format. +type Reader = s2.Reader diff --git a/vendor/github.com/klauspost/compress/snappy/encode.go b/vendor/github.com/klauspost/compress/snappy/encode.go new file mode 100644 index 000000000..e8bd72c18 --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/encode.go @@ -0,0 +1,59 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "io" + + "github.com/klauspost/compress/s2" +) + +// Encode returns the encoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire encoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +// +// Encode handles the Snappy block format, not the Snappy stream format. +func Encode(dst, src []byte) []byte { + return s2.EncodeSnappyBetter(dst, src) +} + +// MaxEncodedLen returns the maximum length of a snappy block, given its +// uncompressed length. +// +// It will return a negative value if srcLen is too large to encode. +func MaxEncodedLen(srcLen int) int { + return s2.MaxEncodedLen(srcLen) +} + +// NewWriter returns a new Writer that compresses to w. +// +// The Writer returned does not buffer writes. There is no need to Flush or +// Close such a Writer. +// +// Deprecated: the Writer returned is not suitable for many small writes, only +// for few large writes. Use NewBufferedWriter instead, which is efficient +// regardless of the frequency and shape of the writes, and remember to Close +// that Writer when done. +func NewWriter(w io.Writer) *Writer { + return s2.NewWriter(w, s2.WriterSnappyCompat(), s2.WriterBetterCompression(), s2.WriterFlushOnWrite(), s2.WriterConcurrency(1)) +} + +// NewBufferedWriter returns a new Writer that compresses to w, using the +// framing format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +// +// The Writer returned buffers writes. Users must call Close to guarantee all +// data has been forwarded to the underlying io.Writer. They may also call +// Flush zero or more times before calling Close. +func NewBufferedWriter(w io.Writer) *Writer { + return s2.NewWriter(w, s2.WriterSnappyCompat(), s2.WriterBetterCompression()) +} + +// Writer is an io.Writer that can write Snappy-compressed bytes. +// +// Writer handles the Snappy stream format, not the Snappy block format. +type Writer = s2.Writer diff --git a/vendor/github.com/golang/snappy/snappy.go b/vendor/github.com/klauspost/compress/snappy/snappy.go index ece692ea4..398cdc95a 100644 --- a/vendor/github.com/golang/snappy/snappy.go +++ b/vendor/github.com/klauspost/compress/snappy/snappy.go @@ -17,11 +17,7 @@ // // The canonical, C++ implementation is at https://github.com/google/snappy and // it only implements the block format. -package snappy // import "github.com/golang/snappy" - -import ( - "hash/crc32" -) +package snappy /* Each encoded block begins with the varint-encoded length of the decoded data, @@ -48,51 +44,3 @@ Lempel-Ziv compression algorithms. In particular: [1, 65). The length is 1 + m. The offset is the little-endian unsigned integer denoted by the next 4 bytes. */ -const ( - tagLiteral = 0x00 - tagCopy1 = 0x01 - tagCopy2 = 0x02 - tagCopy4 = 0x03 -) - -const ( - checksumSize = 4 - chunkHeaderSize = 4 - magicChunk = "\xff\x06\x00\x00" + magicBody - magicBody = "sNaPpY" - - // maxBlockSize is the maximum size of the input to encodeBlock. It is not - // part of the wire format per se, but some parts of the encoder assume - // that an offset fits into a uint16. - // - // Also, for the framing format (Writer type instead of Encode function), - // https://github.com/google/snappy/blob/master/framing_format.txt says - // that "the uncompressed data in a chunk must be no longer than 65536 - // bytes". - maxBlockSize = 65536 - - // maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is - // hard coded to be a const instead of a variable, so that obufLen can also - // be a const. Their equivalence is confirmed by - // TestMaxEncodedLenOfMaxBlockSize. - maxEncodedLenOfMaxBlockSize = 76490 - - obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize - obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize -) - -const ( - chunkTypeCompressedData = 0x00 - chunkTypeUncompressedData = 0x01 - chunkTypePadding = 0xfe - chunkTypeStreamIdentifier = 0xff -) - -var crcTable = crc32.MakeTable(crc32.Castagnoli) - -// crc implements the checksum specified in section 3 of -// https://github.com/google/snappy/blob/master/framing_format.txt -func crc(b []byte) uint32 { - c := crc32.Update(0, crcTable, b) - return uint32(c>>15|c<<17) + 0xa282ead8 -} diff --git a/vendor/github.com/klauspost/compress/zlib/reader.go b/vendor/github.com/klauspost/compress/zlib/reader.go new file mode 100644 index 000000000..f127d4776 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zlib/reader.go @@ -0,0 +1,183 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package zlib implements reading and writing of zlib format compressed data, +as specified in RFC 1950. + +The implementation provides filters that uncompress during reading +and compress during writing. For example, to write compressed data +to a buffer: + + var b bytes.Buffer + w := zlib.NewWriter(&b) + w.Write([]byte("hello, world\n")) + w.Close() + +and to read that data back: + + r, err := zlib.NewReader(&b) + io.Copy(os.Stdout, r) + r.Close() +*/ +package zlib + +import ( + "bufio" + "compress/zlib" + "hash" + "hash/adler32" + "io" + + "github.com/klauspost/compress/flate" +) + +const zlibDeflate = 8 + +var ( + // ErrChecksum is returned when reading ZLIB data that has an invalid checksum. + ErrChecksum = zlib.ErrChecksum + // ErrDictionary is returned when reading ZLIB data that has an invalid dictionary. + ErrDictionary = zlib.ErrDictionary + // ErrHeader is returned when reading ZLIB data that has an invalid header. + ErrHeader = zlib.ErrHeader +) + +type reader struct { + r flate.Reader + decompressor io.ReadCloser + digest hash.Hash32 + err error + scratch [4]byte +} + +// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to +// to switch to a new underlying Reader. This permits reusing a ReadCloser +// instead of allocating a new one. +type Resetter interface { + // Reset discards any buffered data and resets the Resetter as if it was + // newly initialized with the given reader. + Reset(r io.Reader, dict []byte) error +} + +// NewReader creates a new ReadCloser. +// Reads from the returned ReadCloser read and decompress data from r. +// If r does not implement io.ByteReader, the decompressor may read more +// data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser when done. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReader(r io.Reader) (io.ReadCloser, error) { + return NewReaderDict(r, nil) +} + +// NewReaderDict is like NewReader but uses a preset dictionary. +// NewReaderDict ignores the dictionary if the compressed data does not refer to it. +// If the compressed data refers to a different dictionary, NewReaderDict returns ErrDictionary. +// +// The ReadCloser returned by NewReaderDict also implements Resetter. +func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) { + z := new(reader) + err := z.Reset(r, dict) + if err != nil { + return nil, err + } + return z, nil +} + +func (z *reader) Read(p []byte) (int, error) { + if z.err != nil { + return 0, z.err + } + + var n int + n, z.err = z.decompressor.Read(p) + z.digest.Write(p[0:n]) + if z.err != io.EOF { + // In the normal case we return here. + return n, z.err + } + + // Finished file; check checksum. + if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + z.err = err + return n, z.err + } + // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). + checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3]) + if checksum != z.digest.Sum32() { + z.err = ErrChecksum + return n, z.err + } + return n, io.EOF +} + +// Calling Close does not close the wrapped io.Reader originally passed to NewReader. +// In order for the ZLIB checksum to be verified, the reader must be +// fully consumed until the io.EOF. +func (z *reader) Close() error { + if z.err != nil && z.err != io.EOF { + return z.err + } + z.err = z.decompressor.Close() + return z.err +} + +func (z *reader) Reset(r io.Reader, dict []byte) error { + *z = reader{decompressor: z.decompressor, digest: z.digest} + if fr, ok := r.(flate.Reader); ok { + z.r = fr + } else { + z.r = bufio.NewReader(r) + } + + // Read the header (RFC 1950 section 2.2.). + _, z.err = io.ReadFull(z.r, z.scratch[0:2]) + if z.err != nil { + if z.err == io.EOF { + z.err = io.ErrUnexpectedEOF + } + return z.err + } + h := uint(z.scratch[0])<<8 | uint(z.scratch[1]) + if (z.scratch[0]&0x0f != zlibDeflate) || (h%31 != 0) { + z.err = ErrHeader + return z.err + } + haveDict := z.scratch[1]&0x20 != 0 + if haveDict { + _, z.err = io.ReadFull(z.r, z.scratch[0:4]) + if z.err != nil { + if z.err == io.EOF { + z.err = io.ErrUnexpectedEOF + } + return z.err + } + checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3]) + if checksum != adler32.Checksum(dict) { + z.err = ErrDictionary + return z.err + } + } + + if z.decompressor == nil { + if haveDict { + z.decompressor = flate.NewReaderDict(z.r, dict) + } else { + z.decompressor = flate.NewReader(z.r) + } + } else { + z.decompressor.(flate.Resetter).Reset(z.r, dict) + } + + if z.digest != nil { + z.digest.Reset() + } else { + z.digest = adler32.New() + } + return nil +} diff --git a/vendor/github.com/klauspost/compress/zlib/writer.go b/vendor/github.com/klauspost/compress/zlib/writer.go new file mode 100644 index 000000000..605816ba4 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zlib/writer.go @@ -0,0 +1,201 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package zlib + +import ( + "fmt" + "hash" + "hash/adler32" + "io" + + "github.com/klauspost/compress/flate" +) + +// These constants are copied from the flate package, so that code that imports +// "compress/zlib" does not also have to import "compress/flate". +const ( + NoCompression = flate.NoCompression + BestSpeed = flate.BestSpeed + BestCompression = flate.BestCompression + DefaultCompression = flate.DefaultCompression + ConstantCompression = flate.ConstantCompression + HuffmanOnly = flate.HuffmanOnly +) + +// A Writer takes data written to it and writes the compressed +// form of that data to an underlying writer (see NewWriter). +type Writer struct { + w io.Writer + level int + dict []byte + compressor *flate.Writer + digest hash.Hash32 + err error + scratch [4]byte + wroteHeader bool +} + +// NewWriter creates a new Writer. +// Writes to the returned Writer are compressed and written to w. +// +// It is the caller's responsibility to call Close on the WriteCloser when done. +// Writes may be buffered and not flushed until Close. +func NewWriter(w io.Writer) *Writer { + z, _ := NewWriterLevelDict(w, DefaultCompression, nil) + return z +} + +// NewWriterLevel is like NewWriter but specifies the compression level instead +// of assuming DefaultCompression. +// +// The compression level can be DefaultCompression, NoCompression, HuffmanOnly +// or any integer value between BestSpeed and BestCompression inclusive. +// The error returned will be nil if the level is valid. +func NewWriterLevel(w io.Writer, level int) (*Writer, error) { + return NewWriterLevelDict(w, level, nil) +} + +// NewWriterLevelDict is like NewWriterLevel but specifies a dictionary to +// compress with. +// +// The dictionary may be nil. If not, its contents should not be modified until +// the Writer is closed. +func NewWriterLevelDict(w io.Writer, level int, dict []byte) (*Writer, error) { + if level < HuffmanOnly || level > BestCompression { + return nil, fmt.Errorf("zlib: invalid compression level: %d", level) + } + return &Writer{ + w: w, + level: level, + dict: dict, + }, nil +} + +// Reset clears the state of the Writer z such that it is equivalent to its +// initial state from NewWriterLevel or NewWriterLevelDict, but instead writing +// to w. +func (z *Writer) Reset(w io.Writer) { + z.w = w + // z.level and z.dict left unchanged. + if z.compressor != nil { + z.compressor.Reset(w) + } + if z.digest != nil { + z.digest.Reset() + } + z.err = nil + z.scratch = [4]byte{} + z.wroteHeader = false +} + +// writeHeader writes the ZLIB header. +func (z *Writer) writeHeader() (err error) { + z.wroteHeader = true + // ZLIB has a two-byte header (as documented in RFC 1950). + // The first four bits is the CINFO (compression info), which is 7 for the default deflate window size. + // The next four bits is the CM (compression method), which is 8 for deflate. + z.scratch[0] = 0x78 + // The next two bits is the FLEVEL (compression level). The four values are: + // 0=fastest, 1=fast, 2=default, 3=best. + // The next bit, FDICT, is set if a dictionary is given. + // The final five FCHECK bits form a mod-31 checksum. + switch z.level { + case -2, 0, 1: + z.scratch[1] = 0 << 6 + case 2, 3, 4, 5: + z.scratch[1] = 1 << 6 + case 6, -1: + z.scratch[1] = 2 << 6 + case 7, 8, 9: + z.scratch[1] = 3 << 6 + default: + panic("unreachable") + } + if z.dict != nil { + z.scratch[1] |= 1 << 5 + } + z.scratch[1] += uint8(31 - (uint16(z.scratch[0])<<8+uint16(z.scratch[1]))%31) + if _, err = z.w.Write(z.scratch[0:2]); err != nil { + return err + } + if z.dict != nil { + // The next four bytes are the Adler-32 checksum of the dictionary. + checksum := adler32.Checksum(z.dict) + z.scratch[0] = uint8(checksum >> 24) + z.scratch[1] = uint8(checksum >> 16) + z.scratch[2] = uint8(checksum >> 8) + z.scratch[3] = uint8(checksum >> 0) + if _, err = z.w.Write(z.scratch[0:4]); err != nil { + return err + } + } + if z.compressor == nil { + // Initialize deflater unless the Writer is being reused + // after a Reset call. + z.compressor, err = flate.NewWriterDict(z.w, z.level, z.dict) + if err != nil { + return err + } + z.digest = adler32.New() + } + return nil +} + +// Write writes a compressed form of p to the underlying io.Writer. The +// compressed bytes are not necessarily flushed until the Writer is closed or +// explicitly flushed. +func (z *Writer) Write(p []byte) (n int, err error) { + if !z.wroteHeader { + z.err = z.writeHeader() + } + if z.err != nil { + return 0, z.err + } + if len(p) == 0 { + return 0, nil + } + n, err = z.compressor.Write(p) + if err != nil { + z.err = err + return + } + z.digest.Write(p) + return +} + +// Flush flushes the Writer to its underlying io.Writer. +func (z *Writer) Flush() error { + if !z.wroteHeader { + z.err = z.writeHeader() + } + if z.err != nil { + return z.err + } + z.err = z.compressor.Flush() + return z.err +} + +// Close closes the Writer, flushing any unwritten data to the underlying +// io.Writer, but does not close the underlying io.Writer. +func (z *Writer) Close() error { + if !z.wroteHeader { + z.err = z.writeHeader() + } + if z.err != nil { + return z.err + } + z.err = z.compressor.Close() + if z.err != nil { + return z.err + } + checksum := z.digest.Sum32() + // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). + z.scratch[0] = uint8(checksum >> 24) + z.scratch[1] = uint8(checksum >> 16) + z.scratch[2] = uint8(checksum >> 8) + z.scratch[3] = uint8(checksum >> 0) + _, z.err = z.w.Write(z.scratch[0:4]) + return z.err +} diff --git a/vendor/github.com/minio/minio-go/v7/api-datatypes.go b/vendor/github.com/minio/minio-go/v7/api-datatypes.go index 09b03af26..5a8d473c6 100644 --- a/vendor/github.com/minio/minio-go/v7/api-datatypes.go +++ b/vendor/github.com/minio/minio-go/v7/api-datatypes.go @@ -84,6 +84,12 @@ type UploadInfo struct { // not to be confused with `Expires` HTTP header. Expiration time.Time ExpirationRuleID string + + // Verified checksum values, if any. + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string } // RestoreInfo contains information of the restore operation of an archived object @@ -148,6 +154,12 @@ type ObjectInfo struct { Restore *RestoreInfo + // Checksum values + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string + // Error Err error `json:"-"` } diff --git a/vendor/github.com/minio/minio-go/v7/api-get-options.go b/vendor/github.com/minio/minio-go/v7/api-get-options.go index 184ef9f86..08ae95c42 100644 --- a/vendor/github.com/minio/minio-go/v7/api-get-options.go +++ b/vendor/github.com/minio/minio-go/v7/api-get-options.go @@ -38,6 +38,12 @@ type GetObjectOptions struct { ServerSideEncryption encrypt.ServerSide VersionID string PartNumber int + + // Include any checksums, if object was uploaded with checksum. + // For multipart objects this is a checksum of part checksums. + // https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html + Checksum bool + // To be not used by external applications Internal AdvancedGetOptions } @@ -60,6 +66,9 @@ func (o GetObjectOptions) Header() http.Header { if o.Internal.ReplicationProxyRequest != "" { headers.Set(minIOBucketReplicationProxyRequest, o.Internal.ReplicationProxyRequest) } + if o.Checksum { + headers.Set("x-amz-checksum-mode", "ENABLED") + } return headers } diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go b/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go index abcbd2981..5fec17a1c 100644 --- a/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go +++ b/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go @@ -24,6 +24,7 @@ import ( "encoding/hex" "encoding/xml" "fmt" + "hash/crc32" "io" "io/ioutil" "net/http" @@ -79,11 +80,23 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj return UploadInfo{}, err } + // Choose hash algorithms to be calculated by hashCopyN, + // avoid sha256 with non-v4 signature request or + // HTTPS connection. + hashAlgos, hashSums := c.hashMaterials(opts.SendContentMd5, !opts.DisableContentSha256) + if len(hashSums) == 0 { + if opts.UserMetadata == nil { + opts.UserMetadata = make(map[string]string, 1) + } + opts.UserMetadata["X-Amz-Checksum-Algorithm"] = "CRC32C" + } + // Initiate a new multipart upload. uploadID, err := c.newUploadID(ctx, bucketName, objectName, opts) if err != nil { return UploadInfo{}, err } + delete(opts.UserMetadata, "X-Amz-Checksum-Algorithm") defer func() { if err != nil { @@ -100,12 +113,12 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj // Create a buffer. buf := make([]byte, partSize) + // Create checksums + // CRC32C is ~50% faster on AMD64 @ 30GB/s + var crcBytes []byte + customHeader := make(http.Header) + crc := crc32.New(crc32.MakeTable(crc32.Castagnoli)) for partNumber <= totalPartsCount { - // Choose hash algorithms to be calculated by hashCopyN, - // avoid sha256 with non-v4 signature request or - // HTTPS connection. - hashAlgos, hashSums := c.hashMaterials(opts.SendContentMd5, !opts.DisableContentSha256) - length, rErr := readFull(reader, buf) if rErr == io.EOF && partNumber > 1 { break @@ -131,18 +144,23 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj md5Base64 string sha256Hex string ) + if hashSums["md5"] != nil { md5Base64 = base64.StdEncoding.EncodeToString(hashSums["md5"]) } if hashSums["sha256"] != nil { sha256Hex = hex.EncodeToString(hashSums["sha256"]) } + if len(hashSums) == 0 { + crc.Reset() + crc.Write(buf[:length]) + cSum := crc.Sum(nil) + customHeader.Set("x-amz-checksum-crc32c", base64.StdEncoding.EncodeToString(cSum)) + crcBytes = append(crcBytes, cSum...) + } // Proceed to upload the part. - objPart, uerr := c.uploadPart(ctx, bucketName, objectName, uploadID, rd, partNumber, - md5Base64, sha256Hex, int64(length), - opts.ServerSideEncryption, - !opts.DisableContentSha256) + objPart, uerr := c.uploadPart(ctx, bucketName, objectName, uploadID, rd, partNumber, md5Base64, sha256Hex, int64(length), opts.ServerSideEncryption, !opts.DisableContentSha256, customHeader) if uerr != nil { return UploadInfo{}, uerr } @@ -171,15 +189,25 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i)) } complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: part.ETag, - PartNumber: part.PartNumber, + ETag: part.ETag, + PartNumber: part.PartNumber, + ChecksumCRC32: part.ChecksumCRC32, + ChecksumCRC32C: part.ChecksumCRC32C, + ChecksumSHA1: part.ChecksumSHA1, + ChecksumSHA256: part.ChecksumSHA256, }) } // Sort all completed parts. sort.Sort(completedParts(complMultipartUpload.Parts)) - - uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, PutObjectOptions{}) + opts = PutObjectOptions{} + if len(crcBytes) > 0 { + // Add hash of hashes. + crc.Reset() + crc.Write(crcBytes) + opts.UserMetadata = map[string]string{"X-Amz-Checksum-Crc32c": base64.StdEncoding.EncodeToString(crc.Sum(nil))} + } + uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) if err != nil { return UploadInfo{}, err } @@ -242,9 +270,7 @@ func (c *Client) initiateMultipartUpload(ctx context.Context, bucketName, object } // uploadPart - Uploads a part in a multipart upload. -func (c *Client) uploadPart(ctx context.Context, bucketName, objectName, uploadID string, reader io.Reader, - partNumber int, md5Base64, sha256Hex string, size int64, sse encrypt.ServerSide, streamSha256 bool, -) (ObjectPart, error) { +func (c *Client) uploadPart(ctx context.Context, bucketName string, objectName string, uploadID string, reader io.Reader, partNumber int, md5Base64 string, sha256Hex string, size int64, sse encrypt.ServerSide, streamSha256 bool, customHeader http.Header) (ObjectPart, error) { // Input validation. if err := s3utils.CheckValidBucketName(bucketName); err != nil { return ObjectPart{}, err @@ -273,7 +299,9 @@ func (c *Client) uploadPart(ctx context.Context, bucketName, objectName, uploadI urlValues.Set("uploadId", uploadID) // Set encryption headers, if any. - customHeader := make(http.Header) + if customHeader == nil { + customHeader = make(http.Header) + } // https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadUploadPart.html // Server-side encryption is supported by the S3 Multipart Upload actions. // Unless you are using a customer-provided encryption key, you don't need @@ -306,11 +334,17 @@ func (c *Client) uploadPart(ctx context.Context, bucketName, objectName, uploadI } } // Once successfully uploaded, return completed part. - objPart := ObjectPart{} + h := resp.Header + objPart := ObjectPart{ + ChecksumCRC32: h.Get("x-amz-checksum-crc32"), + ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"), + ChecksumSHA1: h.Get("x-amz-checksum-sha1"), + ChecksumSHA256: h.Get("x-amz-checksum-sha256"), + } objPart.Size = size objPart.PartNumber = partNumber // Trim off the odd double quotes from ETag in the beginning and end. - objPart.ETag = trimEtag(resp.Header.Get("ETag")) + objPart.ETag = trimEtag(h.Get("ETag")) return objPart, nil } diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go b/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go index 11b3a5255..464bde7f3 100644 --- a/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go +++ b/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go @@ -22,6 +22,7 @@ import ( "context" "encoding/base64" "fmt" + "hash/crc32" "io" "net/http" "net/url" @@ -38,9 +39,8 @@ import ( // // Following code handles these types of readers. // -// - *minio.Object -// - Any reader which has a method 'ReadAt()' -// +// - *minio.Object +// - Any reader which has a method 'ReadAt()' func (c *Client) putObjectMultipartStream(ctx context.Context, bucketName, objectName string, reader io.Reader, size int64, opts PutObjectOptions, ) (info UploadInfo, err error) { @@ -184,12 +184,7 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN sectionReader := newHook(io.NewSectionReader(reader, readOffset, partSize), opts.Progress) // Proceed to upload the part. - objPart, err := c.uploadPart(ctx, bucketName, objectName, - uploadID, sectionReader, uploadReq.PartNum, - "", "", partSize, - opts.ServerSideEncryption, - !opts.DisableContentSha256, - ) + objPart, err := c.uploadPart(ctx, bucketName, objectName, uploadID, sectionReader, uploadReq.PartNum, "", "", partSize, opts.ServerSideEncryption, !opts.DisableContentSha256, nil) if err != nil { uploadedPartsCh <- uploadedPartRes{ Error: err, @@ -260,6 +255,13 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b return UploadInfo{}, err } + if !opts.SendContentMd5 { + if opts.UserMetadata == nil { + opts.UserMetadata = make(map[string]string, 1) + } + opts.UserMetadata["X-Amz-Checksum-Algorithm"] = "CRC32C" + } + // Calculate the optimal parts info for a given size. totalPartsCount, partSize, lastPartSize, err := OptimalPartInfo(size, opts.PartSize) if err != nil { @@ -270,6 +272,7 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b if err != nil { return UploadInfo{}, err } + delete(opts.UserMetadata, "X-Amz-Checksum-Algorithm") // Aborts the multipart upload if the function returns // any error, since we do not resume we should purge @@ -281,6 +284,14 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b } }() + // Create checksums + // CRC32C is ~50% faster on AMD64 @ 30GB/s + var crcBytes []byte + customHeader := make(http.Header) + crc := crc32.New(crc32.MakeTable(crc32.Castagnoli)) + md5Hash := c.md5Hasher() + defer md5Hash.Close() + // Total data read and written to server. should be equal to 'size' at the end of the call. var totalUploadedSize int64 @@ -292,7 +303,6 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b // Avoid declaring variables in the for loop var md5Base64 string - var hookReader io.Reader // Part number always starts with '1'. var partNumber int @@ -303,37 +313,34 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b partSize = lastPartSize } - if opts.SendContentMd5 { - length, rerr := readFull(reader, buf) - if rerr == io.EOF && partNumber > 1 { - break - } - - if rerr != nil && rerr != io.ErrUnexpectedEOF && err != io.EOF { - return UploadInfo{}, rerr - } + length, rerr := readFull(reader, buf) + if rerr == io.EOF && partNumber > 1 { + break + } - // Calculate md5sum. - hash := c.md5Hasher() - hash.Write(buf[:length]) - md5Base64 = base64.StdEncoding.EncodeToString(hash.Sum(nil)) - hash.Close() + if rerr != nil && rerr != io.ErrUnexpectedEOF && err != io.EOF { + return UploadInfo{}, rerr + } - // Update progress reader appropriately to the latest offset - // as we read from the source. - hookReader = newHook(bytes.NewReader(buf[:length]), opts.Progress) + // Calculate md5sum. + if opts.SendContentMd5 { + md5Hash.Reset() + md5Hash.Write(buf[:length]) + md5Base64 = base64.StdEncoding.EncodeToString(md5Hash.Sum(nil)) } else { - // Update progress reader appropriately to the latest offset - // as we read from the source. - hookReader = newHook(reader, opts.Progress) + // Add CRC32C instead. + crc.Reset() + crc.Write(buf[:length]) + cSum := crc.Sum(nil) + customHeader.Set("x-amz-checksum-crc32c", base64.StdEncoding.EncodeToString(cSum)) + crcBytes = append(crcBytes, cSum...) } - objPart, uerr := c.uploadPart(ctx, bucketName, objectName, uploadID, - io.LimitReader(hookReader, partSize), - partNumber, md5Base64, "", partSize, - opts.ServerSideEncryption, - !opts.DisableContentSha256, - ) + // Update progress reader appropriately to the latest offset + // as we read from the source. + hooked := newHook(bytes.NewReader(buf[:length]), opts.Progress) + + objPart, uerr := c.uploadPart(ctx, bucketName, objectName, uploadID, hooked, partNumber, md5Base64, "", partSize, opts.ServerSideEncryption, !opts.DisableContentSha256, customHeader) if uerr != nil { return UploadInfo{}, uerr } @@ -363,15 +370,26 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i)) } complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: part.ETag, - PartNumber: part.PartNumber, + ETag: part.ETag, + PartNumber: part.PartNumber, + ChecksumCRC32: part.ChecksumCRC32, + ChecksumCRC32C: part.ChecksumCRC32C, + ChecksumSHA1: part.ChecksumSHA1, + ChecksumSHA256: part.ChecksumSHA256, }) } // Sort all completed parts. sort.Sort(completedParts(complMultipartUpload.Parts)) - uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, PutObjectOptions{}) + opts = PutObjectOptions{} + if len(crcBytes) > 0 { + // Add hash of hashes. + crc.Reset() + crc.Write(crcBytes) + opts.UserMetadata = map[string]string{"X-Amz-Checksum-Crc32c": base64.StdEncoding.EncodeToString(crc.Sum(nil))} + } + uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) if err != nil { return UploadInfo{}, err } @@ -490,14 +508,20 @@ func (c *Client) putObjectDo(ctx context.Context, bucketName, objectName string, // extract lifecycle expiry date and rule ID expTime, ruleID := amzExpirationToExpiryDateRuleID(resp.Header.Get(amzExpiration)) - + h := resp.Header return UploadInfo{ Bucket: bucketName, Key: objectName, - ETag: trimEtag(resp.Header.Get("ETag")), - VersionID: resp.Header.Get(amzVersionID), + ETag: trimEtag(h.Get("ETag")), + VersionID: h.Get(amzVersionID), Size: size, Expiration: expTime, ExpirationRuleID: ruleID, + + // Checksum values + ChecksumCRC32: h.Get("x-amz-checksum-crc32"), + ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"), + ChecksumSHA1: h.Get("x-amz-checksum-sha1"), + ChecksumSHA256: h.Get("x-amz-checksum-sha256"), }, nil } diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object.go b/vendor/github.com/minio/minio-go/v7/api-put-object.go index 9328fb6c1..321ad00aa 100644 --- a/vendor/github.com/minio/minio-go/v7/api-put-object.go +++ b/vendor/github.com/minio/minio-go/v7/api-put-object.go @@ -23,6 +23,7 @@ import ( "encoding/base64" "errors" "fmt" + "hash/crc32" "io" "net/http" "sort" @@ -215,18 +216,18 @@ func (a completedParts) Less(i, j int) bool { return a[i].PartNumber < a[j].Part // // You must have WRITE permissions on a bucket to create an object. // -// - For size smaller than 16MiB PutObject automatically does a -// single atomic PUT operation. +// - For size smaller than 16MiB PutObject automatically does a +// single atomic PUT operation. // -// - For size larger than 16MiB PutObject automatically does a -// multipart upload operation. +// - For size larger than 16MiB PutObject automatically does a +// multipart upload operation. // -// - For size input as -1 PutObject does a multipart Put operation -// until input stream reaches EOF. Maximum object size that can -// be uploaded through this operation will be 5TiB. +// - For size input as -1 PutObject does a multipart Put operation +// until input stream reaches EOF. Maximum object size that can +// be uploaded through this operation will be 5TiB. // -// WARNING: Passing down '-1' will use memory and these cannot -// be reused for best outcomes for PutObject(), pass the size always. +// WARNING: Passing down '-1' will use memory and these cannot +// be reused for best outcomes for PutObject(), pass the size always. // // NOTE: Upon errors during upload multipart operation is entirely aborted. func (c *Client) PutObject(ctx context.Context, bucketName, objectName string, reader io.Reader, objectSize int64, @@ -299,11 +300,20 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam if err != nil { return UploadInfo{}, err } + + if !opts.SendContentMd5 { + if opts.UserMetadata == nil { + opts.UserMetadata = make(map[string]string, 1) + } + opts.UserMetadata["X-Amz-Checksum-Algorithm"] = "CRC32C" + } + // Initiate a new multipart upload. uploadID, err := c.newUploadID(ctx, bucketName, objectName, opts) if err != nil { return UploadInfo{}, err } + delete(opts.UserMetadata, "X-Amz-Checksum-Algorithm") defer func() { if err != nil { @@ -320,6 +330,12 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam // Create a buffer. buf := make([]byte, partSize) + // Create checksums + // CRC32C is ~50% faster on AMD64 @ 30GB/s + var crcBytes []byte + customHeader := make(http.Header) + crc := crc32.New(crc32.MakeTable(crc32.Castagnoli)) + for partNumber <= totalPartsCount { length, rerr := readFull(reader, buf) if rerr == io.EOF && partNumber > 1 { @@ -337,6 +353,12 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam hash.Write(buf[:length]) md5Base64 = base64.StdEncoding.EncodeToString(hash.Sum(nil)) hash.Close() + } else { + crc.Reset() + crc.Write(buf[:length]) + cSum := crc.Sum(nil) + customHeader.Set("x-amz-checksum-crc32c", base64.StdEncoding.EncodeToString(cSum)) + crcBytes = append(crcBytes, cSum...) } // Update progress reader appropriately to the latest offset @@ -344,11 +366,7 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam rd := newHook(bytes.NewReader(buf[:length]), opts.Progress) // Proceed to upload the part. - objPart, uerr := c.uploadPart(ctx, bucketName, objectName, uploadID, rd, partNumber, - md5Base64, "", int64(length), - opts.ServerSideEncryption, - !opts.DisableContentSha256, - ) + objPart, uerr := c.uploadPart(ctx, bucketName, objectName, uploadID, rd, partNumber, md5Base64, "", int64(length), opts.ServerSideEncryption, !opts.DisableContentSha256, customHeader) if uerr != nil { return UploadInfo{}, uerr } @@ -377,15 +395,26 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i)) } complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: part.ETag, - PartNumber: part.PartNumber, + ETag: part.ETag, + PartNumber: part.PartNumber, + ChecksumCRC32: part.ChecksumCRC32, + ChecksumCRC32C: part.ChecksumCRC32C, + ChecksumSHA1: part.ChecksumSHA1, + ChecksumSHA256: part.ChecksumSHA256, }) } // Sort all completed parts. sort.Sort(completedParts(complMultipartUpload.Parts)) - uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, PutObjectOptions{}) + opts = PutObjectOptions{} + if len(crcBytes) > 0 { + // Add hash of hashes. + crc.Reset() + crc.Write(crcBytes) + opts.UserMetadata = map[string]string{"X-Amz-Checksum-Crc32c": base64.StdEncoding.EncodeToString(crc.Sum(nil))} + } + uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) if err != nil { return UploadInfo{}, err } diff --git a/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go b/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go index 592d4cdcc..b1b848f4a 100644 --- a/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go +++ b/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go @@ -261,6 +261,12 @@ type ObjectPart struct { // Size of the uploaded part data. Size int64 + + // Checksum values of each part. + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string } // ListObjectPartsResult container for ListObjectParts response. @@ -299,6 +305,12 @@ type completeMultipartUploadResult struct { Bucket string Key string ETag string + + // Checksum values, hash of hashes of parts. + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string } // CompletePart sub container lists individual part numbers and their @@ -309,6 +321,12 @@ type CompletePart struct { // Part number identifies the part. PartNumber int ETag string + + // Checksum values + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string } // completeMultipartUpload container for completing multipart upload. diff --git a/vendor/github.com/minio/minio-go/v7/api.go b/vendor/github.com/minio/minio-go/v7/api.go index 2dcfc7978..6598e9d92 100644 --- a/vendor/github.com/minio/minio-go/v7/api.go +++ b/vendor/github.com/minio/minio-go/v7/api.go @@ -111,7 +111,7 @@ type Options struct { // Global constants. const ( libraryName = "minio-go" - libraryVersion = "v7.0.36" + libraryVersion = "v7.0.37" ) // User Agent should always following the below style. diff --git a/vendor/github.com/minio/minio-go/v7/core.go b/vendor/github.com/minio/minio-go/v7/core.go index 148671eec..f6132e79a 100644 --- a/vendor/github.com/minio/minio-go/v7/core.go +++ b/vendor/github.com/minio/minio-go/v7/core.go @@ -89,7 +89,7 @@ func (c Core) ListMultipartUploads(ctx context.Context, bucket, prefix, keyMarke // PutObjectPart - Upload an object part. func (c Core) PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, data io.Reader, size int64, md5Base64, sha256Hex string, sse encrypt.ServerSide) (ObjectPart, error) { streamSha256 := true - return c.uploadPart(ctx, bucket, object, uploadID, data, partID, md5Base64, sha256Hex, size, sse, streamSha256) + return c.uploadPart(ctx, bucket, object, uploadID, data, partID, md5Base64, sha256Hex, size, sse, streamSha256, nil) } // ListObjectParts - List uploaded parts of an incomplete upload.x diff --git a/vendor/github.com/minio/minio-go/v7/functional_tests.go b/vendor/github.com/minio/minio-go/v7/functional_tests.go index 59f347eff..483b5cb11 100644 --- a/vendor/github.com/minio/minio-go/v7/functional_tests.go +++ b/vendor/github.com/minio/minio-go/v7/functional_tests.go @@ -24,8 +24,11 @@ import ( "archive/zip" "bytes" "context" + "crypto/sha1" + "encoding/base64" "errors" "fmt" + "hash" "hash/crc32" "io" "io/ioutil" @@ -46,6 +49,7 @@ import ( "github.com/dustin/go-humanize" jsoniter "github.com/json-iterator/go" + "github.com/minio/sha256-simd" log "github.com/sirupsen/logrus" "github.com/minio/minio-go/v7" @@ -1991,6 +1995,167 @@ func testObjectTaggingWithVersioning() { successLogger(testName, function, args, startTime).Info() } +// Test PutObject with custom checksums. +func testPutObjectWithChecksums() { + // initialize logging params + startTime := time.Now() + testName := getFuncName() + function := "PutObject(bucketName, objectName, reader,size, opts)" + args := map[string]interface{}{ + "bucketName": "", + "objectName": "", + "opts": "minio.PutObjectOptions{UserMetadata: metadata, Progress: progress}", + } + + if !isFullMode() { + ignoredLog(testName, function, args, startTime, "Skipping functional tests for short/quick runs").Info() + return + } + + // Seed random based on current time. + rand.Seed(time.Now().Unix()) + + // Instantiate new minio client object. + c, err := minio.New(os.Getenv(serverEndpoint), + &minio.Options{ + Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), + Secure: mustParseBool(os.Getenv(enableHTTPS)), + }) + if err != nil { + logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) + return + } + + // Enable tracing, write to stderr. + //c.TraceOn(os.Stderr) + + // Set user agent. + c.SetAppInfo("MinIO-go-FunctionalTest", "0.1.0") + + // Generate a new random bucket name. + bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") + args["bucketName"] = bucketName + + // Make a new bucket. + err = c.MakeBucket(context.Background(), bucketName, minio.MakeBucketOptions{Region: "us-east-1"}) + if err != nil { + logError(testName, function, args, startTime, "", "Make bucket failed", err) + return + } + + defer cleanupBucket(bucketName, c) + tests := []struct { + header string + hasher hash.Hash + + // Checksum values + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string + }{ + {header: "x-amz-checksum-crc32", hasher: crc32.NewIEEE(), ChecksumCRC32: "yXTVFQ=="}, + {header: "x-amz-checksum-crc32c", hasher: crc32.New(crc32.MakeTable(crc32.Castagnoli)), ChecksumCRC32C: "zXqj7Q=="}, + {header: "x-amz-checksum-sha1", hasher: sha1.New(), ChecksumSHA1: "SwmAs3F75Sw/sE4dHehkvYtn9UE="}, + {header: "x-amz-checksum-sha256", hasher: sha256.New(), ChecksumSHA256: "8Tlu9msuw/cpmWNEnQx97axliBjiE6gK1doiY0N9WuA="}, + } + + for i, test := range tests { + bufSize := dataFileMap["datafile-129-MB"] + + // Save the data + objectName := randString(60, rand.NewSource(time.Now().UnixNano()), "") + args["objectName"] = objectName + + cmpChecksum := func(got, want string) { + if want != got { + logError(testName, function, args, startTime, "", "checksum mismatch", fmt.Errorf("want %s, got %s", want, got)) + return + } + } + + meta := map[string]string{} + reader := getDataReader("datafile-129-MB") + b, err := io.ReadAll(reader) + if err != nil { + logError(testName, function, args, startTime, "", "Read failed", err) + return + } + h := test.hasher + h.Reset() + // Wrong CRC. + meta[test.header] = base64.StdEncoding.EncodeToString(h.Sum(nil)) + args["metadata"] = meta + + resp, err := c.PutObject(context.Background(), bucketName, objectName, bytes.NewReader(b), int64(bufSize), minio.PutObjectOptions{ + DisableMultipart: true, + UserMetadata: meta, + }) + if err == nil { + if i == 0 && resp.ChecksumCRC32 == "" { + ignoredLog(testName, function, args, startTime, "Checksums does not appear to be supported by backend").Info() + return + } + logError(testName, function, args, startTime, "", "PutObject failed", err) + return + } + + // Set correct CRC. + h.Write(b) + meta[test.header] = base64.StdEncoding.EncodeToString(h.Sum(nil)) + reader.Close() + + resp, err = c.PutObject(context.Background(), bucketName, objectName, bytes.NewReader(b), int64(bufSize), minio.PutObjectOptions{ + DisableMultipart: true, + UserMetadata: meta, + }) + if err != nil { + logError(testName, function, args, startTime, "", "PutObject failed", err) + return + } + cmpChecksum(resp.ChecksumSHA256, test.ChecksumSHA256) + cmpChecksum(resp.ChecksumSHA1, test.ChecksumSHA1) + cmpChecksum(resp.ChecksumCRC32, test.ChecksumCRC32) + cmpChecksum(resp.ChecksumCRC32C, test.ChecksumCRC32C) + + // Read the data back + gopts := minio.GetObjectOptions{Checksum: true} + r, err := c.GetObject(context.Background(), bucketName, objectName, gopts) + if err != nil { + logError(testName, function, args, startTime, "", "GetObject failed", err) + return + } + + st, err := r.Stat() + if err != nil { + logError(testName, function, args, startTime, "", "Stat failed", err) + return + } + + cmpChecksum(st.ChecksumSHA256, test.ChecksumSHA256) + cmpChecksum(st.ChecksumSHA1, test.ChecksumSHA1) + cmpChecksum(st.ChecksumCRC32, test.ChecksumCRC32) + cmpChecksum(st.ChecksumCRC32C, test.ChecksumCRC32C) + + if st.Size != int64(bufSize) { + logError(testName, function, args, startTime, "", "Number of bytes returned by PutObject does not match GetObject, expected "+string(bufSize)+" got "+string(st.Size), err) + return + } + + if err := r.Close(); err != nil { + logError(testName, function, args, startTime, "", "Object Close failed", err) + return + } + if err := r.Close(); err == nil { + logError(testName, function, args, startTime, "", "Object already closed, should respond with error", err) + return + } + delete(args, "metadata") + } + + successLogger(testName, function, args, startTime).Info() +} + // Test PutObject using a large data to trigger multipart readat func testPutObjectWithMetadata() { // initialize logging params @@ -12128,6 +12293,7 @@ func main() { testComposeObjectErrorCasesV2() testCompose10KSourcesV2() testUserMetadataCopyingV2() + testPutObjectWithChecksums() testPutObject0ByteV2() testPutObjectNoLengthV2() testPutObjectsUnknownV2() diff --git a/vendor/github.com/minio/minio-go/v7/utils.go b/vendor/github.com/minio/minio-go/v7/utils.go index 11a4f3403..f32f84ab0 100644 --- a/vendor/github.com/minio/minio-go/v7/utils.go +++ b/vendor/github.com/minio/minio-go/v7/utils.go @@ -376,6 +376,12 @@ func ToObjectInfo(bucketName string, objectName string, h http.Header) (ObjectIn UserTags: userTags, UserTagCount: tagCount, Restore: restore, + + // Checksum values + ChecksumCRC32: h.Get("x-amz-checksum-crc32"), + ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"), + ChecksumSHA1: h.Get("x-amz-checksum-sha1"), + ChecksumSHA256: h.Get("x-amz-checksum-sha256"), }, nil } @@ -501,7 +507,7 @@ func isSSEHeader(headerKey string) bool { func isAmzHeader(headerKey string) bool { key := strings.ToLower(headerKey) - return strings.HasPrefix(key, "x-amz-meta-") || strings.HasPrefix(key, "x-amz-grant-") || key == "x-amz-acl" || isSSEHeader(headerKey) + return strings.HasPrefix(key, "x-amz-meta-") || strings.HasPrefix(key, "x-amz-grant-") || key == "x-amz-acl" || isSSEHeader(headerKey) || strings.HasPrefix(key, "x-amz-checksum-") } var ( |