Diffstat (limited to 'vendor/github.com/dolthub/swiss/map.go')
-rw-r--r--  vendor/github.com/dolthub/swiss/map.go | 359
1 file changed, 359 insertions(+), 0 deletions(-)
diff --git a/vendor/github.com/dolthub/swiss/map.go b/vendor/github.com/dolthub/swiss/map.go
new file mode 100644
index 000000000..e5ad20386
--- /dev/null
+++ b/vendor/github.com/dolthub/swiss/map.go
@@ -0,0 +1,359 @@
+// Copyright 2023 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package swiss
+
+import (
+ "github.com/dolthub/maphash"
+)
+
+const (
+ maxLoadFactor = float32(maxAvgGroupLoad) / float32(groupSize)
+)
+
+// Map is an open-addressing hash map
+// based on Abseil's flat_hash_map.
+type Map[K comparable, V any] struct {
+ ctrl []metadata // one metadata array per group in |groups|
+ groups []group[K, V] // key and value storage, one group per metadata array
+ hash maphash.Hasher[K] // seeded hasher for keys of type K
+ resident uint32 // number of non-empty slots (live entries plus tombstones)
+ dead uint32 // number of tombstoned slots
+ limit uint32 // resident count at which the map is rehashed
+}
+
+// metadata is the h2 metadata array for a group.
+// find operations first probe the control bytes
+// to filter candidates before comparing keys.
+type metadata [groupSize]int8
+
+// group is a group of 16 key-value pairs
+type group[K comparable, V any] struct {
+ keys [groupSize]K
+ values [groupSize]V
+}
+
+const (
+ h1Mask uint64 = 0xffff_ffff_ffff_ff80
+ h2Mask uint64 = 0x0000_0000_0000_007f
+ empty int8 = -128 // 0b1000_0000
+ tombstone int8 = -2 // 0b1111_1110
+)
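+
+// A control byte thus takes one of three forms (a restatement of the
+// constants above): 0b1000_0000 for an empty slot, 0b1111_1110 for a
+// tombstone, and 0b0xxx_xxxx (the 7-bit h2 value, high bit clear) for
+// an occupied slot.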
+
+// h1 is a 57 bit hash prefix
+type h1 uint64
+
+// h2 is a 7 bit hash suffix
+type h2 int8
+
+// NewMap constructs a Map.
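+//
+// A minimal usage sketch (the key and value types here are
+// illustrative, not required by the API):
+//
+//	m := NewMap[string, int](10)
+//	m.Put("foo", 1)
+//	if v, ok := m.Get("foo"); ok {
+//		_ = v // v == 1
+//	}
+//	m.Delete("foo")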
+func NewMap[K comparable, V any](sz uint32) (m *Map[K, V]) {
+ groups := numGroups(sz)
+ m = &Map[K, V]{
+ ctrl: make([]metadata, groups),
+ groups: make([]group[K, V], groups),
+ hash: maphash.NewHasher[K](),
+ limit: groups * maxAvgGroupLoad,
+ }
+ for i := range m.ctrl {
+ m.ctrl[i] = newEmptyMetadata()
+ }
+ return
+}
+
+// Has returns true if |key| is present in |m|.
+func (m *Map[K, V]) Has(key K) (ok bool) {
+ hi, lo := splitHash(m.hash.Hash(key))
+ g := probeStart(hi, len(m.groups))
+ for { // inlined find loop
+ matches := metaMatchH2(&m.ctrl[g], lo)
+ for matches != 0 {
+ s := nextMatch(&matches)
+ if key == m.groups[g].keys[s] {
+ ok = true
+ return
+ }
+ }
+ // |key| is not in group |g|,
+ // stop probing if we see an empty slot
+ matches = metaMatchEmpty(&m.ctrl[g])
+ if matches != 0 {
+ ok = false
+ return
+ }
+ g += 1 // linear probing
+ if g >= uint32(len(m.groups)) {
+ g = 0
+ }
+ }
+}
+
+// Get returns the |value| mapped by |key| if one exists.
+func (m *Map[K, V]) Get(key K) (value V, ok bool) {
+ hi, lo := splitHash(m.hash.Hash(key))
+ g := probeStart(hi, len(m.groups))
+ for { // inlined find loop
+ matches := metaMatchH2(&m.ctrl[g], lo)
+ for matches != 0 {
+ s := nextMatch(&matches)
+ if key == m.groups[g].keys[s] {
+ value, ok = m.groups[g].values[s], true
+ return
+ }
+ }
+ // |key| is not in group |g|,
+ // stop probing if we see an empty slot
+ matches = metaMatchEmpty(&m.ctrl[g])
+ if matches != 0 {
+ ok = false
+ return
+ }
+ g += 1 // linear probing
+ if g >= uint32(len(m.groups)) {
+ g = 0
+ }
+ }
+}
+
+// Put inserts |key| and |value|, updating the value if |key| is already present.
+func (m *Map[K, V]) Put(key K, value V) {
+ if m.resident >= m.limit {
+ m.rehash(m.nextSize())
+ }
+ hi, lo := splitHash(m.hash.Hash(key))
+ g := probeStart(hi, len(m.groups))
+ for { // inlined find loop
+ matches := metaMatchH2(&m.ctrl[g], lo)
+ for matches != 0 {
+ s := nextMatch(&matches)
+ if key == m.groups[g].keys[s] { // update
+ m.groups[g].keys[s] = key
+ m.groups[g].values[s] = value
+ return
+ }
+ }
+ // |key| is not in group |g|,
+ // stop probing if we see an empty slot
+ matches = metaMatchEmpty(&m.ctrl[g])
+ if matches != 0 { // insert
+ s := nextMatch(&matches)
+ m.groups[g].keys[s] = key
+ m.groups[g].values[s] = value
+ m.ctrl[g][s] = int8(lo)
+ m.resident++
+ return
+ }
+ g += 1 // linear probing
+ if g >= uint32(len(m.groups)) {
+ g = 0
+ }
+ }
+}
+
+// Delete attempts to remove |key|, returning true if it was present.
+func (m *Map[K, V]) Delete(key K) (ok bool) {
+ hi, lo := splitHash(m.hash.Hash(key))
+ g := probeStart(hi, len(m.groups))
+ for {
+ matches := metaMatchH2(&m.ctrl[g], lo)
+ for matches != 0 {
+ s := nextMatch(&matches)
+ if key == m.groups[g].keys[s] {
+ ok = true
+ // optimization: if |m.ctrl[g]| contains any empty
+ // metadata bytes, we can physically delete |key|
+ // rather than placing a tombstone.
+ // The observation is that any probes into group |g|
+ // would already be terminated by the existing empty
+ // slot, and therefore reclaiming slot |s| will not
+ // cause premature termination of probes into |g|.
+ if metaMatchEmpty(&m.ctrl[g]) != 0 {
+ m.ctrl[g][s] = empty
+ m.resident--
+ } else {
+ m.ctrl[g][s] = tombstone
+ m.dead++
+ }
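+ // zero the slot so the deleted key and value can be garbage collected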
+ var k K
+ var v V
+ m.groups[g].keys[s] = k
+ m.groups[g].values[s] = v
+ return
+ }
+ }
+ // |key| is not in group |g|,
+ // stop probing if we see an empty slot
+ matches = metaMatchEmpty(&m.ctrl[g])
+ if matches != 0 { // |key| absent
+ ok = false
+ return
+ }
+ g += 1 // linear probing
+ if g >= uint32(len(m.groups)) {
+ g = 0
+ }
+ }
+}
+
+// Iter iterates the elements of the Map, passing them to the callback.
+// It guarantees that any key in the Map will be visited only once, and
+// for un-mutated Maps, every key will be visited once. If the Map is
+// mutated during iteration, mutations will be reflected on return from
+// Iter, but the set of keys visited by Iter is non-deterministic.
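+//
+// A usage sketch (the callback body and its types are illustrative):
+//
+//	m.Iter(func(k string, v int) (stop bool) {
+//		// ... use k and v ...
+//		return false // keep iterating
+//	})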
+func (m *Map[K, V]) Iter(cb func(k K, v V) (stop bool)) {
+ // take a consistent view of the table in case
+ // we rehash during iteration
+ ctrl, groups := m.ctrl, m.groups
+ // pick a random starting group
+ g := randIntN(len(groups))
+ for n := 0; n < len(groups); n++ {
+ for s, c := range ctrl[g] {
+ if c == empty || c == tombstone {
+ continue
+ }
+ k, v := groups[g].keys[s], groups[g].values[s]
+ if stop := cb(k, v); stop {
+ return
+ }
+ }
+ g++
+ if g >= uint32(len(groups)) {
+ g = 0
+ }
+ }
+}
+
+// Clear removes all elements from the Map.
+func (m *Map[K, V]) Clear() {
+ for i, c := range m.ctrl {
+ for j := range c {
+ m.ctrl[i][j] = empty
+ }
+ }
+ var k K
+ var v V
+ for i := range m.groups {
+ g := &m.groups[i]
+ for i := range g.keys {
+ g.keys[i] = k
+ g.values[i] = v
+ }
+ }
+ m.resident, m.dead = 0, 0
+}
+
+// Count returns the number of elements in the Map.
+func (m *Map[K, V]) Count() int {
+ return int(m.resident - m.dead)
+}
+
+// Capacity returns the number of additional elements
+// that can be added to the Map before resizing.
+func (m *Map[K, V]) Capacity() int {
+ return int(m.limit - m.resident)
+}
+
+// find returns the location of |key| if present, or its insertion location if absent.
+// For performance, find is manually inlined into the public methods.
+func (m *Map[K, V]) find(key K, hi h1, lo h2) (g, s uint32, ok bool) {
+ g = probeStart(hi, len(m.groups))
+ for {
+ matches := metaMatchH2(&m.ctrl[g], lo)
+ for matches != 0 {
+ s = nextMatch(&matches)
+ if key == m.groups[g].keys[s] {
+ return g, s, true
+ }
+ }
+ // |key| is not in group |g|,
+ // stop probing if we see an empty slot
+ matches = metaMatchEmpty(&m.ctrl[g])
+ if matches != 0 {
+ s = nextMatch(&matches)
+ return g, s, false
+ }
+ g += 1 // linear probing
+ if g >= uint32(len(m.groups)) {
+ g = 0
+ }
+ }
+}
+
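+// nextSize returns the group count for the next rehash: double the
+// current size, or the current size when at least half of the non-empty
+// slots are tombstones, since rehashing then just reclaims them.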
+func (m *Map[K, V]) nextSize() (n uint32) {
+ n = uint32(len(m.groups)) * 2
+ if m.dead >= (m.resident / 2) {
+ n = uint32(len(m.groups))
+ }
+ return
+}
+
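+// rehash reallocates the table with |n| groups, reseeds the hasher,
+// and re-inserts every live entry from the old table.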
+func (m *Map[K, V]) rehash(n uint32) {
+ groups, ctrl := m.groups, m.ctrl
+ m.groups = make([]group[K, V], n)
+ m.ctrl = make([]metadata, n)
+ for i := range m.ctrl {
+ m.ctrl[i] = newEmptyMetadata()
+ }
+ m.hash = maphash.NewSeed(m.hash)
+ m.limit = n * maxAvgGroupLoad
+ m.resident, m.dead = 0, 0
+ for g := range ctrl {
+ for s := range ctrl[g] {
+ c := ctrl[g][s]
+ if c == empty || c == tombstone {
+ continue
+ }
+ m.Put(groups[g].keys[s], groups[g].values[s])
+ }
+ }
+}
+
+func (m *Map[K, V]) loadFactor() float32 {
+ slots := float32(len(m.groups) * groupSize)
+ return float32(m.resident-m.dead) / slots
+}
+
+// numGroups returns the minimum number of groups needed to store |n| elems.
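+// For example, if maxAvgGroupLoad were 14, numGroups(100) would return
+// ceil(100/14) = 8 (the constant is defined elsewhere in this package).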
+func numGroups(n uint32) (groups uint32) {
+ groups = (n + maxAvgGroupLoad - 1) / maxAvgGroupLoad
+ if groups == 0 {
+ groups = 1
+ }
+ return
+}
+
+func newEmptyMetadata() (meta metadata) {
+ for i := range meta {
+ meta[i] = empty
+ }
+ return
+}
+
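+// splitHash splits a 64-bit hash into its 57-bit h1 prefix and 7-bit h2
+// suffix. For example, splitHash(0xff) yields h1 = 0x1 (0xff >> 7) and
+// h2 = 0x7f (0xff & 0x7f); the values are illustrative only.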
+func splitHash(h uint64) (h1, h2) {
+ return h1((h & h1Mask) >> 7), h2(h & h2Mask)
+}
+
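+// probeStart selects the initial probe group for the h1 prefix |hi|.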
+func probeStart(hi h1, groups int) uint32 {
+ return fastModN(uint32(hi), uint32(groups))
+}
+
+// lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
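+// fastModN maps |x| into [0, n) without a modulo. For example, with
+// n = 8 and x = 1<<31 (halfway through the uint32 range), the result is
+// (uint64(1<<31) * 8) >> 32 = 4.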
+func fastModN(x, n uint32) uint32 {
+ return uint32((uint64(x) * uint64(n)) >> 32)
+}
+
+// randIntN returns a random number in the interval [0, n).
+func randIntN(n int) uint32 {
+ return fastModN(fastrand(), uint32(n))
+}