summaryrefslogtreecommitdiff
path: root/vendor/codeberg.org/gruf/go-structr/index.go
diff options
context:
space:
mode:
authorLibravatar kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com>2024-01-29 15:13:53 +0000
committerLibravatar GitHub <noreply@github.com>2024-01-29 15:13:53 +0000
commit81198fa2d09f07bb54ec28f41b20f275e88a3254 (patch)
tree14f0d24168e085c50c27f6dadbe1fd71e63faca1 /vendor/codeberg.org/gruf/go-structr/index.go
parent[bugfix] Fix Postgres emoji delete, emoji category change (#2570) (diff)
downloadgotosocial-81198fa2d09f07bb54ec28f41b20f275e88a3254.tar.xz
update go-structr v0.2.0 => v0.3.0 to fix possible hash collision issues (#2586)
Diffstat (limited to 'vendor/codeberg.org/gruf/go-structr/index.go')
-rw-r--r--vendor/codeberg.org/gruf/go-structr/index.go391
1 files changed, 285 insertions, 106 deletions
diff --git a/vendor/codeberg.org/gruf/go-structr/index.go b/vendor/codeberg.org/gruf/go-structr/index.go
index 4999249f5..e2768b3ca 100644
--- a/vendor/codeberg.org/gruf/go-structr/index.go
+++ b/vendor/codeberg.org/gruf/go-structr/index.go
@@ -1,7 +1,12 @@
package structr
import (
+ "reflect"
"strings"
+ "sync"
+ "unsafe"
+
+ "github.com/zeebo/xxh3"
)
// IndexConfig defines config variables
@@ -13,6 +18,18 @@ type IndexConfig struct {
// keys for this index. Nested fields should
// be specified using periods. An example:
// "Username,Favorites.Color"
+ //
+ // Field types supported include:
+ // - ~int
+ // - ~int8
+ // - ~int16
+ // - ~int32
+ // - ~int64
+ // - ~float32
+ // - ~float64
+ // - ~string
+ // - slices of above
+ // - ptrs of above
Fields string
// Multiple indicates whether to accept multiple
@@ -32,12 +49,12 @@ type IndexConfig struct {
}
// Index is an exposed Cache internal model, used to
-// generate keys and store struct results by the init
-// defined key generation configuration. This model is
-// exposed to provide faster lookups in the case that
-// you would like to manually provide the used index
-// via the Cache.___By() series of functions, or access
-// the underlying index key generator.
+// extract struct keys, generate hash checksums for them
+// and store struct results by the init defined config.
+// This model is exposed to provide faster lookups in the
+// case that you would like to manually provide the used
+// index via the Cache.___By() series of functions, or
+// access the underlying index key generator.
type Index[StructType any] struct {
// name is the actual name of this
@@ -45,68 +62,168 @@ type Index[StructType any] struct {
// string value of contained fields.
name string
- // struct field key hasher.
- hasher Hasher[StructType]
+ // backing data store of the index, containing
+ // the cached results contained within wrapping
+ // index_entry{} which also contains the exact
+ // key each result is stored under. the hash map
+ // only keys by the xxh3 hash checksum for speed.
+ data map[Hash]*list //[*index_entry[StructType]]
+
+ // struct fields encompassed by
+ // keys (+ hashes) of this index.
+ fields []structfield
+
+ // index flags:
+ // - 1 << 0 = unique
+ // - 1 << 1 = allow zero
+ flags uint8
+}
- // backing in-memory data store of
- // generated index keys to result lists.
- data map[uint64]*list[*result[StructType]]
+// Key returns the configured fields as key, and hash sum of key.
+func (i *Index[T]) Key(value T) ([]any, Hash, bool) {
+ h := get_hasher()
+ key, sum, ok := index_key(i, h, value)
+ hash_pool.Put(h)
+ return key, sum, ok
+}
- // whether to allow
- // multiple results
- // per index key.
- unique bool
+func is_unique(f uint8) bool {
+ const mask = uint8(1) << 0
+ return f&mask != 0
}
-// init initializes this index with the given configuration.
-func (i *Index[T]) init(config IndexConfig, max int) {
- fields := strings.Split(config.Fields, ",")
+func set_is_unique(f *uint8) {
+ const mask = uint8(1) << 0
+ (*f) |= mask
+}
+
+func allow_zero(f uint8) bool {
+ const mask = uint8(1) << 1
+ return f&mask != 0
+}
+
+func set_allow_zero(f *uint8) {
+ const mask = uint8(1) << 1
+ (*f) |= mask
+}
+
+func init_index[T any](i *Index[T], config IndexConfig, max int) {
+ // Set name from the raw
+ // struct fields string.
i.name = config.Fields
- i.hasher = NewHasher[T](fields, config.AllowZero)
- i.unique = !config.Multiple
- i.data = make(map[uint64]*list[*result[T]], max+1)
+
+ // Set struct flags.
+ if config.AllowZero {
+ set_allow_zero(&i.flags)
+ }
+ if !config.Multiple {
+ set_is_unique(&i.flags)
+ }
+
+ // Split to get the containing struct fields.
+ fields := strings.Split(config.Fields, ",")
+
+ // Preallocate expected struct field slice.
+ i.fields = make([]structfield, len(fields))
+
+ // Get the reflected struct ptr type.
+ t := reflect.TypeOf((*T)(nil)).Elem()
+
+ for x, fieldName := range fields {
+ // Split name to account for nesting.
+ names := strings.Split(fieldName, ".")
+
+ // Look for usable struct field.
+ i.fields[x] = find_field(t, names)
+ }
+
+ // Initialize index_entry list store.
+ i.data = make(map[Hash]*list, max+1)
}
-// Hasher returns the hash checksummer associated with this index.
-func (i *Index[T]) Hasher() *Hasher[T] {
- return &i.hasher
+func index_key[T any](i *Index[T], h *xxh3.Hasher, value T) ([]any, Hash, bool) {
+ key := extract_fields(value, i.fields)
+ sum, zero := hash_sum(i.fields, h, key)
+ if zero && !allow_zero(i.flags) {
+ var zero Hash
+ return nil, zero, false
+ }
+ return key, sum, true
}
-func index_append[T any](c *Cache[T], i *Index[T], key uint64, res *result[T]) {
- // Acquire + setup indexkey.
- ikey := indexkey_acquire(c)
- ikey.entry.Value = res
- ikey.key = key
- ikey.index = i
+func index_hash[T any](i *Index[T], h *xxh3.Hasher, key []any) (Hash, bool) {
+ sum, zero := hash_sum(i.fields, h, key)
+ if zero && !allow_zero(i.flags) {
+ var zero Hash
+ return zero, false
+ }
+ return sum, true
+}
- // Append to result's indexkeys.
- res.keys = append(res.keys, ikey)
+func index_get[T any](i *Index[T], hash Hash, key []any) *list {
+ l := i.data[hash]
+ if l == nil {
+ return nil
+ }
+ entry := (*index_entry)(l.head.data)
+ if !is_equal(entry.key, key) {
+ return l
+ }
+ return l
+}
+func index_append[T any](c *Cache[T], i *Index[T], hash Hash, key []any, res *result) {
// Get list at key.
- l := i.data[key]
+ l := i.data[hash]
if l == nil {
// Allocate new list.
- l = list_acquire(c)
- i.data[key] = l
+ l = list_acquire()
+ i.data[hash] = l
- } else if i.unique {
+ } else if entry := (*index_entry)(l.head.data); //nocollapse
+ !is_equal(entry.key, key) {
+
+ // Collision! Drop all.
+ delete(i.data, hash)
+
+ // Iterate entries in list.
+ for x := 0; x < l.len; x++ {
+
+ // Pop current head.
+ list_remove(l, l.head)
+
+ // Extract result.
+ res := entry.result
+
+ // Drop index entry from res.
+ result_drop_index(res, i)
+ if len(res.indexed) == 0 {
+
+ // Old res now unused,
+ // release to mem pool.
+ result_release(c, res)
+ }
+ }
+
+ return
- // Remove currently
- // indexed result.
- old := l.head
- l.remove(old)
+ } else if is_unique(i.flags) {
+
+ // Remove current
+ // indexed entry.
+ list_remove(l, l.head)
// Get ptr to old
- // result before we
+ // entry before we
// release to pool.
- res := old.Value
+ res := entry.result
// Drop this index's key from
// old res now not indexed here.
- result_dropIndex(c, res, i)
- if len(res.keys) == 0 {
+ result_drop_index(res, i)
+ if len(res.indexed) == 0 {
// Old res now unused,
// release to mem pool.
@@ -114,100 +231,162 @@ func index_append[T any](c *Cache[T], i *Index[T], key uint64, res *result[T]) {
}
}
- // Add result indexkey to
- // front of results list.
- l.pushFront(&ikey.entry)
-}
-
-func index_deleteOne[T any](c *Cache[T], i *Index[T], ikey *indexkey[T]) {
- // Get list at key.
- l := i.data[ikey.key]
- if l == nil {
- return
- }
-
- // Remove from list.
- l.remove(&ikey.entry)
- if l.len == 0 {
+ // Acquire + setup index entry.
+ entry := index_entry_acquire()
+ entry.index = unsafe.Pointer(i)
+ entry.result = res
+ entry.key = key
+ entry.hash = hash
- // Remove list from map.
- delete(i.data, ikey.key)
+ // Append to result's indexed entries.
+ res.indexed = append(res.indexed, entry)
- // Release list to pool.
- list_release(c, l)
- }
+ // Add index entry to index list.
+ list_push_front(l, &entry.elem)
}
-func index_delete[T any](c *Cache[T], i *Index[T], key uint64, fn func(*result[T])) {
+func index_delete[T any](c *Cache[T], i *Index[T], hash Hash, key []any, fn func(*result)) {
if fn == nil {
panic("nil fn")
}
- // Get list at key.
- l := i.data[key]
+ // Get list at hash.
+ l := i.data[hash]
if l == nil {
return
}
- // Delete data at key.
- delete(i.data, key)
+ entry := (*index_entry)(l.head.data)
- // Iterate results in list.
+ // Check contains expected key for hash.
+ if !is_equal(entry.key, key) {
+ return
+ }
+
+ // Delete data at hash.
+ delete(i.data, hash)
+
+ // Iterate entries in list.
for x := 0; x < l.len; x++ {
// Pop current head.
- res := l.head.Value
- l.remove(l.head)
+ entry := (*index_entry)(l.head.data)
+ list_remove(l, l.head)
- // Delete index's key
- // from result tracking.
- result_dropIndex(c, res, i)
+ // Extract result.
+ res := entry.result
// Call hook.
fn(res)
+
+ // Drop index entry from res.
+ result_drop_index(res, i)
}
- // Release list to pool.
- list_release(c, l)
+ // Release to pool.
+ list_release(l)
}
-type indexkey[T any] struct {
- // linked list entry the related
- // result is stored under in the
- // Index.data[key] linked list.
- entry elem[*result[T]]
+func index_delete_entry[T any](c *Cache[T], entry *index_entry) {
+ // Get from entry.
+ i := (*Index[T])(entry.index)
+
+ // Get list at hash sum.
+ l := i.data[entry.hash]
+ if l == nil {
+ return
+ }
+
+ // Remove entry from list.
+ list_remove(l, &entry.elem)
+ if l.len == 0 {
+
+ // Remove list from map.
+ delete(i.data, entry.hash)
+
+ // Release to pool.
+ list_release(l)
+ }
- // key is the generated index key
- // the related result is indexed
- // under, in the below index.
- key uint64
+ // Extract result.
+ res := entry.result
- // index is the index that the
- // related result is indexed in.
- index *Index[T]
+ // Drop index entry from res.
+ result_drop_index(res, i)
}
-func indexkey_acquire[T any](c *Cache[T]) *indexkey[T] {
- var ikey *indexkey[T]
+var entry_pool sync.Pool
+
+type index_entry struct {
+ // elem contains the list element
+ // appended to each per-hash list
+ // within the Index{} type. the
+ // contained value is a self-ref.
+ elem list_elem
+
+ // index is the Index{} this
+ // index_entry{} is stored in.
+ index unsafe.Pointer
+
+ // result is the actual
+ // underlying result stored
+ // within the index. this
+ // also contains a ref to
+ // this *index_entry in order
+ // to track indices each result
+ // is currently stored under.
+ result *result
+
+ // key contains the actual
+ // key this item was stored
+ // under, used for collision
+ // check.
+ key []any
+
+ // hash contains computed
+ // hash checksum of .key.
+ hash Hash
+}
- if len(c.keyPool) == 0 {
- // Allocate new key.
- ikey = new(indexkey[T])
- } else {
- // Pop result from pool slice.
- ikey = c.keyPool[len(c.keyPool)-1]
- c.keyPool = c.keyPool[:len(c.keyPool)-1]
+func index_entry_acquire() *index_entry {
+ // Acquire from pool.
+ v := entry_pool.Get()
+ if v == nil {
+ v = new(index_entry)
}
- return ikey
+ // Cast index_entry value.
+ entry := v.(*index_entry)
+
+ // Set index list elem entry on itself.
+ entry.elem.data = unsafe.Pointer(entry)
+
+ return entry
}
-func indexkey_release[T any](c *Cache[T], ikey *indexkey[T]) {
- // Reset indexkey.
- ikey.entry.Value = nil
- ikey.key = 0
- ikey.index = nil
+func index_entry_release(entry *index_entry) {
+ var zero Hash
- // Release indexkey to memory pool.
- c.keyPool = append(c.keyPool, ikey)
+ // Reset index entry.
+ entry.elem.data = nil
+ entry.index = nil
+ entry.result = nil
+ entry.key = nil
+ entry.hash = zero
+
+ // Release to pool.
+ entry_pool.Put(entry)
+}
+
+// is_equal returns whether 2 key slices are equal.
+func is_equal(k1, k2 []any) bool {
+ if len(k1) != len(k2) {
+ return false
+ }
+ for i := range k1 {
+ if k1[i] != k2[i] {
+ return false
+ }
+ }
+ return true
}