diff options
author | 2024-01-29 15:13:53 +0000 | |
---|---|---|
committer | 2024-01-29 15:13:53 +0000 | |
commit | 81198fa2d09f07bb54ec28f41b20f275e88a3254 (patch) | |
tree | 14f0d24168e085c50c27f6dadbe1fd71e63faca1 /vendor/codeberg.org/gruf/go-structr/index.go | |
parent | [bugfix] Fix Postgres emoji delete, emoji category change (#2570) (diff) | |
download | gotosocial-81198fa2d09f07bb54ec28f41b20f275e88a3254.tar.xz |
update go-structr v0.2.0 => v0.3.0 to fix possible hash collision issues (#2586)
Diffstat (limited to 'vendor/codeberg.org/gruf/go-structr/index.go')
-rw-r--r-- | vendor/codeberg.org/gruf/go-structr/index.go | 391 |
1 files changed, 285 insertions, 106 deletions
diff --git a/vendor/codeberg.org/gruf/go-structr/index.go b/vendor/codeberg.org/gruf/go-structr/index.go index 4999249f5..e2768b3ca 100644 --- a/vendor/codeberg.org/gruf/go-structr/index.go +++ b/vendor/codeberg.org/gruf/go-structr/index.go @@ -1,7 +1,12 @@ package structr import ( + "reflect" "strings" + "sync" + "unsafe" + + "github.com/zeebo/xxh3" ) // IndexConfig defines config variables @@ -13,6 +18,18 @@ type IndexConfig struct { // keys for this index. Nested fields should // be specified using periods. An example: // "Username,Favorites.Color" + // + // Field types supported include: + // - ~int + // - ~int8 + // - ~int16 + // - ~int32 + // - ~int64 + // - ~float32 + // - ~float64 + // - ~string + // - slices of above + // - ptrs of above Fields string // Multiple indicates whether to accept multiple @@ -32,12 +49,12 @@ type IndexConfig struct { } // Index is an exposed Cache internal model, used to -// generate keys and store struct results by the init -// defined key generation configuration. This model is -// exposed to provide faster lookups in the case that -// you would like to manually provide the used index -// via the Cache.___By() series of functions, or access -// the underlying index key generator. +// extract struct keys, generate hash checksums for them +// and store struct results by the init defined config. +// This model is exposed to provide faster lookups in the +// case that you would like to manually provide the used +// index via the Cache.___By() series of functions, or +// access the underlying index key generator. type Index[StructType any] struct { // name is the actual name of this @@ -45,68 +62,168 @@ type Index[StructType any] struct { // string value of contained fields. name string - // struct field key hasher. - hasher Hasher[StructType] + // backing data store of the index, containing + // the cached results contained within wrapping + // index_entry{} which also contains the exact + // key each result is stored under. the hash map + // only keys by the xxh3 hash checksum for speed. + data map[Hash]*list //[*index_entry[StructType]] + + // struct fields encompassed by + // keys (+ hashes) of this index. + fields []structfield + + // index flags: + // - 1 << 0 = unique + // - 1 << 1 = allow zero + flags uint8 +} - // backing in-memory data store of - // generated index keys to result lists. - data map[uint64]*list[*result[StructType]] +// Key returns the configured fields as key, and hash sum of key. +func (i *Index[T]) Key(value T) ([]any, Hash, bool) { + h := get_hasher() + key, sum, ok := index_key(i, h, value) + hash_pool.Put(h) + return key, sum, ok +} - // whether to allow - // multiple results - // per index key. - unique bool +func is_unique(f uint8) bool { + const mask = uint8(1) << 0 + return f&mask != 0 } -// init initializes this index with the given configuration. -func (i *Index[T]) init(config IndexConfig, max int) { - fields := strings.Split(config.Fields, ",") +func set_is_unique(f *uint8) { + const mask = uint8(1) << 0 + (*f) |= mask +} + +func allow_zero(f uint8) bool { + const mask = uint8(1) << 1 + return f&mask != 0 +} + +func set_allow_zero(f *uint8) { + const mask = uint8(1) << 1 + (*f) |= mask +} + +func init_index[T any](i *Index[T], config IndexConfig, max int) { + // Set name from the raw + // struct fields string. i.name = config.Fields - i.hasher = NewHasher[T](fields, config.AllowZero) - i.unique = !config.Multiple - i.data = make(map[uint64]*list[*result[T]], max+1) + + // Set struct flags. + if config.AllowZero { + set_allow_zero(&i.flags) + } + if !config.Multiple { + set_is_unique(&i.flags) + } + + // Split to get the containing struct fields. + fields := strings.Split(config.Fields, ",") + + // Preallocate expected struct field slice. + i.fields = make([]structfield, len(fields)) + + // Get the reflected struct ptr type. + t := reflect.TypeOf((*T)(nil)).Elem() + + for x, fieldName := range fields { + // Split name to account for nesting. + names := strings.Split(fieldName, ".") + + // Look for usable struct field. + i.fields[x] = find_field(t, names) + } + + // Initialize index_entry list store. + i.data = make(map[Hash]*list, max+1) } -// Hasher returns the hash checksummer associated with this index. -func (i *Index[T]) Hasher() *Hasher[T] { - return &i.hasher +func index_key[T any](i *Index[T], h *xxh3.Hasher, value T) ([]any, Hash, bool) { + key := extract_fields(value, i.fields) + sum, zero := hash_sum(i.fields, h, key) + if zero && !allow_zero(i.flags) { + var zero Hash + return nil, zero, false + } + return key, sum, true } -func index_append[T any](c *Cache[T], i *Index[T], key uint64, res *result[T]) { - // Acquire + setup indexkey. - ikey := indexkey_acquire(c) - ikey.entry.Value = res - ikey.key = key - ikey.index = i +func index_hash[T any](i *Index[T], h *xxh3.Hasher, key []any) (Hash, bool) { + sum, zero := hash_sum(i.fields, h, key) + if zero && !allow_zero(i.flags) { + var zero Hash + return zero, false + } + return sum, true +} - // Append to result's indexkeys. - res.keys = append(res.keys, ikey) +func index_get[T any](i *Index[T], hash Hash, key []any) *list { + l := i.data[hash] + if l == nil { + return nil + } + entry := (*index_entry)(l.head.data) + if !is_equal(entry.key, key) { + return l + } + return l +} +func index_append[T any](c *Cache[T], i *Index[T], hash Hash, key []any, res *result) { // Get list at key. - l := i.data[key] + l := i.data[hash] if l == nil { // Allocate new list. - l = list_acquire(c) - i.data[key] = l + l = list_acquire() + i.data[hash] = l - } else if i.unique { + } else if entry := (*index_entry)(l.head.data); //nocollapse + !is_equal(entry.key, key) { + + // Collision! Drop all. + delete(i.data, hash) + + // Iterate entries in list. + for x := 0; x < l.len; x++ { + + // Pop current head. + list_remove(l, l.head) + + // Extract result. + res := entry.result + + // Drop index entry from res. + result_drop_index(res, i) + if len(res.indexed) == 0 { + + // Old res now unused, + // release to mem pool. + result_release(c, res) + } + } + + return - // Remove currently - // indexed result. - old := l.head - l.remove(old) + } else if is_unique(i.flags) { + + // Remove current + // indexed entry. + list_remove(l, l.head) // Get ptr to old - // result before we + // entry before we // release to pool. - res := old.Value + res := entry.result // Drop this index's key from // old res now not indexed here. - result_dropIndex(c, res, i) - if len(res.keys) == 0 { + result_drop_index(res, i) + if len(res.indexed) == 0 { // Old res now unused, // release to mem pool. @@ -114,100 +231,162 @@ func index_append[T any](c *Cache[T], i *Index[T], key uint64, res *result[T]) { } } - // Add result indexkey to - // front of results list. - l.pushFront(&ikey.entry) -} - -func index_deleteOne[T any](c *Cache[T], i *Index[T], ikey *indexkey[T]) { - // Get list at key. - l := i.data[ikey.key] - if l == nil { - return - } - - // Remove from list. - l.remove(&ikey.entry) - if l.len == 0 { + // Acquire + setup index entry. + entry := index_entry_acquire() + entry.index = unsafe.Pointer(i) + entry.result = res + entry.key = key + entry.hash = hash - // Remove list from map. - delete(i.data, ikey.key) + // Append to result's indexed entries. + res.indexed = append(res.indexed, entry) - // Release list to pool. - list_release(c, l) - } + // Add index entry to index list. + list_push_front(l, &entry.elem) } -func index_delete[T any](c *Cache[T], i *Index[T], key uint64, fn func(*result[T])) { +func index_delete[T any](c *Cache[T], i *Index[T], hash Hash, key []any, fn func(*result)) { if fn == nil { panic("nil fn") } - // Get list at key. - l := i.data[key] + // Get list at hash. + l := i.data[hash] if l == nil { return } - // Delete data at key. - delete(i.data, key) + entry := (*index_entry)(l.head.data) - // Iterate results in list. + // Check contains expected key for hash. + if !is_equal(entry.key, key) { + return + } + + // Delete data at hash. + delete(i.data, hash) + + // Iterate entries in list. for x := 0; x < l.len; x++ { // Pop current head. - res := l.head.Value - l.remove(l.head) + entry := (*index_entry)(l.head.data) + list_remove(l, l.head) - // Delete index's key - // from result tracking. - result_dropIndex(c, res, i) + // Extract result. + res := entry.result // Call hook. fn(res) + + // Drop index entry from res. + result_drop_index(res, i) } - // Release list to pool. - list_release(c, l) + // Release to pool. + list_release(l) } -type indexkey[T any] struct { - // linked list entry the related - // result is stored under in the - // Index.data[key] linked list. - entry elem[*result[T]] +func index_delete_entry[T any](c *Cache[T], entry *index_entry) { + // Get from entry. + i := (*Index[T])(entry.index) + + // Get list at hash sum. + l := i.data[entry.hash] + if l == nil { + return + } + + // Remove entry from list. + list_remove(l, &entry.elem) + if l.len == 0 { + + // Remove list from map. + delete(i.data, entry.hash) + + // Release to pool. + list_release(l) + } - // key is the generated index key - // the related result is indexed - // under, in the below index. - key uint64 + // Extract result. + res := entry.result - // index is the index that the - // related result is indexed in. - index *Index[T] + // Drop index entry from res. + result_drop_index(res, i) } -func indexkey_acquire[T any](c *Cache[T]) *indexkey[T] { - var ikey *indexkey[T] +var entry_pool sync.Pool + +type index_entry struct { + // elem contains the list element + // appended to each per-hash list + // within the Index{} type. the + // contained value is a self-ref. + elem list_elem + + // index is the Index{} this + // index_entry{} is stored in. + index unsafe.Pointer + + // result is the actual + // underlying result stored + // within the index. this + // also contains a ref to + // this *index_entry in order + // to track indices each result + // is currently stored under. + result *result + + // key contains the actual + // key this item was stored + // under, used for collision + // check. + key []any + + // hash contains computed + // hash checksum of .key. + hash Hash +} - if len(c.keyPool) == 0 { - // Allocate new key. - ikey = new(indexkey[T]) - } else { - // Pop result from pool slice. - ikey = c.keyPool[len(c.keyPool)-1] - c.keyPool = c.keyPool[:len(c.keyPool)-1] +func index_entry_acquire() *index_entry { + // Acquire from pool. + v := entry_pool.Get() + if v == nil { + v = new(index_entry) } - return ikey + // Cast index_entry value. + entry := v.(*index_entry) + + // Set index list elem entry on itself. + entry.elem.data = unsafe.Pointer(entry) + + return entry } -func indexkey_release[T any](c *Cache[T], ikey *indexkey[T]) { - // Reset indexkey. - ikey.entry.Value = nil - ikey.key = 0 - ikey.index = nil +func index_entry_release(entry *index_entry) { + var zero Hash - // Release indexkey to memory pool. - c.keyPool = append(c.keyPool, ikey) + // Reset index entry. + entry.elem.data = nil + entry.index = nil + entry.result = nil + entry.key = nil + entry.hash = zero + + // Release to pool. + entry_pool.Put(entry) +} + +// is_equal returns whether 2 key slices are equal. +func is_equal(k1, k2 []any) bool { + if len(k1) != len(k2) { + return false + } + for i := range k1 { + if k1[i] != k2[i] { + return false + } + } + return true } |