update go-structr v0.2.0 => v0.3.0 to fix possible hash collision issues (#2586)

author: kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com> 2024-01-29 15:13:53 +0000
committer: GitHub <noreply@github.com> 2024-01-29 15:13:53 +0000
commit: 81198fa2d09f07bb54ec28f41b20f275e88a3254 (patch)
tree: 14f0d24168e085c50c27f6dadbe1fd71e63faca1 /vendor/codeberg.org/gruf/go-structr/index.go
parent: [bugfix] Fix Postgres emoji delete, emoji category change (#2570) (diff)
download: gotosocial-81198fa2d09f07bb54ec28f41b20f275e88a3254.tar.xz
1 files changed, 285 insertions, 106 deletions
diff --git a/vendor/codeberg.org/gruf/go-structr/index.go b/vendor/codeberg.org/gruf/go-structr/index.go
index 4999249f5..e2768b3ca 100644
--- a/vendor/codeberg.org/gruf/go-structr/index.go
+++ b/vendor/codeberg.org/gruf/go-structr/index.go
@@ -1,7 +1,12 @@
 package structr
 
 import (
+	"reflect"
 	"strings"
+	"sync"
+	"unsafe"
+
+	"github.com/zeebo/xxh3"
 )
 
 // IndexConfig defines config variables
@@ -13,6 +18,18 @@ type IndexConfig struct {
 	// keys for this index. Nested fields should
 	// be specified using periods. An example:
 	// "Username,Favorites.Color"
+	//
+	// Field types supported include:
+	// - ~int
+	// - ~int8
+	// - ~int16
+	// - ~int32
+	// - ~int64
+	// - ~float32
+	// - ~float64
+	// - ~string
+	// - slices of above
+	// - ptrs of above
 	Fields string
 
 	// Multiple indicates whether to accept multiple
@@ -32,12 +49,12 @@ type IndexConfig struct {
 }
 
 // Index is an exposed Cache internal model, used to
-// generate keys and store struct results by the init
-// defined key generation configuration. This model is
-// exposed to provide faster lookups in the case that
-// you would like to manually provide the used index
-// via the Cache.___By() series of functions, or access
-// the underlying index key generator.
+// extract struct keys, generate hash checksums for them
+// and store struct results by the init defined config.
+// This model is exposed to provide faster lookups in the
+// case that you would like to manually provide the used
+// index via the Cache.___By() series of functions, or
+// access the underlying index key generator.
 type Index[StructType any] struct {
 
 	// name is the actual name of this
@@ -45,68 +62,168 @@ type Index[StructType any] struct {
 	// string value of contained fields.
 	name string
 
-	// struct field key hasher.
-	hasher Hasher[StructType]
+	// backing data store of the index, containing
+	// the cached results contained within wrapping
+	// index_entry{} which also contains the exact
+	// key each result is stored under. the hash map
+	// only keys by the xxh3 hash checksum for speed.
+	data map[Hash]*list //[*index_entry[StructType]]
+
+	// struct fields encompassed by
+	// keys (+ hashes) of this index.
+	fields []structfield
+
+	// index flags:
+	// - 1 << 0 = unique
+	// - 1 << 1 = allow zero
+	flags uint8
+}
 
-	// backing in-memory data store of
-	// generated index keys to result lists.
-	data map[uint64]*list[*result[StructType]]
+// Key returns the configured fields as key, and hash sum of key.
+func (i *Index[T]) Key(value T) ([]any, Hash, bool) {
+	h := get_hasher()
+	key, sum, ok := index_key(i, h, value)
+	hash_pool.Put(h)
+	return key, sum, ok
+}
 
-	// whether to allow
-	// multiple results
-	// per index key.
-	unique bool
+func is_unique(f uint8) bool {
+	const mask = uint8(1) << 0
+	return f&mask != 0
 }
 
-// init initializes this index with the given configuration.
-func (i *Index[T]) init(config IndexConfig, max int) {
-	fields := strings.Split(config.Fields, ",")
+func set_is_unique(f *uint8) {
+	const mask = uint8(1) << 0
+	(*f) |= mask
+}
+
+func allow_zero(f uint8) bool {
+	const mask = uint8(1) << 1
+	return f&mask != 0
+}
+
+func set_allow_zero(f *uint8) {
+	const mask = uint8(1) << 1
+	(*f) |= mask
+}
+
+func init_index[T any](i *Index[T], config IndexConfig, max int) {
+	// Set name from the raw
+	// struct fields string.
 	i.name = config.Fields
-	i.hasher = NewHasher[T](fields, config.AllowZero)
-	i.unique = !config.Multiple
-	i.data = make(map[uint64]*list[*result[T]], max+1)
+
+	// Set struct flags.
+	if config.AllowZero {
+		set_allow_zero(&i.flags)
+	}
+	if !config.Multiple {
+		set_is_unique(&i.flags)
+	}
+
+	// Split to get the containing struct fields.
+	fields := strings.Split(config.Fields, ",")
+
+	// Preallocate expected struct field slice.
+	i.fields = make([]structfield, len(fields))
+
+	// Get the reflected struct ptr type.
+	t := reflect.TypeOf((*T)(nil)).Elem()
+
+	for x, fieldName := range fields {
+		// Split name to account for nesting.
+		names := strings.Split(fieldName, ".")
+
+		// Look for usable struct field.
+		i.fields[x] = find_field(t, names)
+	}
+
+	// Initialize index_entry list store.
+	i.data = make(map[Hash]*list, max+1)
 }
 
-// Hasher returns the hash checksummer associated with this index.
-func (i *Index[T]) Hasher() *Hasher[T] {
-	return &i.hasher
+func index_key[T any](i *Index[T], h *xxh3.Hasher, value T) ([]any, Hash, bool) {
+	key := extract_fields(value, i.fields)
+	sum, zero := hash_sum(i.fields, h, key)
+	if zero && !allow_zero(i.flags) {
+		var zero Hash
+		return nil, zero, false
+	}
+	return key, sum, true
 }
 
-func index_append[T any](c *Cache[T], i *Index[T], key uint64, res *result[T]) {
-	// Acquire + setup indexkey.
-	ikey := indexkey_acquire(c)
-	ikey.entry.Value = res
-	ikey.key = key
-	ikey.index = i
+func index_hash[T any](i *Index[T], h *xxh3.Hasher, key []any) (Hash, bool) {
+	sum, zero := hash_sum(i.fields, h, key)
+	if zero && !allow_zero(i.flags) {
+		var zero Hash
+		return zero, false
+	}
+	return sum, true
+}
 
-	// Append to result's indexkeys.
-	res.keys = append(res.keys, ikey)
+func index_get[T any](i *Index[T], hash Hash, key []any) *list {
+	l := i.data[hash]
+	if l == nil {
+		return nil
+	}
+	entry := (*index_entry)(l.head.data)
+	if !is_equal(entry.key, key) {
+		return l
+	}
+	return l
+}
 
+func index_append[T any](c *Cache[T], i *Index[T], hash Hash, key []any, res *result) {
 	// Get list at key.
-	l := i.data[key]
+	l := i.data[hash]
 
 	if l == nil {
 
 		// Allocate new list.
-		l = list_acquire(c)
-		i.data[key] = l
+		l = list_acquire()
+		i.data[hash] = l
 
-	} else if i.unique {
+	} else if entry := (*index_entry)(l.head.data); //nocollapse
+	!is_equal(entry.key, key) {
+
+		// Collision! Drop all.
+		delete(i.data, hash)
+
+		// Iterate entries in list.
+		for x := 0; x < l.len; x++ {
+
+			// Pop current head.
+			list_remove(l, l.head)
+
+			// Extract result.
+			res := entry.result
+
+			// Drop index entry from res.
+			result_drop_index(res, i)
+			if len(res.indexed) == 0 {
+
+				// Old res now unused,
+				// release to mem pool.
+				result_release(c, res)
+			}
+		}
+
+		return
 
-		// Remove currently
-		// indexed result.
-		old := l.head
-		l.remove(old)
+	} else if is_unique(i.flags) {
+
+		// Remove current
+		// indexed entry.
+		list_remove(l, l.head)
 
 		// Get ptr to old
-		// result before we
+		// entry before we
 		// release to pool.
-		res := old.Value
+		res := entry.result
 
 		// Drop this index's key from
 		// old res now not indexed here.
-		result_dropIndex(c, res, i)
-		if len(res.keys) == 0 {
+		result_drop_index(res, i)
+		if len(res.indexed) == 0 {
 
 			// Old res now unused,
 			// release to mem pool.
@@ -114,100 +231,162 @@ func index_append[T any](c *Cache[T], i *Index[T], key uint64, res *result[T]) {
 		}
 	}
 
-	// Add result indexkey to
-	// front of results list.
-	l.pushFront(&ikey.entry)
-}
-
-func index_deleteOne[T any](c *Cache[T], i *Index[T], ikey *indexkey[T]) {
-	// Get list at key.
-	l := i.data[ikey.key]
-	if l == nil {
-		return
-	}
-
-	// Remove from list.
-	l.remove(&ikey.entry)
-	if l.len == 0 {
+	// Acquire + setup index entry.
+	entry := index_entry_acquire()
+	entry.index = unsafe.Pointer(i)
+	entry.result = res
+	entry.key = key
+	entry.hash = hash
 
-		// Remove list from map.
-		delete(i.data, ikey.key)
+	// Append to result's indexed entries.
+	res.indexed = append(res.indexed, entry)
 
-		// Release list to pool.
-		list_release(c, l)
-	}
+	// Add index entry to index list.
+	list_push_front(l, &entry.elem)
 }
 
-func index_delete[T any](c *Cache[T], i *Index[T], key uint64, fn func(*result[T])) {
+func index_delete[T any](c *Cache[T], i *Index[T], hash Hash, key []any, fn func(*result)) {
 	if fn == nil {
 		panic("nil fn")
 	}
 
-	// Get list at key.
-	l := i.data[key]
+	// Get list at hash.
+	l := i.data[hash]
 	if l == nil {
 		return
 	}
 
-	// Delete data at key.
-	delete(i.data, key)
+	entry := (*index_entry)(l.head.data)
 
-	// Iterate results in list.
+	// Check contains expected key for hash.
+	if !is_equal(entry.key, key) {
+		return
+	}
+
+	// Delete data at hash.
+	delete(i.data, hash)
+
+	// Iterate entries in list.
 	for x := 0; x < l.len; x++ {
 
 		// Pop current head.
-		res := l.head.Value
-		l.remove(l.head)
+		entry := (*index_entry)(l.head.data)
+		list_remove(l, l.head)
 
-		// Delete index's key
-		// from result tracking.
-		result_dropIndex(c, res, i)
+		// Extract result.
+		res := entry.result
 
 		// Call hook.
 		fn(res)
+
+		// Drop index entry from res.
+		result_drop_index(res, i)
 	}
 
-	// Release list to pool.
-	list_release(c, l)
+	// Release to pool.
+	list_release(l)
 }
 
-type indexkey[T any] struct {
-	// linked list entry the related
-	// result is stored under in the
-	// Index.data[key] linked list.
-	entry elem[*result[T]]
+func index_delete_entry[T any](c *Cache[T], entry *index_entry) {
+	// Get from entry.
+	i := (*Index[T])(entry.index)
+
+	// Get list at hash sum.
+	l := i.data[entry.hash]
+	if l == nil {
+		return
+	}
+
+	// Remove entry from list.
+	list_remove(l, &entry.elem)
+	if l.len == 0 {
+
+		// Remove list from map.
+		delete(i.data, entry.hash)
+
+		// Release to pool.
+		list_release(l)
+	}
 
-	// key is the generated index key
-	// the related result is indexed
-	// under, in the below index.
-	key uint64
+	// Extract result.
+	res := entry.result
 
-	// index is the index that the
-	// related result is indexed in.
-	index *Index[T]
+	// Drop index entry from res.
+	result_drop_index(res, i)
 }
 
-func indexkey_acquire[T any](c *Cache[T]) *indexkey[T] {
-	var ikey *indexkey[T]
+var entry_pool sync.Pool
+
+type index_entry struct {
+	// elem contains the list element
+	// appended to each per-hash list
+	// within the Index{} type. the
+	// contained value is a self-ref.
+	elem list_elem
+
+	// index is the Index{} this
+	// index_entry{} is stored in.
+	index unsafe.Pointer
+
+	// result is the actual
+	// underlying result stored
+	// within the index. this
+	// also contains a ref to
+	// this *index_entry in order
+	// to track indices each result
+	// is currently stored under.
+	result *result
+
+	// key contains the actual
+	// key this item was stored
+	// under, used for collision
+	// check.
+	key []any
+
+	// hash contains computed
+	// hash checksum of .key.
+	hash Hash
+}
 
-	if len(c.keyPool) == 0 {
-		// Allocate new key.
-		ikey = new(indexkey[T])
-	} else {
-		// Pop result from pool slice.
-		ikey = c.keyPool[len(c.keyPool)-1]
-		c.keyPool = c.keyPool[:len(c.keyPool)-1]
+func index_entry_acquire() *index_entry {
+	// Acquire from pool.
+	v := entry_pool.Get()
+	if v == nil {
+		v = new(index_entry)
 	}
 
-	return ikey
+	// Cast index_entry value.
+	entry := v.(*index_entry)
+
+	// Set index list elem entry on itself.
+	entry.elem.data = unsafe.Pointer(entry)
+
+	return entry
 }
 
-func indexkey_release[T any](c *Cache[T], ikey *indexkey[T]) {
-	// Reset indexkey.
-	ikey.entry.Value = nil
-	ikey.key = 0
-	ikey.index = nil
+func index_entry_release(entry *index_entry) {
+	var zero Hash
 
-	// Release indexkey to memory pool.
-	c.keyPool = append(c.keyPool, ikey)
+	// Reset index entry.
+	entry.elem.data = nil
+	entry.index = nil
+	entry.result = nil
+	entry.key = nil
+	entry.hash = zero
+
+	// Release to pool.
+	entry_pool.Put(entry)
+}
+
+// is_equal returns whether 2 key slices are equal.
+func is_equal(k1, k2 []any) bool {
+	if len(k1) != len(k2) {
+		return false
+	}
+	for i := range k1 {
+		if k1[i] != k2[i] {
+			return false
+		}
+	}
+	return true
 }
author	kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com>	2024-01-29 15:13:53 +0000
committer	GitHub <noreply@github.com>	2024-01-29 15:13:53 +0000
commit	81198fa2d09f07bb54ec28f41b20f275e88a3254 (patch)
tree	14f0d24168e085c50c27f6dadbe1fd71e63faca1 /vendor/codeberg.org/gruf/go-structr/index.go
parent	[bugfix] Fix Postgres emoji delete, emoji category change (#2570) (diff)
download	gotosocial-81198fa2d09f07bb54ec28f41b20f275e88a3254.tar.xz