From e3c2b790fd4329494979bd27be7fa162600f1436 Mon Sep 17 00:00:00 2001 From: kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com> Date: Mon, 11 Nov 2024 15:45:19 +0000 Subject: [performance] minimise log field allocations (#3529) * when appending log field only do so by minimal amount * move slice utils to separate package to fix import cycle, add GrowJust() and AppendJust() functions * fix GrowJust() not returning slice of same length * improved xslices tests * make AppendJust() test check for slice contents, fix AppendJust() final copying behaviour * add a +1 with field growth to try minimise allocation for log 'msg' field --- internal/util/slices.go | 182 ---------------------------- internal/util/slices_test.go | 94 --------------- internal/util/xslices/slices.go | 223 +++++++++++++++++++++++++++++++++++ internal/util/xslices/slices_test.go | 162 +++++++++++++++++++++++++ 4 files changed, 385 insertions(+), 276 deletions(-) delete mode 100644 internal/util/slices.go delete mode 100644 internal/util/slices_test.go create mode 100644 internal/util/xslices/slices.go create mode 100644 internal/util/xslices/slices_test.go (limited to 'internal/util') diff --git a/internal/util/slices.go b/internal/util/slices.go deleted file mode 100644 index 955fe8830..000000000 --- a/internal/util/slices.go +++ /dev/null @@ -1,182 +0,0 @@ -// GoToSocial -// Copyright (C) GoToSocial Authors admin@gotosocial.org -// SPDX-License-Identifier: AGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package util - -import ( - "slices" -) - -// Deduplicate deduplicates entries in the given slice. -func Deduplicate[T comparable](in []T) []T { - var ( - inL = len(in) - unique = make(map[T]struct{}, inL) - deduped = make([]T, 0, inL) - ) - - for _, v := range in { - if _, ok := unique[v]; ok { - // Already have this. - continue - } - - unique[v] = struct{}{} - deduped = append(deduped, v) - } - - return deduped -} - -// DeduplicateFunc deduplicates entries in the given -// slice, using the result of key() to gauge uniqueness. -func DeduplicateFunc[T any, C comparable](in []T, key func(v T) C) []T { - var ( - inL = len(in) - unique = make(map[C]struct{}, inL) - deduped = make([]T, 0, inL) - ) - - if key == nil { - panic("nil func") - } - - for _, v := range in { - k := key(v) - - if _, ok := unique[k]; ok { - // Already have this. - continue - } - - unique[k] = struct{}{} - deduped = append(deduped, v) - } - - return deduped -} - -// Gather will collect the values of type V from input type []T, -// passing each item to 'get' and appending V to the return slice. -func Gather[T, V any](out []V, in []T, get func(T) V) []V { - if get == nil { - panic("nil func") - } - - // Starting write index - // in the resliced / re - // alloc'd output slice. - start := len(out) - - // Total required slice len. - total := start + len(in) - - if total > cap(out) { - // Reallocate output with - // capacity for total len. - out2 := make([]V, len(out), total) - copy(out2, out) - out = out2 - } - - // Reslice with capacity - // up to total required. - out = out[:total] - - // Gather vs from 'in'. - for i, v := range in { - j := start + i - out[j] = get(v) - } - - return out -} - -// GatherIf is functionally similar to Gather(), but only when return bool is true. -// If you don't need to check the boolean, Gather() will be very slightly faster. -func GatherIf[T, V any](out []V, in []T, get func(T) (V, bool)) []V { - if get == nil { - panic("nil func") - } - - if cap(out)-len(out) < len(in) { - // Reallocate output with capacity for 'in'. - out2 := make([]V, len(out), cap(out)+len(in)) - copy(out2, out) - out = out2 - } - - // Gather vs from 'in'. - for _, v := range in { - if v, ok := get(v); ok { - out = append(out, v) - } - } - - return out -} - -// Collate will collect the values of type K from input type []T, -// passing each item to 'get' and deduplicating the end result. -// This is equivalent to calling Gather() followed by Deduplicate(). -func Collate[T any, K comparable](in []T, get func(T) K) []K { - if get == nil { - panic("nil func") - } - - ks := make([]K, 0, len(in)) - km := make(map[K]struct{}, len(in)) - - for i := 0; i < len(in); i++ { - // Get next k. - k := get(in[i]) - - if _, ok := km[k]; !ok { - // New value, add - // to map + slice. - ks = append(ks, k) - km[k] = struct{}{} - } - } - - return ks -} - -// OrderBy orders a slice of given type by the provided alternative slice of comparable type. -func OrderBy[T any, K comparable](in []T, keys []K, key func(T) K) { - if key == nil { - panic("nil func") - } - - // Create lookup of keys->idx. - m := make(map[K]int, len(in)) - for i, k := range keys { - m[k] = i - } - - // Sort according to the reverse lookup. - slices.SortFunc(in, func(a, b T) int { - ai := m[key(a)] - bi := m[key(b)] - if ai < bi { - return -1 - } else if bi < ai { - return +1 - } - return 0 - }) -} diff --git a/internal/util/slices_test.go b/internal/util/slices_test.go deleted file mode 100644 index c93e489f5..000000000 --- a/internal/util/slices_test.go +++ /dev/null @@ -1,94 +0,0 @@ -// GoToSocial -// Copyright (C) GoToSocial Authors admin@gotosocial.org -// SPDX-License-Identifier: AGPL-3.0-or-later -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. -// -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -package util_test - -import ( - "net/url" - "slices" - "testing" - - "github.com/superseriousbusiness/gotosocial/internal/util" -) - -var ( - testURLSlice = []*url.URL{} -) - -func TestGather(t *testing.T) { - out := util.Gather(nil, []*url.URL{ - {Scheme: "https", Host: "google.com", Path: "/some-search"}, - {Scheme: "http", Host: "example.com", Path: "/robots.txt"}, - }, (*url.URL).String) - if !slices.Equal(out, []string{ - "https://google.com/some-search", - "http://example.com/robots.txt", - }) { - t.Fatal("unexpected gather output") - } - - out = util.Gather([]string{ - "starting input string", - "another starting input", - }, []*url.URL{ - {Scheme: "https", Host: "google.com", Path: "/some-search"}, - {Scheme: "http", Host: "example.com", Path: "/robots.txt"}, - }, (*url.URL).String) - if !slices.Equal(out, []string{ - "starting input string", - "another starting input", - "https://google.com/some-search", - "http://example.com/robots.txt", - }) { - t.Fatal("unexpected gather output") - } -} - -func TestGatherIf(t *testing.T) { - out := util.GatherIf(nil, []string{ - "hello world", - "not hello world", - "hello world", - }, func(s string) (string, bool) { - return s, s == "hello world" - }) - if !slices.Equal(out, []string{ - "hello world", - "hello world", - }) { - t.Fatal("unexpected gatherif output") - } - - out = util.GatherIf([]string{ - "starting input string", - "another starting input", - }, []string{ - "hello world", - "not hello world", - "hello world", - }, func(s string) (string, bool) { - return s, s == "hello world" - }) - if !slices.Equal(out, []string{ - "starting input string", - "another starting input", - "hello world", - "hello world", - }) { - t.Fatal("unexpected gatherif output") - } -} diff --git a/internal/util/xslices/slices.go b/internal/util/xslices/slices.go new file mode 100644 index 000000000..1c1c159b2 --- /dev/null +++ b/internal/util/xslices/slices.go @@ -0,0 +1,223 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package xslices + +import ( + "slices" +) + +// GrowJust increases slice capacity to guarantee +// extra room 'size', where in the case that it does +// need to allocate more it ONLY allocates 'size' extra. +// This is different to typical slices.Grow behaviour, +// which simply guarantees extra through append() which +// may allocate more than necessary extra size. +func GrowJust[T any](in []T, size int) []T { + + if cap(in)-len(in) < size { + // Reallocate enough for in + size. + in2 := make([]T, len(in), len(in)+size) + _ = copy(in2, in) + in = in2 + } + + return in +} + +// AppendJust appends extra elements to slice, +// ONLY allocating at most len(extra) elements. This +// is different to the typical append behaviour which +// will append extra, in a manner to reduce the need +// for new allocations on every call to append. +func AppendJust[T any](in []T, extra ...T) []T { + l := len(in) + + if cap(in)-l < len(extra) { + // Reallocate enough for + extra. + in2 := make([]T, l+len(extra)) + _ = copy(in2, in) + in = in2 + } else { + // Reslice for + extra. + in = in[:l+len(extra)] + } + + // Copy extra into slice. + _ = copy(in[l:], extra) + return in +} + +// Deduplicate deduplicates entries in the given slice. +func Deduplicate[T comparable](in []T) []T { + var ( + inL = len(in) + unique = make(map[T]struct{}, inL) + deduped = make([]T, 0, inL) + ) + + for _, v := range in { + if _, ok := unique[v]; ok { + // Already have this. + continue + } + + unique[v] = struct{}{} + deduped = append(deduped, v) + } + + return deduped +} + +// DeduplicateFunc deduplicates entries in the given +// slice, using the result of key() to gauge uniqueness. +func DeduplicateFunc[T any, C comparable](in []T, key func(v T) C) []T { + var ( + inL = len(in) + unique = make(map[C]struct{}, inL) + deduped = make([]T, 0, inL) + ) + + if key == nil { + panic("nil func") + } + + for _, v := range in { + k := key(v) + + if _, ok := unique[k]; ok { + // Already have this. + continue + } + + unique[k] = struct{}{} + deduped = append(deduped, v) + } + + return deduped +} + +// Gather will collect the values of type V from input type []T, +// passing each item to 'get' and appending V to the return slice. +func Gather[T, V any](out []V, in []T, get func(T) V) []V { + if get == nil { + panic("nil func") + } + + // Starting write index + // in the resliced / re + // alloc'd output slice. + start := len(out) + + // Total required slice len. + total := start + len(in) + + if total > cap(out) { + // Reallocate output with + // capacity for total len. + out2 := make([]V, len(out), total) + copy(out2, out) + out = out2 + } + + // Reslice with capacity + // up to total required. + out = out[:total] + + // Gather vs from 'in'. + for i, v := range in { + j := start + i + out[j] = get(v) + } + + return out +} + +// GatherIf is functionally similar to Gather(), but only when return bool is true. +// If you don't need to check the boolean, Gather() will be very slightly faster. +func GatherIf[T, V any](out []V, in []T, get func(T) (V, bool)) []V { + if get == nil { + panic("nil func") + } + + if cap(out)-len(out) < len(in) { + // Reallocate output with capacity for 'in'. + out2 := make([]V, len(out), cap(out)+len(in)) + copy(out2, out) + out = out2 + } + + // Gather vs from 'in'. + for _, v := range in { + if v, ok := get(v); ok { + out = append(out, v) + } + } + + return out +} + +// Collate will collect the values of type K from input type []T, +// passing each item to 'get' and deduplicating the end result. +// This is equivalent to calling Gather() followed by Deduplicate(). +func Collate[T any, K comparable](in []T, get func(T) K) []K { + if get == nil { + panic("nil func") + } + + ks := make([]K, 0, len(in)) + km := make(map[K]struct{}, len(in)) + + for i := 0; i < len(in); i++ { + // Get next k. + k := get(in[i]) + + if _, ok := km[k]; !ok { + // New value, add + // to map + slice. + ks = append(ks, k) + km[k] = struct{}{} + } + } + + return ks +} + +// OrderBy orders a slice of given type by the provided alternative slice of comparable type. +func OrderBy[T any, K comparable](in []T, keys []K, key func(T) K) { + if key == nil { + panic("nil func") + } + + // Create lookup of keys->idx. + m := make(map[K]int, len(in)) + for i, k := range keys { + m[k] = i + } + + // Sort according to the reverse lookup. + slices.SortFunc(in, func(a, b T) int { + ai := m[key(a)] + bi := m[key(b)] + if ai < bi { + return -1 + } else if bi < ai { + return +1 + } + return 0 + }) +} diff --git a/internal/util/xslices/slices_test.go b/internal/util/xslices/slices_test.go new file mode 100644 index 000000000..7c62ac77f --- /dev/null +++ b/internal/util/xslices/slices_test.go @@ -0,0 +1,162 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package xslices_test + +import ( + "math/rand" + "net/url" + "slices" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/superseriousbusiness/gotosocial/internal/util/xslices" +) + +func TestGrowJust(t *testing.T) { + for _, l := range []int{0, 2, 4, 8, 16, 32, 64} { + for _, x := range []int{0, 2, 4, 8, 16, 32, 64} { + s := make([]int, l, l+x) + for _, g := range []int{0, 2, 4, 8, 16, 32, 64} { + s2 := xslices.GrowJust(s, g) + + // Slice length should not be different. + assert.Equal(t, len(s), len(s2)) + + switch { + // If slice already has capacity for + // 'g' then it should not be changed. + case cap(s) >= len(s)+g: + assert.Equal(t, cap(s), cap(s2)) + + // Else, returned slice should only + // have capacity for original length + // plus extra elements, NOTHING MORE. + default: + assert.Equal(t, cap(s2), len(s)+g) + } + } + } + } +} + +func TestAppendJust(t *testing.T) { + for _, l := range []int{0, 2, 4, 8, 16, 32, 64} { + for _, x := range []int{0, 2, 4, 8, 16, 32, 64} { + s := make([]int, l, l+x) + + // Randomize slice. + for i := range s { + s[i] = rand.Int() + } + + for _, a := range []int{0, 2, 4, 8, 16, 32, 64} { + toAppend := make([]int, a) + + // Randomize appended vals. + for i := range toAppend { + toAppend[i] = rand.Int() + } + + s2 := xslices.AppendJust(s, toAppend...) + + // Slice length should be as expected. + assert.Equal(t, len(s)+a, len(s2)) + + // Slice contents should be as expected. + assert.Equal(t, append(s, toAppend...), s2) + + switch { + // If slice already has capacity for + // 'toAppend' then it should not change. + case cap(s) >= len(s)+a: + assert.Equal(t, cap(s), cap(s2)) + + // Else, returned slice should only + // have capacity for original length + // plus extra elements, NOTHING MORE. + default: + assert.Equal(t, len(s)+a, cap(s2)) + } + } + } + } +} + +func TestGather(t *testing.T) { + out := xslices.Gather(nil, []*url.URL{ + {Scheme: "https", Host: "google.com", Path: "/some-search"}, + {Scheme: "http", Host: "example.com", Path: "/robots.txt"}, + }, (*url.URL).String) + if !slices.Equal(out, []string{ + "https://google.com/some-search", + "http://example.com/robots.txt", + }) { + t.Fatal("unexpected gather output") + } + + out = xslices.Gather([]string{ + "starting input string", + "another starting input", + }, []*url.URL{ + {Scheme: "https", Host: "google.com", Path: "/some-search"}, + {Scheme: "http", Host: "example.com", Path: "/robots.txt"}, + }, (*url.URL).String) + if !slices.Equal(out, []string{ + "starting input string", + "another starting input", + "https://google.com/some-search", + "http://example.com/robots.txt", + }) { + t.Fatal("unexpected gather output") + } +} + +func TestGatherIf(t *testing.T) { + out := xslices.GatherIf(nil, []string{ + "hello world", + "not hello world", + "hello world", + }, func(s string) (string, bool) { + return s, s == "hello world" + }) + if !slices.Equal(out, []string{ + "hello world", + "hello world", + }) { + t.Fatal("unexpected gatherif output") + } + + out = xslices.GatherIf([]string{ + "starting input string", + "another starting input", + }, []string{ + "hello world", + "not hello world", + "hello world", + }, func(s string) (string, bool) { + return s, s == "hello world" + }) + if !slices.Equal(out, []string{ + "starting input string", + "another starting input", + "hello world", + "hello world", + }) { + t.Fatal("unexpected gatherif output") + } +} -- cgit v1.3