diff options
Diffstat (limited to 'vendor/github.com/golang/geo/s2/lexicon.go')
-rw-r--r-- | vendor/github.com/golang/geo/s2/lexicon.go | 175 |
1 files changed, 175 insertions, 0 deletions
diff --git a/vendor/github.com/golang/geo/s2/lexicon.go b/vendor/github.com/golang/geo/s2/lexicon.go new file mode 100644 index 000000000..41cbffdc2 --- /dev/null +++ b/vendor/github.com/golang/geo/s2/lexicon.go @@ -0,0 +1,175 @@ +// Copyright 2020 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package s2 + +import ( + "encoding/binary" + "hash/adler32" + "math" + "sort" +) + +// TODO(roberts): If any of these are worth making public, change the +// method signatures and type names. + +// emptySetID represents the last ID that will ever be generated. +// (Non-negative IDs are reserved for singleton sets.) +var emptySetID = int32(math.MinInt32) + +// idSetLexicon compactly represents a set of non-negative +// integers such as array indices ("ID sets"). It is especially suitable when +// either (1) there are many duplicate sets, or (2) there are many singleton +// or empty sets. See also sequenceLexicon. +// +// Each distinct ID set is mapped to a 32-bit integer. Empty and singleton +// sets take up no additional space; the set itself is represented +// by the unique ID assigned to the set. Duplicate sets are automatically +// eliminated. Note also that ID sets are referred to using 32-bit integers +// rather than pointers. +type idSetLexicon struct { + idSets *sequenceLexicon +} + +func newIDSetLexicon() *idSetLexicon { + return &idSetLexicon{ + idSets: newSequenceLexicon(), + } +} + +// add adds the given set of integers to the lexicon if it is not already +// present, and return the unique ID for this set. The values are automatically +// sorted and duplicates are removed. +// +// The primary difference between this and sequenceLexicon are: +// 1. Empty and singleton sets are represented implicitly; they use no space. +// 2. Sets are represented rather than sequences; the ordering of values is +// not important and duplicates are removed. +// 3. The values must be 32-bit non-negative integers only. +func (l *idSetLexicon) add(ids ...int32) int32 { + // Empty sets have a special ID chosen not to conflict with other IDs. + if len(ids) == 0 { + return emptySetID + } + + // Singleton sets are represented by their element. + if len(ids) == 1 { + return ids[0] + } + + // Canonicalize the set by sorting and removing duplicates. + // + // Creates a new slice in order to not alter the supplied values. + set := uniqueInt32s(ids) + + // Non-singleton sets are represented by the bitwise complement of the ID + // returned by the sequenceLexicon + return ^l.idSets.add(set) +} + +// idSet returns the set of integers corresponding to an ID returned by add. +func (l *idSetLexicon) idSet(setID int32) []int32 { + if setID >= 0 { + return []int32{setID} + } + if setID == emptySetID { + return []int32{} + } + + return l.idSets.sequence(^setID) +} + +func (l *idSetLexicon) clear() { + l.idSets.clear() +} + +// sequenceLexicon compactly represents a sequence of values (e.g., tuples). +// It automatically eliminates duplicates slices, and maps the remaining +// sequences to sequentially increasing integer IDs. See also idSetLexicon. +// +// Each distinct sequence is mapped to a 32-bit integer. +type sequenceLexicon struct { + values []int32 + begins []uint32 + + // idSet is a mapping of a sequence hash to sequence index in the lexicon. + idSet map[uint32]int32 +} + +func newSequenceLexicon() *sequenceLexicon { + return &sequenceLexicon{ + begins: []uint32{0}, + idSet: make(map[uint32]int32), + } +} + +// clears all data from the lexicon. +func (l *sequenceLexicon) clear() { + l.values = nil + l.begins = []uint32{0} + l.idSet = make(map[uint32]int32) +} + +// add adds the given value to the lexicon if it is not already present, and +// returns its ID. IDs are assigned sequentially starting from zero. +func (l *sequenceLexicon) add(ids []int32) int32 { + if id, ok := l.idSet[hashSet(ids)]; ok { + return id + } + for _, v := range ids { + l.values = append(l.values, v) + } + l.begins = append(l.begins, uint32(len(l.values))) + + id := int32(len(l.begins)) - 2 + l.idSet[hashSet(ids)] = id + + return id +} + +// sequence returns the original sequence of values for the given ID. +func (l *sequenceLexicon) sequence(id int32) []int32 { + return l.values[l.begins[id]:l.begins[id+1]] +} + +// size reports the number of value sequences in the lexicon. +func (l *sequenceLexicon) size() int { + // Subtract one because the list of begins starts out with the first element set to 0. + return len(l.begins) - 1 +} + +// hash returns a hash of this sequence of int32s. +func hashSet(s []int32) uint32 { + // TODO(roberts): We just need a way to nicely hash all the values down to + // a 32-bit value. To ensure no unnecessary dependencies we use the core + // library types available to do this. Is there a better option? + a := adler32.New() + binary.Write(a, binary.LittleEndian, s) + return a.Sum32() +} + +// uniqueInt32s returns the sorted and uniqued set of int32s from the input. +func uniqueInt32s(in []int32) []int32 { + var vals []int32 + m := make(map[int32]bool) + for _, i := range in { + if m[i] { + continue + } + m[i] = true + vals = append(vals, i) + } + sort.Slice(vals, func(i, j int) bool { return vals[i] < vals[j] }) + return vals +} |