diff options
Diffstat (limited to 'vendor/codeberg.org/gruf/go-split/splitter.go')
| -rw-r--r-- | vendor/codeberg.org/gruf/go-split/splitter.go | 204 |
1 files changed, 204 insertions, 0 deletions
// Reconstructed from the diff adding vendor/codeberg.org/gruf/go-split/splitter.go.
// The file belongs to package split and imports "errors", "strings", "unicode"
// and "unicode/utf8".

// Splitter holds onto a byte buffer for use in minimising allocations during SplitFunc().
type Splitter struct{ B []byte }

// SplitFunc will split input string on commas, taking into account string quoting and
// stripping leading whitespace, passing each split to the given function hook.
//
// Elements may be wrapped in single or double quotes, in which case commas inside
// the quotes are kept and the closing quote must be followed (after optional
// whitespace) by a comma or the end of input. A backslash escapes the following
// delimiter (the comma, or the active quote character); a pair of backslashes
// directly before a delimiter stands for one literal backslash. Empty segments
// (consecutive commas) are skipped without invoking fn. Whitespace trailing an
// unquoted element is NOT trimmed.
//
// The string passed to fn is a fresh copy of the internal buffer, so fn may retain it.
// Any error returned by fn aborts the split and is returned as-is. Malformed input
// yields "missing end quote" or "missing comma separator" errors.
func (s *Splitter) SplitFunc(str string, fn func(string) error) error {
	for {
		// Reset buffer, keeping its capacity for reuse.
		s.B = s.B[:0]

		// Trim leading space.
		str = trimLeadingSpace(str)

		if len(str) < 1 {
			// Reached end.
			return nil
		}

		switch {
		// Single / double quoted element.
		case str[0] == '\'', str[0] == '"':
			// Index of the closing quote within str; .next() works on
			// str[1:], hence the +1 (and -1 "not found" becoming 0).
			i := 1 + s.next(str[1:], str[0])
			if i == 0 {
				return errors.New("missing end quote")
			}

			// Pass next element to callback func.
			if err := fn(string(s.B)); err != nil {
				return err
			}

			// Reslice past closing quote + trim leading space.
			str = trimLeadingSpace(str[i+1:])

			if len(str) < 1 {
				// Reached end.
				return nil
			}

			if str[0] != ',' {
				// Malformed element without comma after quote.
				return errors.New("missing comma separator")
			}

			// Skip comma.
			str = str[1:]

		// Empty segment.
		case str[0] == ',':
			str = str[1:]

		// No quoting.
		default:
			// Index of the terminating comma within str. This can never
			// be 0 here, as a leading comma is handled by the case above.
			i := s.next(str, ',')
			if i < 0 {
				// Reached end of input (we know len > 0),
				// the remainder is the final element.
				return fn(string(s.B))
			}

			// Pass next element to callback.
			if err := fn(string(s.B)); err != nil {
				return err
			}

			// Skip past element and its comma.
			str = str[i+1:]
		}
	}
}

// next will build the next string element in s.B up to the first unescaped instance
// of c, returning the index of that instance within str, or -1 if the end of the
// string was reached without finding one.
//
// A run of N backslashes directly before c contributes N/2 literal backslashes to
// the element; c is escaped (and written to the element) only when N is odd.
// Backslashes before any other byte, or at the very end of str, are copied verbatim.
func (s *Splitter) next(str string, c byte) int {
	// Guarantee buf large enough: the element can never
	// be longer than str, so appends below do not reallocate.
	if len(str) > cap(s.B)-len(s.B) {
		nb := make([]byte, len(s.B), 2*cap(s.B)+len(str))
		copy(nb, s.B)
		s.B = nb
	}

	// Number of consecutive backslashes seen
	// immediately before the current byte.
	var delims int

	for i := 0; i < len(str); i++ {
		// Count backslashes, deferring the decision on how
		// to write them until we see what follows.
		if str[i] == '\\' {
			delims++
			continue
		}

		if str[i] == c {
			// Every pair of backslashes is one literal backslash.
			if count := delims / 2; count > 0 {
				s.B = append(s.B, backslashes(count)...)
			}

			// An even count leaves no backslash over
			// to escape c, so the element ends here.
			if delims%2 == 0 {
				return i
			}
		} else if delims > 0 {
			// Not escaping anything, keep backslashes as they are.
			s.B = append(s.B, backslashes(delims)...)
		}

		// Write byte to buffer.
		s.B = append(s.B, str[i])

		// Reset count.
		delims = 0
	}

	// Trailing backslashes escape nothing, keep
	// them rather than silently dropping input.
	if delims > 0 {
		s.B = append(s.B, backslashes(delims)...)
	}

	return -1
}

// asciiSpace is a lookup table of ascii space chars (see: strings.asciiSet).
var asciiSpace = func() (as [8]uint32) {
	as['\t'/32] |= 1 << ('\t' % 32)
	as['\n'/32] |= 1 << ('\n' % 32)
	as['\v'/32] |= 1 << ('\v' % 32)
	as['\f'/32] |= 1 << ('\f' % 32)
	as['\r'/32] |= 1 << ('\r' % 32)
	as[' '/32] |= 1 << (' ' % 32)
	return
}()

// trimLeadingSpace trims the leading space from a string.
func trimLeadingSpace(str string) string {
	var start int

	for ; start < len(str); start++ {
		// Current byte.
		char := str[start]

		// If beyond ascii range, trim using slower rune check.
		if char >= utf8.RuneSelf {
			return trimLeadingSpaceSlow(str[start:])
		}

		// This is first non-space ASCII, trim up to here.
		if (asciiSpace[char/32] & (1 << (char % 32))) == 0 {
			break
		}
	}

	return str[start:]
}

// trimLeadingSpaceSlow trims leading space using the slower unicode.IsSpace check.
// A string consisting solely of whitespace trims to the empty string.
func trimLeadingSpaceSlow(str string) string {
	for i, r := range str {
		if !unicode.IsSpace(r) {
			return str[i:]
		}
	}

	// Every rune was whitespace.
	return ""
}

// backslashes will return a string of backslashes of given length.
func backslashes(count int) string {
	const cached = `\\\\\\\\\\\\\\\\\\\\`

	// Fast-path, slice the string const (no allocation).
	if count <= len(cached) {
		return cached[:count]
	}

	// Slow-path, build custom string.
	return backslashSlow(count)
}

// backslashSlow will build a string of backslashes of custom length.
func backslashSlow(count int) string {
	return strings.Repeat(`\`, count)
}
