summaryrefslogtreecommitdiff
path: root/vendor/codeberg.org/gruf/go-split/splitter.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/codeberg.org/gruf/go-split/splitter.go')
-rw-r--r--vendor/codeberg.org/gruf/go-split/splitter.go204
1 files changed, 204 insertions, 0 deletions
diff --git a/vendor/codeberg.org/gruf/go-split/splitter.go b/vendor/codeberg.org/gruf/go-split/splitter.go
new file mode 100644
index 000000000..e87cd8454
--- /dev/null
+++ b/vendor/codeberg.org/gruf/go-split/splitter.go
@@ -0,0 +1,204 @@
+package split
+
+import (
+ "errors"
+ "strings"
+ "unicode"
+ "unicode/utf8"
+)
+
+// Splitter holds onto a byte buffer for use in minimising allocations during SplitFunc().
+type Splitter struct{ B []byte }
+
+// SplitFunc will split input string on commas, taking into account string quoting and
+// stripping extra whitespace, passing each split to the given function hook.
+func (s *Splitter) SplitFunc(str string, fn func(string) error) error {
+ for {
+ // Reset buffer
+ s.B = s.B[0:0]
+
+ // Trim leading space
+ str = trimLeadingSpace(str)
+
+ if len(str) < 1 {
+ // Reached end
+ return nil
+ }
+
+ switch {
+ // Single / double quoted
+ case str[0] == '\'', str[0] == '"':
+ // Calculate next string elem
+ i := 1 + s.next(str[1:], str[0])
+ if i == 0 /* i.e. if .next() returned -1 */ {
+ return errors.New("missing end quote")
+ }
+
+ // Pass next element to callback func
+ if err := fn(string(s.B)); err != nil {
+ return err
+ }
+
+ // Reslice + trim leading space
+ str = trimLeadingSpace(str[i+1:])
+
+ if len(str) < 1 {
+ // reached end
+ return nil
+ }
+
+ if str[0] != ',' {
+ // malformed element without comma after quote
+ return errors.New("missing comma separator")
+ }
+
+ // Skip comma
+ str = str[1:]
+
+ // Empty segment
+ case str[0] == ',':
+ str = str[1:]
+
+ // No quoting
+ default:
+ // Calculate next string elem
+ i := s.next(str, ',')
+
+ switch i {
+ // Reached end
+ case -1:
+ // we know len > 0
+
+ // Pass to callback
+ return fn(string(s.B))
+
+ // Empty elem
+ case 0:
+ str = str[1:]
+
+ // Non-zero elem
+ default:
+ // Pass next element to callback
+ if err := fn(string(s.B)); err != nil {
+ return err
+ }
+
+ // Skip past eleme
+ str = str[i+1:]
+ }
+ }
+ }
+}
+
+// next will build the next string element in s.B up to non-delimited instance of c,
+// returning number of characters iterated, or -1 if the end of the string was reached.
+func (s *Splitter) next(str string, c byte) int {
+ var delims int
+
+ // Guarantee buf large enough
+ if len(str) > cap(s.B)-len(s.B) {
+ nb := make([]byte, 2*cap(s.B)+len(str))
+ _ = copy(nb, s.B)
+ s.B = nb[:len(s.B)]
+ }
+
+ for i := 0; i < len(str); i++ {
+ // Increment delims
+ if str[i] == '\\' {
+ delims++
+ continue
+ }
+
+ if str[i] == c {
+ var count int
+
+ if count = delims / 2; count > 0 {
+ // Add backslashes to buffer
+ slashes := backslashes(count)
+ s.B = append(s.B, slashes...)
+ }
+
+ // Reached delim'd char
+ if delims-count == 0 {
+ return i
+ }
+ } else if delims > 0 {
+ // Add backslashes to buffer
+ slashes := backslashes(delims)
+ s.B = append(s.B, slashes...)
+ }
+
+ // Write byte to buffer
+ s.B = append(s.B, str[i])
+
+ // Reset count
+ delims = 0
+ }
+
+ return -1
+}
+
+// asciiSpace is a lookup table of ascii space chars (see: strings.asciiSet).
+var asciiSpace = func() (as [8]uint32) {
+ as['\t'/32] |= 1 << ('\t' % 32)
+ as['\n'/32] |= 1 << ('\n' % 32)
+ as['\v'/32] |= 1 << ('\v' % 32)
+ as['\f'/32] |= 1 << ('\f' % 32)
+ as['\r'/32] |= 1 << ('\r' % 32)
+ as[' '/32] |= 1 << (' ' % 32)
+ return
+}()
+
+// trimLeadingSpace trims the leading space from a string.
+func trimLeadingSpace(str string) string {
+ var start int
+
+ for ; start < len(str); start++ {
+ // If beyond ascii range, trim using slower rune check.
+ if str[start] >= utf8.RuneSelf {
+ return trimLeadingSpaceSlow(str[start:])
+ }
+
+ // Ascii character
+ char := str[start]
+
+ // This is first non-space ASCII, trim up to here
+ if (asciiSpace[char/32] & (1 << (char % 32))) == 0 {
+ break
+ }
+ }
+
+ return str[start:]
+}
+
+// trimLeadingSpaceSlow trims leading space using the slower unicode.IsSpace check.
+func trimLeadingSpaceSlow(str string) string {
+ for i, r := range str {
+ if !unicode.IsSpace(r) {
+ return str[i:]
+ }
+ }
+ return str
+}
+
+// backslashes will return a string of backslashes of given length.
+func backslashes(count int) string {
+ const backslashes = `\\\\\\\\\\\\\\\\\\\\`
+
+ // Fast-path, use string const
+ if count < len(backslashes) {
+ return backslashes[:count]
+ }
+
+ // Slow-path, build custom string
+ return backslashSlow(count)
+}
+
+// backslashSlow will build a string of backslashes of custom length.
+func backslashSlow(count int) string {
+ var buf strings.Builder
+ for i := 0; i < count; i++ {
+ buf.WriteByte('\\')
+ }
+ return buf.String()
+}