summaryrefslogtreecommitdiff
path: root/vendor/codeberg.org/gruf/go-split/splitter.go
blob: e87cd845446aa61d8f656af20b287adc01260afa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
package split

import (
	"errors"
	"strings"
	"unicode"
	"unicode/utf8"
)

// Splitter carries a reusable byte buffer so that SplitFunc() can assemble
// each element without allocating fresh scratch space every time.
type Splitter struct {
	// B is the scratch buffer in which each element is built.
	B []byte
}

// SplitFunc walks the comma-separated elements of str and hands each one to fn.
//
// Whitespace in front of every element is trimmed. An element that starts with a
// single or double quote runs up to the matching end quote (so it may contain
// commas); after the end quote only whitespace followed by a comma, or the end of
// the input, is accepted. Empty segments between commas are skipped without
// calling fn.
//
// It returns "missing end quote" when a quoted element is never closed, "missing
// comma separator" when something other than a comma follows an end quote, or the
// first non-nil error returned by fn. Each element is passed as a fresh string
// copied out of s.B, which is reset before every element.
func (s *Splitter) SplitFunc(str string, fn func(string) error) error {
	for {
		// Empty the scratch buffer but keep its capacity.
		s.B = s.B[:0]

		// Drop whitespace in front of the element.
		str = trimLeadingSpace(str)
		if len(str) == 0 {
			// Nothing left to split.
			return nil
		}

		first := str[0]

		// Empty segment: step over the comma and move on.
		if first == ',' {
			str = str[1:]
			continue
		}

		// Unquoted element: runs up to the next separating comma.
		if first != '\'' && first != '"' {
			end := s.next(str, ',')

			if end == -1 {
				// No further separator, so this is the last element
				// (str is known to be non-empty here).
				return fn(string(s.B))
			}

			// A zero index means an empty element, which is not reported.
			if end != 0 {
				if err := fn(string(s.B)); err != nil {
					return err
				}
			}

			// Skip past the element and its comma.
			str = str[end+1:]
			continue
		}

		// Quoted element: search after the opening quote for the matching one.
		rel := s.next(str[1:], first)
		if rel == -1 {
			return errors.New("missing end quote")
		}

		// Index of the end quote within str (rel is relative to str[1:]).
		closing := rel + 1

		// Hand the unquoted contents to the callback.
		if err := fn(string(s.B)); err != nil {
			return err
		}

		// Continue after the end quote, ignoring whitespace that follows it.
		str = trimLeadingSpace(str[closing+1:])
		if len(str) == 0 {
			// Input ended right after the quoted element.
			return nil
		}

		// Anything other than a comma after a quoted element is malformed.
		if str[0] != ',' {
			return errors.New("missing comma separator")
		}

		// Skip the comma.
		str = str[1:]
	}
}

// next builds the next string element in s.B by reading str up to the first
// unescaped instance of c.
//
// A run of backslashes directly in front of c is interpreted in pairs: every pair
// yields one literal backslash in the output, and a remaining single backslash
// escapes c so that it is written as a literal byte instead of ending the element.
// Backslashes in front of any other byte, or at the very end of str, are copied
// through unchanged.
//
// It returns the index in str of the terminating c, or -1 if the end of the string
// was reached without finding one (s.B then holds everything that was read).
func (s *Splitter) next(str string, c byte) int {
	// Number of consecutive backslashes seen directly before the current byte.
	var delims int

	// Guarantee buf large enough; the output is never longer than the input.
	if len(str) > cap(s.B)-len(s.B) {
		nb := make([]byte, 2*cap(s.B)+len(str))
		_ = copy(nb, s.B)
		s.B = nb[:len(s.B)]
	}

	for i := 0; i < len(str); i++ {
		// Count backslashes, their meaning depends on what follows them.
		if str[i] == '\\' {
			delims++
			continue
		}

		if str[i] == c {
			// Each pair of backslashes in front of c is one literal backslash.
			if count := delims / 2; count > 0 {
				s.B = append(s.B, backslashes(count)...)
			}

			// An even number of backslashes (including none) leaves c
			// unescaped, so the element ends here.
			if delims%2 == 0 {
				return i
			}

			// Odd count: the last backslash escapes c, which is written
			// below as a literal byte.
		} else if delims > 0 {
			// Backslashes in front of any other byte are kept as-is.
			s.B = append(s.B, backslashes(delims)...)
		}

		// Write byte to buffer
		s.B = append(s.B, str[i])

		// Reset count
		delims = 0
	}

	// Backslashes at the very end precede no byte at all; keep them
	// rather than silently dropping them.
	if delims > 0 {
		s.B = append(s.B, backslashes(delims)...)
	}

	return -1
}

// asciiSpace is a 256-bit set (8 words of 32 bits) with one bit set for each
// ASCII whitespace character: tab, newline, vertical tab, form feed, carriage
// return and space (see: strings.asciiSet). Byte b is a member when bit b%32
// of word b/32 is set.
var asciiSpace = func() [8]uint32 {
	const spaces = "\t\n\v\f\r "

	var table [8]uint32
	for i := 0; i < len(spaces); i++ {
		c := spaces[i]
		table[c/32] |= 1 << (c % 32)
	}
	return table
}()

// trimLeadingSpace returns str without its leading whitespace. ASCII bytes are
// checked against the asciiSpace lookup table; as soon as a non-ASCII byte is
// met, the rest of the string is handed to trimLeadingSpaceSlow instead.
func trimLeadingSpace(str string) string {
	for i := 0; i < len(str); i++ {
		b := str[i]

		// Beyond the ASCII range: fall back to the rune-aware check.
		if b >= utf8.RuneSelf {
			return trimLeadingSpaceSlow(str[i:])
		}

		// First ASCII byte that is not whitespace: everything from here stays.
		if asciiSpace[b/32]&(1<<(b%32)) == 0 {
			return str[i:]
		}
	}

	// Every byte was ASCII whitespace (or str was empty).
	return str[len(str):]
}

// trimLeadingSpaceSlow trims leading space using the slower, rune-aware
// unicode.IsSpace check. It returns the part of str starting at the first rune
// that is not whitespace, or the empty string when str consists solely of
// whitespace.
func trimLeadingSpaceSlow(str string) string {
	return strings.TrimLeftFunc(str, unicode.IsSpace)
}

// backslashes returns a string made of count backslash characters. Counts below
// the length of a constant pool string are served by slicing that constant;
// anything longer is built by backslashSlow.
func backslashes(count int) string {
	const pool = `\\\\\\\\\\\\\\\\\\\\`

	// Slow-path: the request does not fit the constant, build a custom string.
	if count >= len(pool) {
		return backslashSlow(count)
	}

	// Fast-path: a prefix of the constant is all that is needed.
	return pool[:count]
}

// backslashSlow builds a string of count backslashes in a single allocation.
// A count of zero or less yields the empty string.
func backslashSlow(count int) string {
	// strings.Repeat panics on a negative count, so keep returning "" there.
	if count <= 0 {
		return ""
	}
	return strings.Repeat(`\`, count)
}