1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
|
package split
import (
"errors"
"strings"
"unicode"
"unicode/utf8"
)
// Splitter splits comma-separated strings via SplitFunc. It owns a reusable
// byte buffer so that repeated splitting allocates as little as possible.
type Splitter struct {
	// B is the scratch buffer in which each element is assembled before it
	// is handed to the callback. It is truncated at the start of every
	// element, so its contents are only meaningful during a SplitFunc call.
	B []byte
}
// SplitFunc walks str as a comma-separated list and calls fn once per
// element, in order of appearance. Whitespace in front of each element is
// discarded. An element may be wrapped in single or double quotes, in which
// case commas inside the quotes belong to the element, and the closing quote
// must be followed (whitespace aside) by a comma or the end of the input.
// Empty unquoted segments are skipped without calling fn.
//
// It returns the first non-nil error produced by fn, or an error when a
// quoted element is never closed or is not followed by a comma.
func (s *Splitter) SplitFunc(str string, fn func(string) error) error {
	for {
		// Every element starts from an empty buffer; capacity is retained.
		s.B = s.B[:0]

		// Discard whitespace ahead of the element; nothing left means done.
		if str = trimLeadingSpace(str); str == "" {
			return nil
		}

		head := str[0]

		// A bare comma is an empty segment: consume it and move on.
		if head == ',' {
			str = str[1:]
			continue
		}

		// Quoted element: scan for the matching quote character.
		if head == '\'' || head == '"' {
			closing := s.next(str[1:], head)
			if closing == -1 {
				return errors.New("missing end quote")
			}

			// Hand the unquoted contents to the callback.
			if err := fn(string(s.B)); err != nil {
				return err
			}

			// closing indexes into str[1:], so the closing quote sits at
			// str[closing+1]; resume right after it.
			str = trimLeadingSpace(str[closing+2:])
			if str == "" {
				return nil
			}

			// Only a comma may follow a quoted element.
			if str[0] != ',' {
				return errors.New("missing comma separator")
			}

			// Step over the separating comma.
			str = str[1:]
			continue
		}

		// Unquoted element: scan up to the next unescaped comma.
		switch end := s.next(str, ','); end {
		case -1:
			// No further comma: this is the final (non-empty) element.
			return fn(string(s.B))

		case 0:
			// Comma found immediately: empty element, just skip it.
			str = str[1:]

		default:
			if err := fn(string(s.B)); err != nil {
				return err
			}

			// Resume after the comma that ended this element.
			str = str[end+1:]
		}
	}
}
// next builds the next element in s.B by scanning str for the first
// unescaped occurrence of the delimiter byte c.
//
// Backslashes are only special when they directly precede c: a run of 2n
// backslashes contributes n literal backslashes and leaves c acting as the
// terminator, while a run of 2n+1 backslashes contributes n literal
// backslashes followed by a literal c. Backslashes anywhere else, including
// a run at the very end of str, are copied through unchanged.
//
// Bytes are appended to s.B; the caller is responsible for truncating s.B
// between elements. The return value is the index within str of the
// terminating c, or -1 if str was exhausted without finding one.
func (s *Splitter) next(str string, c byte) int {
	// Grow the buffer up front so the appends below need not reallocate;
	// the output is never longer than str itself.
	if len(str) > cap(s.B)-len(s.B) {
		nb := make([]byte, len(s.B), 2*cap(s.B)+len(str))
		copy(nb, s.B)
		s.B = nb
	}

	// escapes counts the run of consecutive backslashes preceding str[i].
	var escapes int

	for i := 0; i < len(str); i++ {
		b := str[i]

		switch {
		case b == '\\':
			// Defer writing backslashes until we know what follows them.
			escapes++
			continue

		case b == c:
			// Each pair of backslashes collapses to one literal backslash.
			if pairs := escapes / 2; pairs > 0 {
				s.B = append(s.B, backslashes(pairs)...)
			}

			// An even run leaves no backslash over to escape c, so c
			// terminates the element here.
			if escapes%2 == 0 {
				return i
			}

			// Odd run: the leftover backslash escapes c, which is written
			// below as a literal byte.

		case escapes > 0:
			// Backslashes before any other byte are kept verbatim.
			s.B = append(s.B, backslashes(escapes)...)
		}

		s.B = append(s.B, b)
		escapes = 0
	}

	// A trailing run of backslashes precedes nothing it could escape, so
	// keep it verbatim rather than silently dropping input.
	if escapes > 0 {
		s.B = append(s.B, backslashes(escapes)...)
	}

	return -1
}
// asciiSpace is a bit set packed into eight 32-bit words (one bit per byte
// value) whose set bits mark the ASCII whitespace bytes '\t', '\n', '\v',
// '\f', '\r' and ' ' (compare strings.asciiSet in the standard library).
var asciiSpace = func() [8]uint32 {
	var set [8]uint32
	for _, c := range []byte("\t\n\v\f\r ") {
		// Word index is c/32, bit within the word is c%32.
		set[c/32] |= 1 << (c % 32)
	}
	return set
}()
// trimLeadingSpace returns str with its leading whitespace removed. ASCII
// bytes are tested against the asciiSpace lookup table; as soon as a
// non-ASCII byte is met, the remainder is handed to trimLeadingSpaceSlow for
// rune-aware trimming.
func trimLeadingSpace(str string) string {
	for i := 0; i < len(str); i++ {
		b := str[i]

		switch {
		case b >= utf8.RuneSelf:
			// Start of a multi-byte sequence: needs the rune-based check.
			return trimLeadingSpaceSlow(str[i:])

		case asciiSpace[b/32]&(1<<(b%32)) == 0:
			// First ASCII byte that is not whitespace: cut here.
			return str[i:]
		}
	}

	// Every byte was ASCII whitespace (or str was empty).
	return ""
}
// trimLeadingSpaceSlow returns str with all leading whitespace removed,
// where whitespace is decided per rune by unicode.IsSpace. If str consists
// solely of whitespace the result is the empty string.
func trimLeadingSpaceSlow(str string) string {
	// TrimLeftFunc stops at the first rune for which IsSpace is false and
	// yields "" when no such rune exists.
	return strings.TrimLeftFunc(str, unicode.IsSpace)
}
// backslashes returns a string made of count backslash characters. Counts
// up to the length of a constant run are served by slicing that constant,
// which does not allocate; larger counts are built by backslashSlow. A count
// of zero or less yields the empty string.
func backslashes(count int) string {
	const run = `\\\\\\\\\\\\\\\\\\\\`

	switch {
	case count <= 0:
		// Nothing to produce; also keeps the slice below in bounds.
		return ""

	case count <= len(run):
		// Fast path: the whole constant is usable, including its full length.
		return run[:count]

	default:
		// Slow path: longer than the constant, build a custom string.
		return backslashSlow(count)
	}
}
// backslashSlow allocates and returns a string made of count backslash
// characters. A count of zero or less yields the empty string.
func backslashSlow(count int) string {
	if count <= 0 {
		return ""
	}
	return strings.Repeat(`\`, count)
}
|