summaryrefslogtreecommitdiff
path: root/vendor/github.com/goccy/go-json/internal/encoder/string.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/goccy/go-json/internal/encoder/string.go')
-rw-r--r--vendor/github.com/goccy/go-json/internal/encoder/string.go637
1 files changed, 637 insertions, 0 deletions
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/string.go b/vendor/github.com/goccy/go-json/internal/encoder/string.go
new file mode 100644
index 000000000..26da31a5e
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/string.go
@@ -0,0 +1,637 @@
+package encoder
+
+import (
+ "math/bits"
+ "reflect"
+ "unicode/utf8"
+ "unsafe"
+)
+
// Bit patterns used to test all eight bytes of a uint64 in one operation.
const (
	// lsb has the least significant bit of every byte set.
	lsb = 0x0101010101010101
	// msb has the most significant bit of every byte set; it is lsb with
	// each per-byte bit moved from position 0 to position 7.
	msb = lsb << 7
)
+
// needEscapeWithHTML reports, per byte value, whether the byte cannot be
// copied verbatim into a JSON string when HTML-sensitive characters are
// escaped too. An entry is true for:
//   - control bytes 0x00-0x1f,
//   - every byte >= 0x80 (part of a multi-byte or invalid UTF-8 sequence),
//   - the double quote and the backslash,
//   - the HTML-sensitive characters '&', '<' and '>'.
//
// All other bytes in 0x20-0x7f are false.
var needEscapeWithHTML = func() (table [256]bool) {
	for c := range table {
		if c < 0x20 || c >= 0x80 {
			table[c] = true
		}
	}
	for _, c := range []byte{'"', '&', '<', '>', '\\'} {
		table[c] = true
	}
	return table
}()
+
// needEscape reports, per byte value, whether the byte cannot be copied
// verbatim into a JSON string. An entry is true for control bytes
// 0x00-0x1f, for every byte >= 0x80 (part of a multi-byte or invalid UTF-8
// sequence), and for the double quote and the backslash. All other bytes in
// 0x20-0x7f are false.
var needEscape = func() (table [256]bool) {
	for c := 0x00; c <= 0x1f; c++ {
		table[c] = true
	}
	for c := 0x80; c <= 0xff; c++ {
		table[c] = true
	}
	table['"'] = true
	table['\\'] = true
	return table
}()
+
// hex maps a nibble value (0-15) to its lowercase hexadecimal digit. It is
// only ever indexed, so it is a constant rather than a mutable variable.
const hex = "0123456789abcdef"
+
+// escapeIndex finds the index of the first char in `s` that requires escaping.
+// A char requires escaping if it's outside of the range of [0x20, 0x7F] or if
+// it includes a double quote or backslash.
+// If no chars in `s` require escaping, the return value is -1.
+func escapeIndex(s string) int {
+ chunks := stringToUint64Slice(s)
+ for _, n := range chunks {
+ // combine masks before checking for the MSB of each byte. We include
+ // `n` in the mask to check whether any of the *input* byte MSBs were
+ // set (i.e. the byte was outside the ASCII range).
+ mask := n | below(n, 0x20) | contains(n, '"') | contains(n, '\\')
+ if (mask & msb) != 0 {
+ return bits.TrailingZeros64(mask&msb) / 8
+ }
+ }
+
+ valLen := len(s)
+ for i := len(chunks) * 8; i < valLen; i++ {
+ if needEscape[s[i]] {
+ return i
+ }
+ }
+
+ return -1
+}
+
+// below return a mask that can be used to determine if any of the bytes
+// in `n` are below `b`. If a byte's MSB is set in the mask then that byte was
+// below `b`. The result is only valid if `b`, and each byte in `n`, is below
+// 0x80.
+func below(n uint64, b byte) uint64 {
+ return n - expand(b)
+}
+
+// contains returns a mask that can be used to determine if any of the
+// bytes in `n` are equal to `b`. If a byte's MSB is set in the mask then
+// that byte is equal to `b`. The result is only valid if `b`, and each
+// byte in `n`, is below 0x80.
+func contains(n uint64, b byte) uint64 {
+ return (n ^ expand(b)) - lsb
+}
+
+// expand puts the specified byte into each of the 8 bytes of a uint64.
+func expand(b byte) uint64 {
+ return lsb * uint64(b)
+}
+
// stringToUint64Slice reinterprets the bytes of s as a slice of uint64
// without copying. The result has len(s)/8 elements; the trailing len(s)%8
// bytes are not covered. The slice aliases the string's memory and must be
// treated as read-only.
//
// The header of a real slice variable is filled in, rather than building a
// free-standing reflect.SliceHeader value and casting it: the unsafe and
// reflect documentation only permits these header types as views of actual
// slices or strings, because a stand-alone header's uintptr Data field does
// not keep the referenced memory reachable.
//
// NOTE(review): the string data is not guaranteed to be 8-byte aligned, so
// reading the returned elements performs unaligned loads — confirm this is
// acceptable on every target architecture.
func stringToUint64Slice(s string) []uint64 {
	var chunks []uint64
	src := (*reflect.StringHeader)(unsafe.Pointer(&s))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&chunks))
	dst.Data = src.Data
	dst.Len = len(s) / 8
	dst.Cap = len(s) / 8
	return chunks
}
+
+func AppendEscapedString(buf []byte, s string) []byte {
+ valLen := len(s)
+ if valLen == 0 {
+ return append(buf, `""`...)
+ }
+ buf = append(buf, '"')
+ var (
+ i, j int
+ )
+ if valLen >= 8 {
+ chunks := stringToUint64Slice(s)
+ for _, n := range chunks {
+ // combine masks before checking for the MSB of each byte. We include
+ // `n` in the mask to check whether any of the *input* byte MSBs were
+ // set (i.e. the byte was outside the ASCII range).
+ mask := n | (n - (lsb * 0x20)) |
+ ((n ^ (lsb * '"')) - lsb) |
+ ((n ^ (lsb * '\\')) - lsb) |
+ ((n ^ (lsb * '<')) - lsb) |
+ ((n ^ (lsb * '>')) - lsb) |
+ ((n ^ (lsb * '&')) - lsb)
+ if (mask & msb) != 0 {
+ j = bits.TrailingZeros64(mask&msb) / 8
+ goto ESCAPE_END
+ }
+ }
+ for i := len(chunks) * 8; i < valLen; i++ {
+ if needEscapeWithHTML[s[i]] {
+ j = i
+ goto ESCAPE_END
+ }
+ }
+ // no found any escape characters.
+ return append(append(buf, s...), '"')
+ }
+ESCAPE_END:
+ for j < valLen {
+ c := s[j]
+
+ if !needEscapeWithHTML[c] {
+ // fast path: most of the time, printable ascii characters are used
+ j++
+ continue
+ }
+
+ switch c {
+ case '\\', '"':
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, '\\', c)
+ i = j + 1
+ j = j + 1
+ continue
+
+ case '\n':
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, '\\', 'n')
+ i = j + 1
+ j = j + 1
+ continue
+
+ case '\r':
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, '\\', 'r')
+ i = j + 1
+ j = j + 1
+ continue
+
+ case '\t':
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, '\\', 't')
+ i = j + 1
+ j = j + 1
+ continue
+
+ case '<', '>', '&':
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, `\u00`...)
+ buf = append(buf, hex[c>>4], hex[c&0xF])
+ i = j + 1
+ j = j + 1
+ continue
+ }
+
+ // This encodes bytes < 0x20 except for \t, \n and \r.
+ if c < 0x20 {
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, `\u00`...)
+ buf = append(buf, hex[c>>4], hex[c&0xF])
+ i = j + 1
+ j = j + 1
+ continue
+ }
+
+ r, size := utf8.DecodeRuneInString(s[j:])
+
+ if r == utf8.RuneError && size == 1 {
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, `\ufffd`...)
+ i = j + size
+ j = j + size
+ continue
+ }
+
+ switch r {
+ case '\u2028', '\u2029':
+ // U+2028 is LINE SEPARATOR.
+ // U+2029 is PARAGRAPH SEPARATOR.
+ // They are both technically valid characters in JSON strings,
+ // but don't work in JSONP, which has to be evaluated as JavaScript,
+ // and can lead to security holes there. It is valid JSON to
+ // escape them, so we do so unconditionally.
+ // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, `\u202`...)
+ buf = append(buf, hex[r&0xF])
+ i = j + size
+ j = j + size
+ continue
+ }
+
+ j += size
+ }
+
+ return append(append(buf, s[i:]...), '"')
+}
+
+func AppendString(buf []byte, s string) []byte {
+ valLen := len(s)
+ if valLen == 0 {
+ return append(buf, `""`...)
+ }
+ buf = append(buf, '"')
+ var escapeIdx int
+ if valLen >= 8 {
+ if escapeIdx = escapeIndex(s); escapeIdx < 0 {
+ return append(append(buf, s...), '"')
+ }
+ }
+
+ i := 0
+ j := escapeIdx
+ for j < valLen {
+ c := s[j]
+
+ if c >= 0x20 && c <= 0x7f && c != '\\' && c != '"' {
+ // fast path: most of the time, printable ascii characters are used
+ j++
+ continue
+ }
+
+ switch c {
+ case '\\', '"':
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, '\\', c)
+ i = j + 1
+ j = j + 1
+ continue
+
+ case '\n':
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, '\\', 'n')
+ i = j + 1
+ j = j + 1
+ continue
+
+ case '\r':
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, '\\', 'r')
+ i = j + 1
+ j = j + 1
+ continue
+
+ case '\t':
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, '\\', 't')
+ i = j + 1
+ j = j + 1
+ continue
+
+ case '<', '>', '&':
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, `\u00`...)
+ buf = append(buf, hex[c>>4], hex[c&0xF])
+ i = j + 1
+ j = j + 1
+ continue
+ }
+
+ // This encodes bytes < 0x20 except for \t, \n and \r.
+ if c < 0x20 {
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, `\u00`...)
+ buf = append(buf, hex[c>>4], hex[c&0xF])
+ i = j + 1
+ j = j + 1
+ continue
+ }
+
+ r, size := utf8.DecodeRuneInString(s[j:])
+
+ if r == utf8.RuneError && size == 1 {
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, `\ufffd`...)
+ i = j + size
+ j = j + size
+ continue
+ }
+
+ switch r {
+ case '\u2028', '\u2029':
+ // U+2028 is LINE SEPARATOR.
+ // U+2029 is PARAGRAPH SEPARATOR.
+ // They are both technically valid characters in JSON strings,
+ // but don't work in JSONP, which has to be evaluated as JavaScript,
+ // and can lead to security holes there. It is valid JSON to
+ // escape them, so we do so unconditionally.
+ // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
+ buf = append(buf, s[i:j]...)
+ buf = append(buf, `\u202`...)
+ buf = append(buf, hex[r&0xF])
+ i = j + size
+ j = j + size
+ continue
+ }
+
+ j += size
+ }
+
+ return append(append(buf, s[i:]...), '"')
+}