diff options
Diffstat (limited to 'vendor/github.com/bytedance/sonic/unquote/unquote_fallback.go')
| -rw-r--r-- | vendor/github.com/bytedance/sonic/unquote/unquote_fallback.go | 206 |
1 files changed, 206 insertions, 0 deletions
// getu4 decodes a \uXXXX escape located at the very beginning of s.
//
// It returns the 16-bit value spelled by the four hex digits (upper- or
// lower-case), or -1 when s is shorter than six bytes, does not start with
// `\u`, or contains a non-hex digit.
func getu4(s []byte) rune {
	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
		return -1
	}
	var r rune
	for _, c := range s[2:6] {
		switch {
		case '0' <= c && c <= '9':
			c = c - '0'
		case 'a' <= c && c <= 'f':
			c = c - 'a' + 10
		case 'A' <= c && c <= 'F':
			c = c - 'A' + 10
		default:
			return -1
		}
		r = r*16 + rune(c)
	}
	return r
}

// unquoteBytes is a fallback implementation copied from Go standard library
// encoding/json/decode.go. This is used when native unquote is not available.
//
// s is the content of a JSON string without the surrounding quotes. On
// success it returns the unescaped bytes and true. It returns (nil, false)
// when s contains a bare `"`, a byte below 0x20, an unknown or truncated
// escape, or a malformed \uXXXX sequence.
//
// When s needs no rewriting at all the returned slice is s itself, not a
// copy. Malformed UTF-8 and unpaired UTF-16 surrogates are not errors; they
// are replaced with U+FFFD.
func unquoteBytes(s []byte) (t []byte, ok bool) {
	// Check for unusual characters. If there are none,
	// then no unquoting is needed, so return a slice of the
	// original bytes.
	r := 0
	for r < len(s) {
		c := s[r]
		if c == '\\' || c == '"' || c < ' ' {
			break
		}
		if c < utf8.RuneSelf {
			r++
			continue
		}
		rr, size := utf8.DecodeRune(s[r:])
		if rr == utf8.RuneError && size == 1 {
			break
		}
		r += size
	}
	if r == len(s) {
		return s, true
	}

	// Slow path: copy the clean prefix, then rewrite the remainder.
	// r indexes the input, w indexes the output.
	b := make([]byte, len(s)+2*utf8.UTFMax)
	w := copy(b, s[0:r])
	for r < len(s) {
		// Out of room? Can only happen if s is full of
		// malformed UTF-8 and we're replacing each
		// byte with RuneError.
		if w >= len(b)-2*utf8.UTFMax {
			nb := make([]byte, (len(b)+utf8.UTFMax)*2)
			copy(nb, b[0:w])
			b = nb
		}
		switch c := s[r]; {
		case c == '\\':
			r++
			if r >= len(s) {
				// A trailing backslash has nothing to escape.
				return nil, false
			}
			switch s[r] {
			default:
				return nil, false
			case '"', '\\', '/', '\'':
				b[w] = s[r]
				r++
				w++
			case 'b':
				b[w] = '\b'
				r++
				w++
			case 'f':
				b[w] = '\f'
				r++
				w++
			case 'n':
				b[w] = '\n'
				r++
				w++
			case 'r':
				b[w] = '\r'
				r++
				w++
			case 't':
				b[w] = '\t'
				r++
				w++
			case 'u':
				// Step back onto the backslash: getu4 expects the full `\uXXXX`.
				r--
				rr := getu4(s[r:])
				if rr < 0 {
					return nil, false
				}
				r += 6
				if utf16.IsSurrogate(rr) {
					rr1 := getu4(s[r:])
					if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
						// A valid pair; consume.
						r += 6
						w += utf8.EncodeRune(b[w:], dec)
						break
					}
					// Invalid surrogate; fall back to replacement rune.
					rr = unicode.ReplacementChar
				}
				w += utf8.EncodeRune(b[w:], rr)
			}

		// Quote, control characters are invalid.
		case c == '"', c < ' ':
			return nil, false

		// ASCII
		case c < utf8.RuneSelf:
			b[w] = c
			r++
			w++

		// Coerce to well-formed UTF-8.
		default:
			rr, size := utf8.DecodeRune(s[r:])
			r += size
			w += utf8.EncodeRune(b[w:], rr)
		}
	}
	return b[0:w], true
}

// getu4Fallback decodes a \uXXXX escape from the beginning of s, returning
// the value of its four hex digits, or -1 if the escape is malformed.
+func getu4Fallback(s []byte) rune { + if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { + return -1 + } + var r rune + for _, c := range s[2:6] { + switch { + case '0' <= c && c <= '9': + c = c - '0' + case 'a' <= c && c <= 'f': + c = c - 'a' + 10 + case 'A' <= c && c <= 'F': + c = c - 'A' + 10 + default: + return -1 + } + r = r*16 + rune(c) + } + return r +} + + +// String unescapes an escaped string (not including `"` at beginning and end) +// It validates invalid UTF8 and replace with `\ufffd` +func String(s string) (ret string, err types.ParsingError) { + // Convert string to []byte and use fallback implementation + sBytes := rt.Str2Mem(s) + result, ok := unquoteBytes(sBytes) + if !ok { + return "", types.ERR_INVALID_ESCAPE + } + return string(result), 0 +} + +// String unescapes an escaped string (not including `"` at beginning and end) +// - replace enables replacing invalid utf8 escaped char with `\uffd` +func _String(s string, _replace bool) (ret string, err error) { + return String(s) +} |
