// +build !amd64,!arm64 go1.26 !go1.17 arm64,!go1.20 /* * Copyright 2021 ByteDance Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package unquote import ( "unicode" "unicode/utf16" "unicode/utf8" "github.com/bytedance/sonic/internal/rt" "github.com/bytedance/sonic/internal/native/types" ) // getu4 decodes \uXXXX from the beginning of s, returning the hex value, // or it returns -1. func getu4(s []byte) rune { if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { return -1 } var r rune for _, c := range s[2:6] { switch { case '0' <= c && c <= '9': c = c - '0' case 'a' <= c && c <= 'f': c = c - 'a' + 10 case 'A' <= c && c <= 'F': c = c - 'A' + 10 default: return -1 } r = r*16 + rune(c) } return r } // unquoteBytes is a fallback implementation copied from Go standard library // encoding/json/decode.go. This is used when native unquote is not available. func unquoteBytes(s []byte) (t []byte, ok bool) { // Check for unusual characters. If there are none, // then no unquoting is needed, so return a slice of the // original bytes. r := 0 for r < len(s) { c := s[r] if c == '\\' || c == '"' || c < ' ' { break } if c < utf8.RuneSelf { r++ continue } rr, size := utf8.DecodeRune(s[r:]) if rr == utf8.RuneError && size == 1 { break } r += size } if r == len(s) { return s, true } b := make([]byte, len(s)+2*utf8.UTFMax) w := copy(b, s[0:r]) for r < len(s) { // Out of room? Can only happen if s is full of // malformed UTF-8 and we're replacing each // byte with RuneError. if w >= len(b)-2*utf8.UTFMax { nb := make([]byte, (len(b)+utf8.UTFMax)*2) copy(nb, b[0:w]) b = nb } switch c := s[r]; { case c == '\\': r++ if r >= len(s) { return } switch s[r] { default: return case '"', '\\', '/', '\'': b[w] = s[r] r++ w++ case 'b': b[w] = '\b' r++ w++ case 'f': b[w] = '\f' r++ w++ case 'n': b[w] = '\n' r++ w++ case 'r': b[w] = '\r' r++ w++ case 't': b[w] = '\t' r++ w++ case 'u': r-- rr := getu4(s[r:]) if rr < 0 { return } r += 6 if utf16.IsSurrogate(rr) { rr1 := getu4(s[r:]) if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { // A valid pair; consume. r += 6 w += utf8.EncodeRune(b[w:], dec) break } // Invalid surrogate; fall back to replacement rune. rr = unicode.ReplacementChar } w += utf8.EncodeRune(b[w:], rr) } // Quote, control characters are invalid. case c == '"', c < ' ': return // ASCII case c < utf8.RuneSelf: b[w] = c r++ w++ // Coerce to well-formed UTF-8. default: rr, size := utf8.DecodeRune(s[r:]) r += size w += utf8.EncodeRune(b[w:], rr) } } return b[0:w], true } // getu4Fallback decodes a 4-byte hex sequence from the beginning of s. // It is copied from Go standard library encoding/json.decode.go. func getu4Fallback(s []byte) rune { if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { return -1 } var r rune for _, c := range s[2:6] { switch { case '0' <= c && c <= '9': c = c - '0' case 'a' <= c && c <= 'f': c = c - 'a' + 10 case 'A' <= c && c <= 'F': c = c - 'A' + 10 default: return -1 } r = r*16 + rune(c) } return r } // String unescapes an escaped string (not including `"` at beginning and end) // It validates invalid UTF8 and replace with `\ufffd` func String(s string) (ret string, err types.ParsingError) { // Convert string to []byte and use fallback implementation sBytes := rt.Str2Mem(s) result, ok := unquoteBytes(sBytes) if !ok { return "", types.ERR_INVALID_ESCAPE } return string(result), 0 } // String unescapes an escaped string (not including `"` at beginning and end) // - replace enables replacing invalid utf8 escaped char with `\uffd` func _String(s string, _replace bool) (ret string, err error) { return String(s) }