1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
|
// Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved.
// Use of this source code is governed by a MIT license found in the LICENSE file.
package codec
import (
"encoding/base32"
"encoding/base64"
"errors"
"math"
"reflect"
"strings"
"time"
"unicode"
)
//--------------------------------

// jsonLits packs every fixed JSON literal into one package-level string:
// the quoted forms of true, false and null, followed by the empty map
// and the empty array. Literals are obtained by slicing this string
// (or its byte-slice twin jsonLitb), which makes for cheap
// append/string comparison/etc.
//
// (anecdotal evidence from some benchmarking on go 1.20 devel in 20220104)
const jsonLits = `"true"false"null"{}[]`

// Offsets into jsonLits/jsonLitb at which each literal starts.
// For true, false and null, the offset is just past the preceding double quote.
const (
	jsonLitT = 1  // true
	jsonLitF = 6  // false
	jsonLitN = 12 // null
	jsonLitM = 17 // {}
	jsonLitA = 19 // []
)

// Byte-slice views of the literals. The sub-slices share storage with jsonLitb.
var (
	jsonLitb       = []byte(jsonLits)
	jsonNull       = jsonLitb[jsonLitN : jsonLitN+4]
	jsonArrayEmpty = jsonLitb[jsonLitA : jsonLitA+2]
	jsonMapEmpty   = jsonLitb[jsonLitM : jsonLitM+2]
)
// jsonEncodeUintSmallsString is a lookup table holding the two-digit decimal
// text of every value from 0 through 99, laid end to end: the digits for n
// occupy bytes 2*n and 2*n+1.
const jsonEncodeUintSmallsString = "0001020304050607080910111213141516171819" +
	"2021222324252627282930313233343536373839" +
	"4041424344454647484950515253545556575859" +
	"6061626364656667686970717273747576777879" +
	"8081828384858687888990919293949596979899"

// jsonEncodeUintSmallsStringBytes is the same table, as a pointer to a fixed-size byte array.
var jsonEncodeUintSmallsStringBytes = (*[len(jsonEncodeUintSmallsString)]byte)([]byte(jsonEncodeUintSmallsString))
// Offsets subtracted from a hexadecimal digit character to obtain its
// numeric value, one per character class.
const (
	jsonU4Chk0 = 'A' - 10 // for 'A'..'F'
	jsonU4Chk1 = 'a' - 10 // for 'a'..'f'
	jsonU4Chk2 = '0'      // for '0'..'9'
)
const (
	// If !jsonValidateSymbols, decoding will be faster, by skipping some checks:
	//   - If we see first character of null, false or true,
	//     do not validate subsequent characters.
	//   - e.g. if we see a n, assume null and skip next 3 characters,
	//     and do not validate they are ull.
	// P.S. Do not expect a significant decoding boost from this.
	jsonValidateSymbols = true

	// jsonEscapeMultiByteUnicodeSep controls whether some unicode characters
	// that are valid json but may bomb in some contexts are escaped during encoding.
	//
	// U+2028 is LINE SEPARATOR. U+2029 is PARAGRAPH SEPARATOR.
	// Both technically valid JSON, but bomb on JSONP, so fix here unconditionally.
	jsonEscapeMultiByteUnicodeSep = true

	// jsonNakedBoolNumInQuotedStr is used during decoding into a blank interface{}
	// to control whether we detect quoted values of bools and null where a map key is expected,
	// and treat as nil, true or false.
	jsonNakedBoolNumInQuotedStr = true
)
var (
	// jsonTabs and jsonSpaces are used as caches for indents.
	// init fills them with tab and space characters respectively.
	jsonTabs   [32]byte
	jsonSpaces [128]byte

	// jsonHexEncoder is the encoder registered for the "base16"/"hex" bytes format.
	jsonHexEncoder hexEncoder

	// jsonTimeLayout is used to validate time layouts.
	// Unfortunately, we couldn't compare time.Time effectively, so punted.
	// jsonTimeLayout time.Time
)
// init pre-fills the indent caches: jsonTabs with tab characters
// and jsonSpaces with space characters.
func init() {
	for i := range jsonTabs {
		jsonTabs[i] = '\t'
	}
	for i := range jsonSpaces {
		jsonSpaces[i] = ' '
	}

	// Disabled (kept for reference):
	// jsonTimeLayout, err := time.Parse(time.Layout, time.Layout)
	// halt.onerror(err)
	// jsonTimeLayout = jsonTimeLayout.Round(time.Second).UTC()
}
// ----------------
// jsonBytesFmt identifies how a []byte is represented in JSON.
// Values start at 1; jsonHandleOpts.reset treats 0 as "not yet resolved".
type jsonBytesFmt uint8

const (
	// jsonBytesFmtArray is selected by the BytesFormat name "array".
	jsonBytesFmtArray jsonBytesFmt = iota + 1
	// jsonBytesFmtBase64 is selected by "base64", and is the fallback when nothing else is selected.
	jsonBytesFmtBase64
	// jsonBytesFmtBase64url is selected by "base64url".
	jsonBytesFmtBase64url
	// jsonBytesFmtBase32 is selected by "base32".
	jsonBytesFmtBase32
	// jsonBytesFmtBase32hex is selected by "base32hex".
	jsonBytesFmtBase32hex
	// jsonBytesFmtBase16 is selected by "base16" or "hex".
	jsonBytesFmtBase16

	// jsonBytesFmtHex is another name for jsonBytesFmtBase16.
	jsonBytesFmtHex = jsonBytesFmtBase16
)
// jsonTimeFmt identifies how a time.Time is represented in JSON.
// Values start at 1; jsonHandleOpts.reset treats 0 as "not yet resolved".
type jsonTimeFmt uint8

const (
	// jsonTimeFmtStringLayout is used when the time is handled as a string
	// following one of the configured layouts.
	jsonTimeFmtStringLayout jsonTimeFmt = iota + 1
	// jsonTimeFmtUnix is selected by the TimeFormat name "unix".
	jsonTimeFmtUnix
	// jsonTimeFmtUnixMilli is selected by "unixmilli".
	jsonTimeFmtUnixMilli
	// jsonTimeFmtUnixMicro is selected by "unixmicro".
	jsonTimeFmtUnixMicro
	// jsonTimeFmtUnixNano is selected by "unixnano".
	jsonTimeFmtUnixNano
)
// jsonBytesFmter is an alias for bytesEncoder.
// jsonHandleOpts.byteFmters holds values of this type (e.g. base64.StdEncoding).
type jsonBytesFmter = bytesEncoder
// jsonHandleOpts holds options derived from a JsonHandle.
// Its reset method recomputes every field from the handle's configuration.
type jsonHandleOpts struct {
	// rawext is true when the handle has a non-nil RawBytesExt
	rawext bool
	// bytesFmt used during encode to determine how to encode []byte
	bytesFmt jsonBytesFmt
	// timeFmt used during encode to determine how to encode a time.Time
	timeFmt jsonTimeFmt
	// timeFmtNum used during decode to decode a time.Time from an int64 in the stream
	timeFmtNum jsonTimeFmt
	// timeFmtLayouts used on decode, to try to parse time.Time until successful
	timeFmtLayouts []string
	// byteFmters used on decode, to try to parse []byte from a UTF-8 string encoding (e.g. base64)
	byteFmters []jsonBytesFmter
}
// jsonCheckTimeLayout reports whether s is usable as a time.Time layout,
// i.e. whether text produced by formatting with s can be parsed back with s.
//
// The check formats the layout's reference instant (in UTC) and parses the
// result, rather than parsing the layout text itself. Some layout verbs are
// not valid input for their own layout - e.g. "Z07:00" (as in time.RFC3339)
// or "_2" (as in time.ANSIC) - so parsing a layout against itself wrongly
// rejects such layouts.
func jsonCheckTimeLayout(s string) (ok bool) {
	ref := time.Date(2006, time.January, 2, 15, 4, 5, 0, time.UTC)
	_, err := time.Parse(s, ref.Format(s))
	return err == nil
}
// reset recomputes the cached encode/decode options from the configuration on h.
//
// Time handling (from h.TimeFormat):
//   - timeFmt (encode) follows the first entry: a unix variant if the entry
//     names one, else a string layout.
//   - timeFmtNum (decode from a number) follows the first recognized unix
//     variant found anywhere in the list, defaulting to unix seconds.
//     (It is not restricted to the first entry, so that a list such as
//     {layout, "unixmilli"} decodes numbers using the unix entry it names.)
//   - timeFmtLayouts (decode from a string) collects, in order, every entry
//     that is not unix-prefixed and passes jsonCheckTimeLayout. When encoding
//     as a string and no layout was accepted, time.RFC3339Nano is used.
//
// Bytes handling (from h.BytesFormat):
//   - bytesFmt (encode) follows the first entry, defaulting to base64.
//   - byteFmters (decode from a string) collects an encoder for each recognized
//     text format, in order. When bytesFmt had to be defaulted and base64 was
//     not listed, base64 is inserted at the front.
//
// rawext records whether h.RawBytesExt is set.
func (x *jsonHandleOpts) reset(h *JsonHandle) {
	x.timeFmt = 0
	x.timeFmtNum = 0
	x.timeFmtLayouts = x.timeFmtLayouts[:0]

	for i, v := range h.TimeFormat {
		var f jsonTimeFmt
		switch v {
		case "unix":
			f = jsonTimeFmtUnix
		case "unixmilli":
			f = jsonTimeFmtUnixMilli
		case "unixmicro":
			f = jsonTimeFmtUnixMicro
		case "unixnano":
			f = jsonTimeFmtUnixNano
		}
		switch {
		case f != 0:
			if i == 0 {
				// only the first entry decides how a time.Time is encoded
				x.timeFmt = f
			}
			if x.timeFmtNum == 0 {
				x.timeFmtNum = f
			}
		case !strings.HasPrefix(v, "unix") && jsonCheckTimeLayout(v):
			x.timeFmtLayouts = append(x.timeFmtLayouts, v)
		}
	}

	if x.timeFmt == 0 {
		// no entries, or the first entry was not a unix variant: encode as a string
		x.timeFmt = jsonTimeFmtStringLayout
		if len(x.timeFmtLayouts) == 0 {
			x.timeFmtLayouts = append(x.timeFmtLayouts, time.RFC3339Nano)
		}
	}
	if x.timeFmtNum == 0 {
		// no unix variant configured anywhere
		x.timeFmtNum = jsonTimeFmtUnix
	}

	x.bytesFmt = 0
	x.byteFmters = x.byteFmters[:0]
	var b64 bool
	if len(h.BytesFormat) != 0 {
		switch h.BytesFormat[0] {
		case "array":
			x.bytesFmt = jsonBytesFmtArray
		case "base64":
			x.bytesFmt = jsonBytesFmtBase64
		case "base64url":
			x.bytesFmt = jsonBytesFmtBase64url
		case "base32":
			x.bytesFmt = jsonBytesFmtBase32
		case "base32hex":
			x.bytesFmt = jsonBytesFmtBase32hex
		case "base16", "hex":
			x.bytesFmt = jsonBytesFmtBase16
		}
		for _, v := range h.BytesFormat {
			switch v {
			// "array" has no text encoder, so it contributes nothing here
			case "base64":
				x.byteFmters = append(x.byteFmters, base64.StdEncoding)
				b64 = true
			case "base64url":
				x.byteFmters = append(x.byteFmters, base64.URLEncoding)
			case "base32":
				x.byteFmters = append(x.byteFmters, base32.StdEncoding)
			case "base32hex":
				x.byteFmters = append(x.byteFmters, base32.HexEncoding)
			case "base16", "hex":
				x.byteFmters = append(x.byteFmters, &jsonHexEncoder)
			}
		}
	}
	if x.bytesFmt == 0 {
		// either len==0 OR gibberish was in the first element; resolve here
		x.bytesFmt = jsonBytesFmtBase64
		if !b64 { // not present - so insert into pos 0
			x.byteFmters = append(x.byteFmters, nil)
			copy(x.byteFmters[1:], x.byteFmters[0:])
			x.byteFmters[0] = base64.StdEncoding
		}
	}

	// ----
	x.rawext = h.RawBytesExt != nil
}
// jsonEncBoolStrs holds the text for a bool, sliced out of jsonLits.
// Row 0 holds the bare forms and row 1 the double-quoted forms;
// within a row, element 0 is false and element 1 is true.
var jsonEncBoolStrs = [2][2]string{
	0: {
		0: jsonLits[jsonLitF : jsonLitF+5],
		1: jsonLits[jsonLitT : jsonLitT+4],
	},
	1: {
		0: jsonLits[jsonLitF-1 : jsonLitF+6],
		1: jsonLits[jsonLitT-1 : jsonLitT+5],
	},
}
// jsonEncodeUint writes the decimal text of u into the first 24 bytes of b,
// filling from the end backwards, and returns the sub-slice holding the text.
//
//   - neg: a '-' is placed before the digits
//   - quotes: the result is wrapped in double quotes
//
// The returned slice aliases b.
func jsonEncodeUint(neg, quotes bool, u uint64, b *[48]byte) []byte {
	// MARKER: use setByteAt/byteAt to elide the bounds-checks
	// when we are sure that we don't go beyond the bounds.

	// MARKER: copied mostly from std library: strconv/itoa.go
	// this should only be called on 64bit OS.

	// ss is the two-digits-per-entry lookup table for values 0..99
	var ss = jsonEncodeUintSmallsStringBytes[:]

	// typically, 19 or 20 bytes sufficient for decimal encoding a uint64
	var a = b[:24]
	var i = uint(len(a)) // i is the index of the left-most byte written so far

	if quotes {
		// closing quote goes in first, since we write right-to-left
		i--
		setByteAt(a, i, '"')
		// a[i] = '"'
	}
	var is, us uint // use uint, as those fit into a register on the platform
	if cpu32Bit {
		// peel off 9 decimal digits at a time, so the remaining arithmetic fits in a uint
		for u >= 1e9 {
			q := u / 1e9
			us = uint(u - q*1e9) // u % 1e9 fits into a uint
			// 4 pairs of digits ...
			for j := 4; j > 0; j-- {
				is = us % 100 * 2
				us /= 100
				i -= 2
				setByteAt(a, i+1, byteAt(ss, is+1))
				setByteAt(a, i, byteAt(ss, is))
			}
			// ... plus the single remaining (9th) digit
			i--
			setByteAt(a, i, byteAt(ss, us*2+1))
			u = q
		}
		// u is now < 1e9, so is guaranteed to fit into a uint
	}
	us = uint(u)
	// write two digits per iteration, using the lookup table
	for us >= 100 {
		is = us % 100 * 2
		us /= 100
		i -= 2
		setByteAt(a, i+1, byteAt(ss, is+1))
		setByteAt(a, i, byteAt(ss, is))
		// a[i+1], a[i] = ss[is+1], ss[is]
	}
	// us < 100
	is = us * 2
	i--
	setByteAt(a, i, byteAt(ss, is+1))
	// a[i] = ss[is+1]
	if us >= 10 {
		// the tens digit is only written when non-zero (no leading zero)
		i--
		setByteAt(a, i, byteAt(ss, is))
		// a[i] = ss[is]
	}
	if neg {
		i--
		setByteAt(a, i, '-')
		// a[i] = '-'
	}
	if quotes {
		// opening quote
		i--
		setByteAt(a, i, '"')
		// a[i] = '"'
	}
	return a[i:]
}
// MARKER: checkLitErr methods to prevent the got/expect parameters from escaping

// jsonCheckLitErr3 reports, via halt.errorf, that the 3 bytes in got
// did not match the 3 bytes in expect.
//
//go:noinline
func jsonCheckLitErr3(got, expect [3]byte) {
	halt.errorf("expecting %s: got %s", expect, got)
}
// jsonCheckLitErr4 reports, via halt.errorf, that the 4 bytes in got
// did not match the 4 bytes in expect.
//
//go:noinline
func jsonCheckLitErr4(got, expect [4]byte) {
	halt.errorf("expecting %s: got %s", expect, got)
}
// jsonSlashURune converts the 4 hexadecimal digit characters in cs
// (upper or lower case) into the number they spell, most significant digit first.
//
// If any of the 4 bytes is not a hexadecimal digit, it returns unicode.ReplacementChar.
func jsonSlashURune(cs [4]byte) (rr uint32) {
	for i := 0; i < len(cs); i++ {
		c := cs[i]
		var digit byte
		// an explicit comparison chain is used on purpose
		// - not a table, etc which involve memory loads, array lookup with bounds checks, etc
		switch {
		case '0' <= c && c <= '9':
			digit = c - jsonU4Chk2
		case 'a' <= c && c <= 'f':
			digit = c - jsonU4Chk1
		case 'A' <= c && c <= 'F':
			digit = c - jsonU4Chk0
		default:
			return unicode.ReplacementChar
		}
		// digit < 16, so shifting in 4 bits and or-ing equals rr*16 + digit
		rr = rr<<4 | uint32(digit)
	}
	return rr
}
// jsonNakedNum parses the number text in bs into z.
//
// When preferFloat is set, z is marked as a float and bs is parsed with parseFloat64.
// Otherwise, parsing is delegated to parseNumber, passing signedInt along.
//
// Note: jsonNakedNum is NEVER called with a zero-length []byte
func jsonNakedNum(z *fauxUnion, bs []byte, preferFloat, signedInt bool) (err error) {
	if !preferFloat {
		return parseNumber(bs, z, signedInt)
	}
	z.v = valueTypeFloat
	z.f, err = parseFloat64(bs)
	return err
}
//----------------------
// JsonHandle is a handle for JSON encoding format.
//
// Json is comprehensively supported:
//   - decodes numbers into interface{} as int, uint or float64
//     based on how the number looks and some config parameters e.g. PreferFloat, SignedInt, etc.
//   - decode integers from float formatted numbers e.g. 1.27e+8
//   - decode any json value (numbers, bool, etc) from quoted strings
//   - configurable way to encode/decode []byte .
//     by default, encodes and decodes []byte using base64 Std Encoding
//   - UTF-8 support for encoding and decoding
//
// It has better performance than the json library in the standard library,
// by leveraging the performance improvements of the codec library.
//
// In addition, it doesn't read more bytes than necessary during a decode, which allows
// reading multiple values from a stream containing json and non-json content.
// For example, a user can read a json value, then a cbor value, then a msgpack value,
// all from the same stream in sequence.
//
// Note that, when decoding quoted strings, invalid UTF-8 or invalid UTF-16 surrogate pairs are
// not treated as an error. Instead, they are replaced by the Unicode replacement character U+FFFD.
//
// Note also that the float values for NaN, +Inf or -Inf are encoded as null,
// as suggested by NOTE 4 of the ECMA-262 ECMAScript Language Specification 5.1 edition.
// see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf .
//
// Note the following behaviour differences vs std-library encoding/json package:
//   - struct field names matched in case-sensitive manner
type JsonHandle struct {
	textEncodingType
	BasicHandle

	// Indent indicates how a value is encoded.
	//   - If positive, indent by that number of spaces.
	//   - If negative, indent by that number of tabs.
	Indent int8

	// IntegerAsString controls how integers (signed and unsigned) are encoded.
	//
	// Per the JSON Spec, JSON numbers are 64-bit floating point numbers.
	// Consequently, integers > 2^53 cannot be represented as a JSON number without losing precision.
	// This can be mitigated by configuring how to encode integers.
	//
	// IntegerAsString interprets the following values:
	//   - if 'L', then encode integers > 2^53 as a json string.
	//   - if 'A', then encode all integers as a json string
	//     containing the exact integer representation as a decimal.
	//   - else encode all integers as a json number (default)
	IntegerAsString byte

	// HTMLCharsAsIs controls how to encode some special characters to html: < > &
	//
	// By default, we encode them as \uXXXX
	// to prevent security holes when served from some browsers.
	HTMLCharsAsIs bool

	// PreferFloat says that we will default to decoding a number as a float.
	// If not set, we will examine the characters of the number and decode as an
	// integer type if it doesn't have any of the characters [.eE].
	PreferFloat bool

	// TermWhitespace says that we add a whitespace character
	// at the end of an encoding.
	//
	// The whitespace is important, especially if using numbers in a context
	// where multiple items are written to a stream.
	TermWhitespace bool

	// MapKeyAsString says to encode all map keys as strings.
	//
	// Use this to enforce strict json output.
	// The only caveat is that nil value is ALWAYS written as null (never as "null")
	MapKeyAsString bool

	// _ uint64 // padding (cache line)

	// Note: below, we store hardly-used items e.g. RawBytesExt.
	// These values below may straddle a cache line, but they are hardly-used,
	// so shouldn't contribute to false-sharing except in rare cases.

	// RawBytesExt, if configured, is used to encode and decode raw bytes in a custom way.
	// If not configured, raw bytes are encoded to/from base64 text.
	RawBytesExt InterfaceExt

	// TimeFormat is an array of strings representing a time.Time format, with each one being either
	// a layout that honors the time.Time.Format specification, or one of the set below.
	// At most one of the set (unix, unixmilli, unixmicro, unixnano) can be specified,
	// supporting encoding and decoding time as a number relative to the time epoch of Jan 1, 1970.
	//
	// During encode of a time.Time, the first entry in the array is used (defaults to RFC 3339).
	//
	// During decode,
	//   - if a string, then each of the layout formats will be tried in order until a time.Time is decoded.
	//   - if a number, then the sole unix entry is used.
	TimeFormat []string

	// BytesFormat is an array of strings representing how bytes are encoded.
	//
	// Supported values are base64 (default), base64url, base32, base32hex, base16 (synonymous with hex) and array.
	//
	// array is a special value configuring that bytes are encoded as a sequence of numbers.
	//
	// During encode of a []byte, the first entry is used (defaults to base64 if none specified).
	//
	// During decode
	//   - if a string, then attempt decoding using each format in sequence until successful.
	//   - if an array, then decode normally
	BytesFormat []string
}
// isJson always reports true for a JsonHandle.
func (h *JsonHandle) isJson() bool {
	return true
}
// Name returns the name of the handle, which is always "json".
func (h *JsonHandle) Name() string {
	const name = "json"
	return name
}
// desc returns bd as a string: the UTF-8 text of the code point whose value is bd.
//
// Alternative formerly considered: return str4byte(bd)
func (h *JsonHandle) desc(bd byte) string {
	return string(rune(bd))
}
// typical reports whether the handle uses none of the output-shaping options:
// no indentation, map keys not forced to strings, and IntegerAsString
// set to neither 'A' nor 'L'.
func (h *JsonHandle) typical() bool {
	if h.Indent != 0 || h.MapKeyAsString {
		return false
	}
	switch h.IntegerAsString {
	case 'A', 'L':
		return false
	}
	return true
}
// SetInterfaceExt sets an extension.
//
// It wraps ext using makeExt and registers it for the type rt and the given tag
// by calling SetExt, returning whatever error SetExt returns.
func (h *JsonHandle) SetInterfaceExt(rt reflect.Type, tag uint64, ext InterfaceExt) (err error) {
	wrapped := makeExt(ext)
	err = h.SetExt(rt, tag, wrapped)
	return err
}
// jsonFloatStrconvFmtPrec64 picks the format byte and precision to use
// when formatting the float64 f.
//
//   - magnitude exactly 0 or 1: 'f' with precision 1
//   - magnitude below 1e-6, or at least 1e21: 'e' with precision -1
//   - otherwise, if noFrac64 reports true for the bits of f: 'f' with precision 1
//   - otherwise: 'f' with precision -1
func jsonFloatStrconvFmtPrec64(f float64) (fmt byte, prec int8) {
	bits := math.Float64bits(f)
	// clear the sign bit to get the magnitude
	mag := math.Float64frombits(bits &^ (1 << 63))
	switch {
	case mag == 0 || mag == 1:
		return 'f', 1
	case mag < 1e-6 || mag >= 1e21:
		return 'e', -1
	case noFrac64(bits):
		return 'f', 1
	}
	return 'f', -1
}
// jsonFloatStrconvFmtPrec32 picks the format byte and precision to use
// when formatting the float32 f.
//
//   - magnitude exactly 0 or 1: 'f' with precision 1
//   - magnitude below 1e-6, or at least 1e21: 'e' with precision -1
//   - otherwise, if noFrac32 reports true for the bits of f: 'f' with precision 1
//   - otherwise: 'f' with precision -1
func jsonFloatStrconvFmtPrec32(f float32) (fmt byte, prec int8) {
	// work on the bits directly, in place of Modf (fractions) and Abs (absolute value)
	bits := math.Float32bits(f)
	// clear the sign bit to get the magnitude
	mag := math.Float32frombits(bits &^ (1 << 31))
	switch {
	case mag == 0 || mag == 1:
		return 'f', 1
	case mag < 1e-6 || mag >= 1e21:
		return 'e', -1
	case noFrac32(bits):
		return 'f', 1
	}
	return 'f', -1
}
// errJsonNoBd is the error value stating that descBd is not supported in json.
var errJsonNoBd = errors.New("descBd unsupported in json")
|