diff options
Diffstat (limited to 'vendor/github.com/tdewolff/parse/v2/common.go')
-rw-r--r-- | vendor/github.com/tdewolff/parse/v2/common.go | 546 |
1 files changed, 0 insertions, 546 deletions
var (
	dataSchemeBytes = []byte("data:")
	base64Bytes     = []byte("base64")
	textMimeBytes   = []byte("text/plain")
)

// ErrBadDataURI is returned by DataURI when the byte slice does not start with 'data:' or is too short.
var ErrBadDataURI = errors.New("not a data URI")

// Number returns how many leading bytes of b form a number matching
// (+|-)?([0-9]+(\.[0-9]+)?|\.[0-9]+)((e|E)(+|-)?[0-9]+)?, or 0 when b does not start with one.
// A trailing '.' or exponent marker that is not followed by digits is left out of the count.
func Number(b []byte) int {
	n := len(b)
	if n == 0 {
		return 0
	}

	// digitsEnd returns the index of the first byte at or after p that is not an ASCII digit.
	digitsEnd := func(p int) int {
		for p < n && '0' <= b[p] && b[p] <= '9' {
			p++
		}
		return p
	}

	pos := 0
	if b[0] == '+' || b[0] == '-' {
		if n == 1 {
			return 0
		}
		pos = 1
	}

	intEnd := digitsEnd(pos)
	hasInt := intEnd > pos
	pos = intEnd

	if pos < n && b[pos] == '.' {
		fracEnd := digitsEnd(pos + 1)
		switch {
		case fracEnd > pos+1:
			pos = fracEnd
		case hasInt:
			// the dot could belong to the next token
			return pos
		default:
			return 0
		}
	} else if !hasInt {
		return 0
	}

	if pos < n && (b[pos] == 'e' || b[pos] == 'E') {
		expStart := pos + 1
		if expStart < n && (b[expStart] == '+' || b[expStart] == '-') {
			expStart++
		}
		if expEnd := digitsEnd(expStart); expEnd > expStart {
			return expEnd
		}
		// the e could belong to the next token, so it is not counted
	}
	return pos
}

// Dimension parses a byte-slice and returns the length of the number and the length of its unit.
// The unit is either a single '%' or a run of ASCII letters directly following the number; its
// length is 0 when there is no number, nothing follows the number, or another byte follows it.
func Dimension(b []byte) (int, int) {
	num := Number(b)
	if num == 0 || num == len(b) {
		return num, 0
	}
	if b[num] == '%' {
		return num, 1
	}

	end := num
	for end < len(b) {
		// Setting bit 0x20 folds 'A'-'Z' onto 'a'-'z'; no other byte lands in that range.
		if lower := b[end] | 0x20; lower < 'a' || 'z' < lower {
			break
		}
		end++
	}
	return num, end - num
}

// Mediatype parses a given mediatype and splits the mimetype from the parameters.
// It works similar to mime.ParseMediaType but is faster.
//
// Leading spaces are skipped and the mimetype is taken to be at least three bytes long. The
// returned map is nil unless a ';' follows the mimetype (optionally after spaces). A parameter
// without '=' maps to the empty string, and keys and values are cut at spaces and semicolons.
func Mediatype(b []byte) ([]byte, map[string]string) {
	lead := 0
	for lead < len(b) && b[lead] == ' ' {
		lead++
	}
	b = b[lead:]
	n := len(b)

	// Find the first separator at index three or later; without one the whole input is the mimetype.
	sep := -1
	for p := 3; p < n; p++ {
		if b[p] == ';' || b[p] == ' ' {
			sep = p
			break
		}
	}
	if sep < 0 {
		return b, nil
	}
	mimetype := b[:sep]

	// skipSpaces returns the index of the first byte at or after p that is not a space.
	skipSpaces := func(p int) int {
		for p < n && b[p] == ' ' {
			p++
		}
		return p
	}

	pos := skipSpaces(sep)
	if pos >= n || b[pos] != ';' {
		// Something other than parameters follows the mimetype: it is ignored.
		return mimetype, nil
	}

	params := map[string]string{}
	s := string(b) // keys and values are substrings of this single copy
	for pos < n && s[pos] == ';' {
		pos = skipSpaces(pos + 1)
		keyStart := pos
		for pos < n && s[pos] != '=' && s[pos] != ';' && s[pos] != ' ' {
			pos++
		}
		key := s[keyStart:pos]

		pos = skipSpaces(pos)
		valStart := pos
		if pos < n && s[pos] == '=' {
			pos = skipSpaces(pos + 1)
			valStart = pos
			for pos < n && s[pos] != ';' && s[pos] != ' ' {
				pos++
			}
		}
		params[key] = s[valStart:pos]
		pos = skipSpaces(pos)
	}
	return mimetype, params
}

// DataURI parses the given data URI and returns the mediatype, data and an error.
-func DataURI(dataURI []byte) ([]byte, []byte, error) { - if len(dataURI) > 5 && bytes.Equal(dataURI[:5], dataSchemeBytes) { - dataURI = dataURI[5:] - inBase64 := false - var mediatype []byte - i := 0 - for j := 0; j < len(dataURI); j++ { - c := dataURI[j] - if c == '=' || c == ';' || c == ',' { - if c != '=' && bytes.Equal(TrimWhitespace(dataURI[i:j]), base64Bytes) { - if len(mediatype) > 0 { - mediatype = mediatype[:len(mediatype)-1] - } - inBase64 = true - i = j - } else if c != ',' { - mediatype = append(append(mediatype, TrimWhitespace(dataURI[i:j])...), c) - i = j + 1 - } else { - mediatype = append(mediatype, TrimWhitespace(dataURI[i:j])...) - } - if c == ',' { - if len(mediatype) == 0 || mediatype[0] == ';' { - mediatype = textMimeBytes - } - data := dataURI[j+1:] - if inBase64 { - decoded := make([]byte, base64.StdEncoding.DecodedLen(len(data))) - n, err := base64.StdEncoding.Decode(decoded, data) - if err != nil { - return nil, nil, err - } - data = decoded[:n] - } else { - data = DecodeURL(data) - } - return mediatype, data, nil - } - } - } - } - return nil, nil, ErrBadDataURI -} - -// QuoteEntity parses the given byte slice and returns the quote that got matched (' or ") and its entity length. 
-// TODO: deprecated -func QuoteEntity(b []byte) (quote byte, n int) { - if len(b) < 5 || b[0] != '&' { - return 0, 0 - } - if b[1] == '#' { - if b[2] == 'x' { - i := 3 - for i < len(b) && b[i] == '0' { - i++ - } - if i+2 < len(b) && b[i] == '2' && b[i+2] == ';' { - if b[i+1] == '2' { - return '"', i + 3 // " - } else if b[i+1] == '7' { - return '\'', i + 3 // ' - } - } - } else { - i := 2 - for i < len(b) && b[i] == '0' { - i++ - } - if i+2 < len(b) && b[i] == '3' && b[i+2] == ';' { - if b[i+1] == '4' { - return '"', i + 3 // " - } else if b[i+1] == '9' { - return '\'', i + 3 // ' - } - } - } - } else if len(b) >= 6 && b[5] == ';' { - if bytes.Equal(b[1:5], []byte{'q', 'u', 'o', 't'}) { - return '"', 6 // " - } else if bytes.Equal(b[1:5], []byte{'a', 'p', 'o', 's'}) { - return '\'', 6 // ' - } - } - return 0, 0 -} - -// ReplaceMultipleWhitespace replaces character series of space, \n, \t, \f, \r into a single space or newline (when the serie contained a \n or \r). -func ReplaceMultipleWhitespace(b []byte) []byte { - j, k := 0, 0 // j is write position, k is start of next text section - for i := 0; i < len(b); i++ { - if IsWhitespace(b[i]) { - start := i - newline := IsNewline(b[i]) - i++ - for ; i < len(b) && IsWhitespace(b[i]); i++ { - if IsNewline(b[i]) { - newline = true - } - } - if newline { - b[start] = '\n' - } else { - b[start] = ' ' - } - if 1 < i-start { // more than one whitespace - if j == 0 { - j = start + 1 - } else { - j += copy(b[j:], b[k:start+1]) - } - k = i - } - } - } - if j == 0 { - return b - } else if j == 1 { // only if starts with whitespace - b[k-1] = b[0] - return b[k-1:] - } else if k < len(b) { - j += copy(b[j:], b[k:]) - } - return b[:j] -} - -// replaceEntities will replace in b at index i, assuming that b[i] == '&' and that i+3<len(b). The returned int will be the last character of the entity, so that the next iteration can safely do i++ to continue and not miss any entitites. 
//
// Three kinds of references are handled:
//   - hexadecimal (&#x..;): values below 128 become the byte itself, values from 128 up to 9999
//     are rewritten to the decimal form &#N;, larger values are left untouched;
//   - decimal (&#..;): only values below 128 are replaced, by the byte itself;
//   - named (&name;): replaced by entitiesMap[name] when the name is present in the map.
//
// When the replacement is a single byte that has an entry in revEntitiesMap, that entry is
// written instead (or the input is kept when it already equals that entry). A replacement that
// is a bare '&' is not written when the byte after the entity is alphanumeric or '#', as that
// could form a new entity. References without a terminating ';' are never replaced.
//
// The replacement is done in place, so the returned slice shares its memory with b. A
// replacement that is longer than the reference it replaces cannot be written in place without
// overwriting the bytes that follow; such references are left untouched.
func replaceEntities(b []byte, i int, entitiesMap map[string][]byte, revEntitiesMap map[byte][]byte) ([]byte, int) {
	const MaxEntityLength = 31 // longest HTML entity: CounterClockwiseContourIntegral
	const maxHexValue = 10000  // hexadecimal references with this value or higher are not rewritten

	var r []byte
	j := i + 1
	if b[j] == '#' {
		j++
		if b[j] == 'x' {
			j++
			c := 0
		hexDigits:
			for ; j < len(b); j++ {
				var digit int
				switch ch := b[j]; {
				case '0' <= ch && ch <= '9':
					digit = int(ch - '0')
				case 'a' <= ch && ch <= 'f':
					digit = int(ch-'a') + 10
				case 'A' <= ch && ch <= 'F':
					digit = int(ch-'A') + 10
				default:
					break hexDigits
				}
				// Stop accumulating once the value is out of range: the remaining digits are
				// still consumed, but c can no longer overflow and wrap around to a small value.
				if c < maxHexValue {
					c = c<<4 + digit
				}
			}
			if j <= i+3 || maxHexValue <= c {
				return b, j - 1
			}
			if c < 128 {
				r = []byte{byte(c)}
			} else {
				r = append(r, '&', '#')
				r = strconv.AppendInt(r, int64(c), 10)
				r = append(r, ';')
			}
		} else {
			c := 0
			for ; j < len(b) && c < 128 && b[j] >= '0' && b[j] <= '9'; j++ {
				c = c*10 + int(b[j]-'0')
			}
			if j <= i+2 || 128 <= c {
				return b, j - 1
			}
			r = []byte{byte(c)}
		}
	} else {
		for ; j < len(b) && j-i-1 <= MaxEntityLength && b[j] != ';'; j++ {
			if !(b[j] >= '0' && b[j] <= '9' || b[j] >= 'a' && b[j] <= 'z' || b[j] >= 'A' && b[j] <= 'Z') {
				// invalid character reference character
				break
			}
		}
		if len(b) <= j || j == i+1 || b[j] != ';' {
			return b, i
		}

		var ok bool
		r, ok = entitiesMap[string(b[i+1:j])]
		if !ok {
			return b, j
		}
	}

	// j is at semicolon
	n := j + 1 - i
	if j < len(b) && b[j] == ';' && 2 < n {
		if len(r) == 1 {
			if q, ok := revEntitiesMap[r[0]]; ok {
				if len(q) == len(b[i:j+1]) && bytes.Equal(q, b[i:j+1]) {
					return b, j
				}
				r = q
			} else if r[0] == '&' {
				// check if for example & is followed by something that could potentially be an entity
				k := j + 1
				if k < len(b) && (b[k] >= '0' && b[k] <= '9' || b[k] >= 'a' && b[k] <= 'z' || b[k] >= 'A' && b[k] <= 'Z' || b[k] == '#') {
					return b, k
				}
			}
		}

		if n < len(r) {
			// Writing r would overwrite the bytes after the entity and grow b past its length.
			return b, j
		}

		copy(b[i:], r)
		copy(b[i+len(r):], b[j+1:])
		b = b[:len(b)-n+len(r)]
		return b, i + len(r) - 1
	}
	return b, i
}
ReplaceEntities replaces all occurrences of entites (such as ") to their respective unencoded bytes. -func ReplaceEntities(b []byte, entitiesMap map[string][]byte, revEntitiesMap map[byte][]byte) []byte { - for i := 0; i < len(b); i++ { - if b[i] == '&' && i+3 < len(b) { - b, i = replaceEntities(b, i, entitiesMap, revEntitiesMap) - } - } - return b -} - -// ReplaceMultipleWhitespaceAndEntities is a combination of ReplaceMultipleWhitespace and ReplaceEntities. It is faster than executing both sequentially. -func ReplaceMultipleWhitespaceAndEntities(b []byte, entitiesMap map[string][]byte, revEntitiesMap map[byte][]byte) []byte { - j, k := 0, 0 // j is write position, k is start of next text section - for i := 0; i < len(b); i++ { - if IsWhitespace(b[i]) { - start := i - newline := IsNewline(b[i]) - i++ - for ; i < len(b) && IsWhitespace(b[i]); i++ { - if IsNewline(b[i]) { - newline = true - } - } - if newline { - b[start] = '\n' - } else { - b[start] = ' ' - } - if 1 < i-start { // more than one whitespace - if j == 0 { - j = start + 1 - } else { - j += copy(b[j:], b[k:start+1]) - } - k = i - } - } - if i+3 < len(b) && b[i] == '&' { - b, i = replaceEntities(b, i, entitiesMap, revEntitiesMap) - } - } - if j == 0 { - return b - } else if j == 1 { // only if starts with whitespace - b[k-1] = b[0] // move newline to end of whitespace - return b[k-1:] - } else if k < len(b) { - j += copy(b[j:], b[k:]) - } - return b[:j] -} - -// URLEncodingTable is a charmap for which characters need escaping in the URL encoding scheme -var URLEncodingTable = [256]bool{ - // ASCII - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - - true, false, true, true, true, true, true, false, // space, ", #, $, %, & - false, false, false, true, true, false, false, true, // +, comma, / - false, false, false, false, false, false, false, false, - false, 
false, true, true, true, true, true, true, // :, ;, <, =, >, ? - - true, false, false, false, false, false, false, false, // @ - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, true, true, true, true, false, // [, \, ], ^ - - true, false, false, false, false, false, false, false, // ` - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, true, true, true, false, true, // {, |, }, DEL - - // non-ASCII - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, -} - -// DataURIEncodingTable is a charmap for which characters need escaping in the Data URI encoding scheme -// Escape only non-printable characters, unicode and %, #, &. 
-// IE11 additionally requires encoding of \, [, ], ", <, >, `, {, }, |, ^ which is not required by Chrome, Firefox, Opera, Edge, Safari, Yandex -// To pass the HTML validator, restricted URL characters must be escaped: non-printable characters, space, <, >, #, %, " -var DataURIEncodingTable = [256]bool{ - // ASCII - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - - true, false, true, true, false, true, true, false, // space, ", #, %, & - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, false, true, false, true, false, // <, > - - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, true, true, true, true, false, // [, \, ], ^ - - true, false, false, false, false, false, false, false, // ` - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, false, - false, false, false, true, true, true, false, true, // {, |, }, DEL - - // non-ASCII - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - - true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, - true, 
true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, -} - -// EncodeURL encodes bytes using the URL encoding scheme -func EncodeURL(b []byte, table [256]bool) []byte { - for i := 0; i < len(b); i++ { - c := b[i] - if table[c] { - b = append(b, 0, 0) - copy(b[i+3:], b[i+1:]) - b[i+0] = '%' - b[i+1] = "0123456789ABCDEF"[c>>4] - b[i+2] = "0123456789ABCDEF"[c&15] - } - } - return b -} - -// DecodeURL decodes an URL encoded using the URL encoding scheme -func DecodeURL(b []byte) []byte { - for i := 0; i < len(b); i++ { - if b[i] == '%' && i+2 < len(b) { - j := i + 1 - c := 0 - for ; j < i+3 && (b[j] >= '0' && b[j] <= '9' || b[j] >= 'a' && b[j] <= 'f' || b[j] >= 'A' && b[j] <= 'F'); j++ { - if b[j] <= '9' { - c = c<<4 + int(b[j]-'0') - } else if b[j] <= 'F' { - c = c<<4 + int(b[j]-'A') + 10 - } else if b[j] <= 'f' { - c = c<<4 + int(b[j]-'a') + 10 - } - } - if j == i+3 && c < 128 { - b[i] = byte(c) - b = append(b[:i+1], b[i+3:]...) - } - } else if b[i] == '+' { - b[i] = ' ' - } - } - return b -} |