summaryrefslogtreecommitdiff
path: root/vendor/github.com/tdewolff/parse/v2/util.go
diff options
context:
space:
mode:
authorLibravatar tobi <31960611+tsmethurst@users.noreply.github.com>2022-08-07 18:19:16 +0200
committerLibravatar GitHub <noreply@github.com>2022-08-07 18:19:16 +0200
commit879b4abde722cb66463ca81a4cf6ac5465ef276d (patch)
treecdbd98840bca27317ad0860a194072671ed04ebe /vendor/github.com/tdewolff/parse/v2/util.go
parent[feature] Photoswipe gallery (#740) (diff)
downloadgotosocial-879b4abde722cb66463ca81a4cf6ac5465ef276d.tar.xz
[bugfix] Markdown formatting updates (#743)
* add minify dependency specifically for markdown * rearrange markdown formatting * update markdown tests
Diffstat (limited to 'vendor/github.com/tdewolff/parse/v2/util.go')
-rw-r--r--vendor/github.com/tdewolff/parse/v2/util.go486
1 files changed, 486 insertions, 0 deletions
diff --git a/vendor/github.com/tdewolff/parse/v2/util.go b/vendor/github.com/tdewolff/parse/v2/util.go
new file mode 100644
index 000000000..4174cb242
--- /dev/null
+++ b/vendor/github.com/tdewolff/parse/v2/util.go
@@ -0,0 +1,486 @@
+package parse
+
+import (
+ "bytes"
+ "fmt"
+ "strconv"
+ "unicode"
+)
+
// Copy returns a newly allocated slice holding the same bytes as src.
// The result never shares memory with src and is non-nil even when src is
// nil or empty.
func Copy(src []byte) (dst []byte) {
	return append(make([]byte, 0, len(src)), src...)
}
+
// ToLower lowercases the ASCII letters A-Z in src in place and returns src.
// Every other byte, including non-ASCII values, is left untouched.
func ToLower(src []byte) []byte {
	for i := range src {
		if 'A' <= src[i] && src[i] <= 'Z' {
			// Upper- and lower-case ASCII letters differ only in bit 0x20.
			src[i] |= 0x20
		}
	}
	return src
}
+
// EqualFold reports whether s equals targetLower when the ASCII letters A-Z
// in s are treated as their lower-case forms. targetLower must already be
// lowercase: an upper-case byte in targetLower only matches the identical
// byte in s.
func EqualFold(s, targetLower []byte) bool {
	if len(s) != len(targetLower) {
		return false
	}
	for i := range targetLower {
		have, want := s[i], targetLower[i]
		if have == want {
			continue
		}
		// The only other acceptable byte is the upper-case form of want.
		if have < 'A' || 'Z' < have || have+('a'-'A') != want {
			return false
		}
	}
	return true
}
+
// Printable returns a human-readable representation of r: the character
// itself when unicode.IsGraphic reports true, a two-digit hexadecimal byte
// such as 0x0A for other values below 128, and the U+XXXX notation for
// everything else.
func Printable(r rune) string {
	switch {
	case unicode.IsGraphic(r):
		return fmt.Sprintf("%c", r)
	case r < 128:
		return fmt.Sprintf("0x%02X", r)
	default:
		return fmt.Sprintf("%U", r)
	}
}
+
// whitespaceTable flags the bytes treated as whitespace: tab, new line,
// form feed, carriage return and space. Every other byte, including all
// non-ASCII values, is false.
var whitespaceTable = [256]bool{
	'\t': true,
	'\n': true,
	'\f': true,
	'\r': true,
	' ':  true,
}

// IsWhitespace returns true for space, \n, \r, \t, \f.
func IsWhitespace(c byte) bool {
	return whitespaceTable[c]
}
+
// newlineTable flags the two line-break bytes, new line and carriage
// return. Every other byte, including all non-ASCII values, is false.
var newlineTable = [256]bool{
	'\n': true,
	'\r': true,
}

// IsNewline returns true for \n, \r.
func IsNewline(c byte) bool {
	return newlineTable[c]
}
+
+// IsAllWhitespace returns true when the entire byte slice consists of space, \n, \r, \t, \f.
+func IsAllWhitespace(b []byte) bool {
+ for _, c := range b {
+ if !IsWhitespace(c) {
+ return false
+ }
+ }
+ return true
+}
+
+// TrimWhitespace removes any leading and trailing whitespace characters.
+func TrimWhitespace(b []byte) []byte {
+ n := len(b)
+ start := n
+ for i := 0; i < n; i++ {
+ if !IsWhitespace(b[i]) {
+ start = i
+ break
+ }
+ }
+ end := n
+ for i := n - 1; i >= start; i-- {
+ if !IsWhitespace(b[i]) {
+ end = i + 1
+ break
+ }
+ }
+ return b[start:end]
+}
+
+// ReplaceMultipleWhitespace replaces character series of space, \n, \t, \f, \r into a single space or newline (when the serie contained a \n or \r).
+func ReplaceMultipleWhitespace(b []byte) []byte {
+ j, k := 0, 0 // j is write position, k is start of next text section
+ for i := 0; i < len(b); i++ {
+ if IsWhitespace(b[i]) {
+ start := i
+ newline := IsNewline(b[i])
+ i++
+ for ; i < len(b) && IsWhitespace(b[i]); i++ {
+ if IsNewline(b[i]) {
+ newline = true
+ }
+ }
+ if newline {
+ b[start] = '\n'
+ } else {
+ b[start] = ' '
+ }
+ if 1 < i-start { // more than one whitespace
+ if j == 0 {
+ j = start + 1
+ } else {
+ j += copy(b[j:], b[k:start+1])
+ }
+ k = i
+ }
+ }
+ }
+ if j == 0 {
+ return b
+ } else if j == 1 { // only if starts with whitespace
+ b[k-1] = b[0]
+ return b[k-1:]
+ } else if k < len(b) {
+ j += copy(b[j:], b[k:])
+ }
+ return b[:j]
+}
+
// replaceEntities rewrites the single entity that starts at b[i]. The caller
// must guarantee that b[i] == '&' and that i+3 < len(b).
//
// Three forms are recognised:
//   - &#xHHHH; hexadecimal references: values below 128 become the raw byte,
//     values from 128 up to 9999 become the decimal reference &#NNNN;, and
//     anything larger is left alone;
//   - &#NNN; decimal references: only values below 128 are replaced;
//   - &name; named references, looked up in entitiesMap (keyed without the
//     '&' and ';').
//
// When the replacement is a single byte that has an entry in revEntitiesMap,
// that entry is written instead (or nothing is changed when the input already
// is that entry). A replacement that yields '&' is skipped when the following
// text would then read as an entity.
//
// The replacement is written in place and is assumed not to be longer than
// the entity it replaces. The returned slice is the possibly shortened b, and
// the returned index is the last byte that has been handled, so that the
// caller can continue with i++ without missing any entities.
func replaceEntities(b []byte, i int, entitiesMap map[string][]byte, revEntitiesMap map[byte][]byte) ([]byte, int) {
	const MaxEntityLength = 31 // longest HTML entity: CounterClockwiseContourIntegral
	const maxCodePoint = 10000 // hexadecimal references at or above this value are not rewritten

	var r []byte
	j := i + 1
	if b[j] == '#' {
		j++
		if b[j] == 'x' {
			j++
			c := 0
			for ; j < len(b) && (b[j] >= '0' && b[j] <= '9' || b[j] >= 'a' && b[j] <= 'f' || b[j] >= 'A' && b[j] <= 'F'); j++ {
				if maxCodePoint <= c {
					// Already out of range: keep consuming digits but stop
					// accumulating, otherwise a long digit run overflows c and
					// wraps around to a small, seemingly valid value.
					continue
				}
				if b[j] <= '9' {
					c = c<<4 + int(b[j]-'0')
				} else if b[j] <= 'F' {
					c = c<<4 + int(b[j]-'A') + 10
				} else if b[j] <= 'f' {
					c = c<<4 + int(b[j]-'a') + 10
				}
			}
			if j <= i+3 || maxCodePoint <= c {
				// no digits at all, or value too large
				return b, j - 1
			}
			if c < 128 {
				r = []byte{byte(c)}
			} else {
				// non-ASCII: rewrite as decimal reference, which is never longer
				r = append(r, '&', '#')
				r = strconv.AppendInt(r, int64(c), 10)
				r = append(r, ';')
			}
		} else {
			c := 0
			// c < 128 in the condition stops the scan as soon as the value leaves ASCII
			for ; j < len(b) && c < 128 && b[j] >= '0' && b[j] <= '9'; j++ {
				c = c*10 + int(b[j]-'0')
			}
			if j <= i+2 || 128 <= c {
				// no digits at all, or not an ASCII value
				return b, j - 1
			}
			r = []byte{byte(c)}
		}
	} else {
		// named entity: find the terminating semicolon within the maximum name length
		for ; j < len(b) && j-i-1 <= MaxEntityLength && b[j] != ';'; j++ {
		}
		if j <= i+1 || len(b) <= j {
			return b, j - 1
		}

		var ok bool
		r, ok = entitiesMap[string(b[i+1:j])]
		if !ok {
			return b, j
		}
	}

	// j is at semicolon
	n := j + 1 - i // length of the entity including '&' and ';'
	if j < len(b) && b[j] == ';' && 2 < n {
		if len(r) == 1 {
			if q, ok := revEntitiesMap[r[0]]; ok {
				if len(q) == len(b[i:j+1]) && bytes.Equal(q, b[i:j+1]) {
					// the input already is the preferred encoding of this byte
					return b, j
				}
				r = q
			} else if r[0] == '&' {
				// check if for example &amp; is followed by something that could potentially be an entity
				k := j + 1
				if k < len(b) && b[k] == '#' {
					k++
				}
				for ; k < len(b) && k-j <= MaxEntityLength && (b[k] >= '0' && b[k] <= '9' || b[k] >= 'a' && b[k] <= 'z' || b[k] >= 'A' && b[k] <= 'Z'); k++ {
				}
				if k < len(b) && b[k] == ';' {
					return b, k
				}
			}
		}

		// write the replacement over the entity and close the gap behind it
		copy(b[i:], r)
		copy(b[i+len(r):], b[j+1:])
		b = b[:len(b)-n+len(r)]
		return b, i + len(r) - 1
	}
	return b, i
}
+
+// ReplaceEntities replaces all occurrences of entites (such as &quot;) to their respective unencoded bytes.
+func ReplaceEntities(b []byte, entitiesMap map[string][]byte, revEntitiesMap map[byte][]byte) []byte {
+ for i := 0; i < len(b); i++ {
+ if b[i] == '&' && i+3 < len(b) {
+ b, i = replaceEntities(b, i, entitiesMap, revEntitiesMap)
+ }
+ }
+ return b
+}
+
+// ReplaceMultipleWhitespaceAndEntities is a combination of ReplaceMultipleWhitespace and ReplaceEntities. It is faster than executing both sequentially.
+func ReplaceMultipleWhitespaceAndEntities(b []byte, entitiesMap map[string][]byte, revEntitiesMap map[byte][]byte) []byte {
+ j, k := 0, 0 // j is write position, k is start of next text section
+ for i := 0; i < len(b); i++ {
+ if IsWhitespace(b[i]) {
+ start := i
+ newline := IsNewline(b[i])
+ i++
+ for ; i < len(b) && IsWhitespace(b[i]); i++ {
+ if IsNewline(b[i]) {
+ newline = true
+ }
+ }
+ if newline {
+ b[start] = '\n'
+ } else {
+ b[start] = ' '
+ }
+ if 1 < i-start { // more than one whitespace
+ if j == 0 {
+ j = start + 1
+ } else {
+ j += copy(b[j:], b[k:start+1])
+ }
+ k = i
+ }
+ }
+ if i+3 < len(b) && b[i] == '&' {
+ b, i = replaceEntities(b, i, entitiesMap, revEntitiesMap)
+ }
+ }
+ if j == 0 {
+ return b
+ } else if j == 1 { // only if starts with whitespace
+ b[k-1] = b[0]
+ return b[k-1:]
+ } else if k < len(b) {
+ j += copy(b[j:], b[k:])
+ }
+ return b[:j]
+}
+
// URLEncodingTable is a charmap for which characters need escaping in the URL
// encoding scheme. Every byte is marked for escaping except the ASCII digits,
// the ASCII letters and the marks ! ' ( ) * - . _ ~.
var URLEncodingTable = func() (table [256]bool) {
	// Escape by default: control characters, space, DEL and all non-ASCII bytes
	// fall in this category together with the remaining ASCII punctuation.
	for i := range table {
		table[i] = true
	}

	const unescaped = "0123456789" +
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
		"abcdefghijklmnopqrstuvwxyz" +
		"!'()*-._~"
	for i := 0; i < len(unescaped); i++ {
		table[unescaped[i]] = false
	}
	return table
}()
+
// DataURIEncodingTable is a charmap for which characters need escaping in the Data URI encoding scheme.
// Escape only non-printable characters, unicode and %, #, &.
// IE11 additionally requires encoding of \, [, ], ", <, >, `, {, }, |, ^ which is not required by Chrome, Firefox, Opera, Edge, Safari, Yandex.
// To pass the HTML validator, restricted URL characters must be escaped: non-printable characters, space, <, >, #, %, ".
var DataURIEncodingTable = func() (table [256]bool) {
	// Control characters and space (0x00-0x20), DEL (0x7F) and every non-ASCII
	// byte are escaped; the printable range in between is not, by default.
	for i := range table {
		table[i] = i <= 0x20 || 0x7F <= i
	}

	// Printable ASCII characters that still require escaping.
	const escaped = "\"#%&<>[\\]^`{|}"
	for i := 0; i < len(escaped); i++ {
		table[escaped[i]] = true
	}
	return table
}()
+
// EncodeURL encodes bytes using the URL encoding scheme: every byte c for
// which table[c] is true is replaced by a percent sign followed by its two
// upper-case hexadecimal digits.
//
// The escape sequences themselves are never re-examined, so a table that
// marks '%' or a hexadecimal digit for escaping is handled correctly.
//
// The encoding is done in the backing array of b when its capacity suffices;
// otherwise a larger array is allocated by append. When nothing needs
// escaping b is returned unchanged.
func EncodeURL(b []byte, table [256]bool) []byte {
	const hexDigits = "0123456789ABCDEF"

	// First pass: count the escapes so that b has to grow only once.
	escapes := 0
	for _, c := range b {
		if table[c] {
			escapes++
		}
	}
	if escapes == 0 {
		return b
	}

	// Second pass: fill from the back so that no unread input is overwritten.
	// dst-src always equals twice the number of escapes still to be written,
	// so the loop ends once the remaining prefix is already in place.
	src := len(b) - 1
	b = append(b, make([]byte, 2*escapes)...)
	for dst := len(b) - 1; src < dst; src-- {
		c := b[src]
		if !table[c] {
			b[dst] = c
			dst--
			continue
		}
		b[dst-2] = '%'
		b[dst-1] = hexDigits[c>>4]
		b[dst] = hexDigits[c&15]
		dst -= 3
	}
	return b
}
+
// DecodeURL decodes a URL encoded using the URL encoding scheme, in place.
// A percent sign followed by two hexadecimal digits is replaced by the byte
// it denotes, but only when that byte is below 128; other escapes are kept
// as they are. Every literal plus sign becomes a space. The returned slice
// shares b's backing array.
func DecodeURL(b []byte) []byte {
	for pos := 0; pos < len(b); pos++ {
		switch ch := b[pos]; {
		case ch == '+':
			b[pos] = ' '
		case ch == '%' && pos+2 < len(b):
			// Both bytes behind the percent sign must be hexadecimal digits.
			val, valid := 0, true
			for _, h := range b[pos+1 : pos+3] {
				switch {
				case '0' <= h && h <= '9':
					val = val<<4 + int(h-'0')
				case 'A' <= h && h <= 'F':
					val = val<<4 + int(h-'A') + 10
				case 'a' <= h && h <= 'f':
					val = val<<4 + int(h-'a') + 10
				default:
					valid = false
				}
			}
			if valid && val < 128 {
				// Store the decoded byte over the percent sign and close the
				// two-byte gap left by the digits.
				b[pos] = byte(val)
				b = append(b[:pos+1], b[pos+3:]...)
			}
		}
	}
	return b
}