Diffstat (limited to 'vendor/github.com/microcosm-cc/bluemonday/sanitize.go')
-rw-r--r--  vendor/github.com/microcosm-cc/bluemonday/sanitize.go  1096
1 file changed, 1096 deletions(-)
diff --git a/vendor/github.com/microcosm-cc/bluemonday/sanitize.go b/vendor/github.com/microcosm-cc/bluemonday/sanitize.go
deleted file mode 100644
index 47c31f7da..000000000
--- a/vendor/github.com/microcosm-cc/bluemonday/sanitize.go
+++ /dev/null
@@ -1,1096 +0,0 @@
-// Copyright (c) 2014, David Kitchen <david@buro9.com>
-//
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright notice, this
-// list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * Neither the name of the organisation (Microcosm) nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-package bluemonday
-
-import (
- "bytes"
- "fmt"
- "io"
- "net/url"
- "regexp"
- "strconv"
- "strings"
-
- "golang.org/x/net/html"
-
- "github.com/aymerick/douceur/parser"
-)
-
-var (
- dataAttribute = regexp.MustCompile("^data-.+")
- dataAttributeXMLPrefix = regexp.MustCompile("^xml.+")
- dataAttributeInvalidChars = regexp.MustCompile("[A-Z;]+")
- cssUnicodeChar = regexp.MustCompile(`\\[0-9a-f]{1,6} ?`)
- dataURIbase64Prefix = regexp.MustCompile(`^data:[^,]*;base64,`)
-)
-
-// Sanitize takes a string that contains an HTML fragment or document and
-// applies the given policy allowlist.
-//
-// It returns an HTML string that has been sanitized by the policy, or an
-// empty string if an error occurred (most likely as a consequence of
-// extremely malformed input).
-func (p *Policy) Sanitize(s string) string {
- if strings.TrimSpace(s) == "" {
- return s
- }
-
- return p.sanitizeWithBuff(strings.NewReader(s)).String()
-}
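As a reader's aid, a minimal sketch of how callers typically use Sanitize, here via UGCPolicy, one of the stock policies bluemonday ships:

```go
package main

import (
	"fmt"

	"github.com/microcosm-cc/bluemonday"
)

func main() {
	// UGCPolicy is a stock allowlist intended for user generated content.
	p := bluemonday.UGCPolicy()

	// The event handler is stripped and rel="nofollow" is added.
	out := p.Sanitize(`<a onblur="alert(1)" href="http://example.com">link</a>`)
	fmt.Println(out) // <a href="http://example.com" rel="nofollow">link</a>
}
```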
-
-// SanitizeBytes takes a []byte that contains an HTML fragment or document
-// and applies the given policy allowlist.
-//
-// It returns a []byte containing the HTML that has been sanitized by the
-// policy, or an empty []byte if an error occurred (most likely as a
-// consequence of extremely malformed input).
-func (p *Policy) SanitizeBytes(b []byte) []byte {
- if len(bytes.TrimSpace(b)) == 0 {
- return b
- }
-
- return p.sanitizeWithBuff(bytes.NewReader(b)).Bytes()
-}
-
-// SanitizeReader takes an io.Reader that contains an HTML fragment or document
-// and applies the given policy allowlist.
-//
-// It returns a bytes.Buffer containing the HTML that has been sanitized by the
-// policy. Errors during sanitization will merely return an empty result.
-func (p *Policy) SanitizeReader(r io.Reader) *bytes.Buffer {
- return p.sanitizeWithBuff(r)
-}
-
-// SanitizeReaderToWriter takes an io.Reader that contains an HTML fragment or
-// document, applies the given policy allowlist, and writes the sanitized HTML
-// to the provided io.Writer. It returns an error if one occurs.
-func (p *Policy) SanitizeReaderToWriter(r io.Reader, w io.Writer) error {
- return p.sanitize(r, w)
-}
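A sketch of the streaming variant; the input file name is illustrative only. Unlike Sanitize and SanitizeBytes, no intermediate buffer is held and tokenizer errors are surfaced rather than swallowed:

```go
package main

import (
	"log"
	"os"

	"github.com/microcosm-cc/bluemonday"
)

func main() {
	in, err := os.Open("untrusted.html") // hypothetical input file
	if err != nil {
		log.Fatal(err)
	}
	defer in.Close()

	// Sanitized output is written to stdout as it is produced.
	if err := bluemonday.UGCPolicy().SanitizeReaderToWriter(in, os.Stdout); err != nil {
		log.Fatal(err)
	}
}
```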
-
-// Query represents a single part of the query string: a query parameter.
-type Query struct {
- Key string
- Value string
- HasValue bool
-}
-
-func parseQuery(query string) (values []Query, err error) {
- // This is essentially a copy of parseQuery from
- // https://golang.org/src/net/url/url.go but adjusted to build our values
- // using our own Query type, which we need in order to preserve the
- // ordering of the query string
- for query != "" {
- key := query
- if i := strings.IndexAny(key, "&;"); i >= 0 {
- key, query = key[:i], key[i+1:]
- } else {
- query = ""
- }
- if key == "" {
- continue
- }
- value := ""
- hasValue := false
- if i := strings.Index(key, "="); i >= 0 {
- key, value = key[:i], key[i+1:]
- hasValue = true
- }
- key, err1 := url.QueryUnescape(key)
- if err1 != nil {
- if err == nil {
- err = err1
- }
- continue
- }
- value, err1 = url.QueryUnescape(value)
- if err1 != nil {
- if err == nil {
- err = err1
- }
- continue
- }
- values = append(values, Query{
- Key: key,
- Value: value,
- HasValue: hasValue,
- })
- }
- return values, err
-}
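The reason for not simply using the standard library here can be seen in a short sketch: url.ParseQuery returns a url.Values map, and encoding a map emits keys in sorted order, destroying the original parameter order that the parseQuery above preserves:

```go
package main

import (
	"fmt"
	"net/url"
)

func main() {
	vals, _ := url.ParseQuery("b=2&a=1&c=3")
	// url.Values.Encode sorts by key: the original ordering of the
	// query string is lost.
	fmt.Println(vals.Encode()) // a=1&b=2&c=3
}
```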
-
-func encodeQueries(queries []Query) string {
- var buff bytes.Buffer
- for i, query := range queries {
- buff.WriteString(url.QueryEscape(query.Key))
- if query.HasValue {
- buff.WriteString("=")
- buff.WriteString(url.QueryEscape(query.Value))
- }
- if i < len(queries)-1 {
- buff.WriteString("&")
- }
- }
- return buff.String()
-}
-
-func sanitizedURL(val string) (string, error) {
- u, err := url.Parse(val)
- if err != nil {
- return "", err
- }
-
- // We use parseQuery rather than u.Query() so that the ordering of the
- // parameters is preserved; url.Values is a map and therefore unordered.
- queryValues, err := parseQuery(u.RawQuery)
- if err != nil {
- return "", err
- }
- // sanitize the url query params
- for i, query := range queryValues {
- queryValues[i].Key = html.EscapeString(query.Key)
- }
- u.RawQuery = encodeQueries(queryValues)
- // u.String() will also sanitize host/scheme/user/pass
- return u.String(), nil
-}
-
-// Performs the actual sanitization process.
-func (p *Policy) sanitizeWithBuff(r io.Reader) *bytes.Buffer {
- var buff bytes.Buffer
- if err := p.sanitize(r, &buff); err != nil {
- return &bytes.Buffer{}
- }
- return &buff
-}
-
-type asStringWriter struct {
- io.Writer
-}
-
-func (a *asStringWriter) WriteString(s string) (int, error) {
- return a.Write([]byte(s))
-}
-
-func (p *Policy) sanitize(r io.Reader, w io.Writer) error {
- // It is possible that the developer has created the policy via:
- // p := bluemonday.Policy{}
- // rather than:
- // p := bluemonday.NewPolicy()
- // If this is the case, and if they haven't yet triggered an action that
- // would initialize the maps, then we need to do that.
- p.init()
-
- buff, ok := w.(stringWriterWriter)
- if !ok {
- buff = &asStringWriter{w}
- }
-
- var (
- skipElementContent bool
- skippingElementsCount int64
- skipClosingTag bool
- closingTagToSkipStack []string
- mostRecentlyStartedToken string
- )
-
- tokenizer := html.NewTokenizer(r)
- for {
- if tokenizer.Next() == html.ErrorToken {
- err := tokenizer.Err()
- if err == io.EOF {
- // End of input means end of processing
- return nil
- }
-
- // Raw tokenizer error
- return err
- }
-
- token := tokenizer.Token()
- switch token.Type {
- case html.DoctypeToken:
-
- // DocType is not handled as there is no safe parsing mechanism
- // provided by golang.org/x/net/html for the content, and this can
- // be misused to insert HTML tags that are not then sanitized
- //
- // One might wish to recursively sanitize here using the same policy
- // but I will need to do some further testing before considering
- // this.
-
- case html.CommentToken:
-
- // Comments are ignored by default
- if p.allowComments {
- // But if allowed then write the comment out as-is
- buff.WriteString(token.String())
- }
-
- case html.StartTagToken:
-
- mostRecentlyStartedToken = normaliseElementName(token.Data)
-
- switch normaliseElementName(token.Data) {
- case `script`:
- if !p.allowUnsafe {
- continue
- }
- case `style`:
- if !p.allowUnsafe {
- continue
- }
- }
-
- aps, ok := p.elsAndAttrs[token.Data]
- if !ok {
- aa, matched := p.matchRegex(token.Data)
- if !matched {
- if _, ok := p.setOfElementsToSkipContent[token.Data]; ok {
- skipElementContent = true
- skippingElementsCount++
- }
- if p.addSpaces {
- if _, err := buff.WriteString(" "); err != nil {
- return err
- }
- }
- break
- }
- aps = aa
- }
- if len(token.Attr) != 0 {
- token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
- }
-
- if len(token.Attr) == 0 {
- if !p.allowNoAttrs(token.Data) {
- skipClosingTag = true
- closingTagToSkipStack = append(closingTagToSkipStack, token.Data)
- if p.addSpaces {
- if _, err := buff.WriteString(" "); err != nil {
- return err
- }
- }
- break
- }
- }
-
- if !skipElementContent {
- if _, err := buff.WriteString(token.String()); err != nil {
- return err
- }
- }
-
- case html.EndTagToken:
-
- if mostRecentlyStartedToken == normaliseElementName(token.Data) {
- mostRecentlyStartedToken = ""
- }
-
- switch normaliseElementName(token.Data) {
- case `script`:
- if !p.allowUnsafe {
- continue
- }
- case `style`:
- if !p.allowUnsafe {
- continue
- }
- }
-
- if skipClosingTag && closingTagToSkipStack[len(closingTagToSkipStack)-1] == token.Data {
- closingTagToSkipStack = closingTagToSkipStack[:len(closingTagToSkipStack)-1]
- if len(closingTagToSkipStack) == 0 {
- skipClosingTag = false
- }
- if p.addSpaces {
- if _, err := buff.WriteString(" "); err != nil {
- return err
- }
- }
- break
- }
- if _, ok := p.elsAndAttrs[token.Data]; !ok {
- match := false
- for regex := range p.elsMatchingAndAttrs {
- if regex.MatchString(token.Data) {
- skipElementContent = false
- match = true
- break
- }
- }
- if _, ok := p.setOfElementsToSkipContent[token.Data]; ok && !match {
- skippingElementsCount--
- if skippingElementsCount == 0 {
- skipElementContent = false
- }
- }
- if !match {
- if p.addSpaces {
- if _, err := buff.WriteString(" "); err != nil {
- return err
- }
- }
- break
- }
- }
-
- if !skipElementContent {
- if _, err := buff.WriteString(token.String()); err != nil {
- return err
- }
- }
-
- case html.SelfClosingTagToken:
-
- switch normaliseElementName(token.Data) {
- case `script`:
- if !p.allowUnsafe {
- continue
- }
- case `style`:
- if !p.allowUnsafe {
- continue
- }
- }
-
- aps, ok := p.elsAndAttrs[token.Data]
- if !ok {
- aa, matched := p.matchRegex(token.Data)
- if !matched {
- if p.addSpaces {
- if _, err := buff.WriteString(" "); err != nil {
- return err
- }
- }
- break
- }
- aps = aa
- }
-
- if len(token.Attr) != 0 {
- token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
- }
-
- if len(token.Attr) == 0 && !p.allowNoAttrs(token.Data) {
- if p.addSpaces {
- if _, err := buff.WriteString(" "); err != nil {
- return err
- }
- }
- break
- }
- if !skipElementContent {
- if _, err := buff.WriteString(token.String()); err != nil {
- return err
- }
- }
-
- case html.TextToken:
-
- if !skipElementContent {
- switch mostRecentlyStartedToken {
- case `script`:
- // not encouraged, but if a policy allows JavaScript we
- // should not HTML escape it as that would break the output
- //
- // requires p.AllowUnsafe()
- if p.allowUnsafe {
- if _, err := buff.WriteString(token.Data); err != nil {
- return err
- }
- }
- case "style":
- // not encouraged, but if a policy allows CSS styles we
- // should not HTML escape it as that would break the output
- //
- // requires p.AllowUnsafe()
- if p.allowUnsafe {
- if _, err := buff.WriteString(token.Data); err != nil {
- return err
- }
- }
- default:
- // HTML escape the text
- if _, err := buff.WriteString(token.String()); err != nil {
- return err
- }
- }
- }
-
- default:
- // A token that didn't exist in the html package when we wrote this
- return fmt.Errorf("unknown token: %v", token)
- }
- }
-}
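The loop above follows the standard golang.org/x/net/html tokenizer pattern. Stripped of all policy logic, the skeleton looks like this (a standalone sketch, not part of the package):

```go
package main

import (
	"fmt"
	"io"
	"strings"

	"golang.org/x/net/html"
)

func main() {
	z := html.NewTokenizer(strings.NewReader(`<p onclick="x()">hi</p>`))
	for {
		if z.Next() == html.ErrorToken {
			if z.Err() == io.EOF {
				return // end of input means end of processing
			}
			fmt.Println("tokenizer error:", z.Err())
			return
		}
		t := z.Token()
		fmt.Printf("%v %q attrs=%v\n", t.Type, t.Data, t.Attr)
	}
}
```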
-
-// sanitizeAttrs takes a set of element attribute policies and the global
-// attribute policies and applies them to the []html.Attribute returning a set
-// of html.Attributes that match the policies
-func (p *Policy) sanitizeAttrs(
- elementName string,
- attrs []html.Attribute,
- aps map[string][]attrPolicy,
-) []html.Attribute {
-
- if len(attrs) == 0 {
- return attrs
- }
-
- hasStylePolicies := false
- sps, elementHasStylePolicies := p.elsAndStyles[elementName]
- if len(p.globalStyles) > 0 || (elementHasStylePolicies && len(sps) > 0) {
- hasStylePolicies = true
- }
- // no specific element policy found, look for a pattern match
- if !hasStylePolicies {
- for k, v := range p.elsMatchingAndStyles {
- if k.MatchString(elementName) {
- if len(v) > 0 {
- hasStylePolicies = true
- break
- }
- }
- }
- }
-
- // Builds a new attribute slice based on whether the attribute has been
- // allowed explicitly or globally.
- cleanAttrs := []html.Attribute{}
-attrsLoop:
- for _, htmlAttr := range attrs {
- if p.allowDataAttributes {
- // If we see a data attribute, let it through.
- if isDataAttribute(htmlAttr.Key) {
- cleanAttrs = append(cleanAttrs, htmlAttr)
- continue
- }
- }
- // Is this a "style" attribute, and if so, do we need to sanitize it?
- if htmlAttr.Key == "style" && hasStylePolicies {
- htmlAttr = p.sanitizeStyles(htmlAttr, elementName)
- if htmlAttr.Val == "" {
- // We've sanitized away any and all styles; don't bother to
- // output the style attribute (even if it's allowed)
- continue
- }
- cleanAttrs = append(cleanAttrs, htmlAttr)
- continue
- }
-
- // Is there an element specific attribute policy that applies?
- if apl, ok := aps[htmlAttr.Key]; ok {
- for _, ap := range apl {
- if ap.regexp != nil {
- if ap.regexp.MatchString(htmlAttr.Val) {
- cleanAttrs = append(cleanAttrs, htmlAttr)
- continue attrsLoop
- }
- } else {
- cleanAttrs = append(cleanAttrs, htmlAttr)
- continue attrsLoop
- }
- }
- }
-
- // Is there a global attribute policy that applies?
- if apl, ok := p.globalAttrs[htmlAttr.Key]; ok {
- for _, ap := range apl {
- if ap.regexp != nil {
- if ap.regexp.MatchString(htmlAttr.Val) {
- cleanAttrs = append(cleanAttrs, htmlAttr)
- continue attrsLoop
- }
- } else {
- cleanAttrs = append(cleanAttrs, htmlAttr)
- continue attrsLoop
- }
- }
- }
- }
-
- if len(cleanAttrs) == 0 {
- // If nothing was allowed, let's get out of here
- return cleanAttrs
- }
- // cleanAttrs now contains the attributes that are permitted
-
- if linkable(elementName) {
- if p.requireParseableURLs {
- // Ensure URLs are parseable:
- // - a.href
- // - area.href
- // - link.href
- // - blockquote.cite
- // - q.cite
- // - img.src
- // - script.src
- tmpAttrs := []html.Attribute{}
- for _, htmlAttr := range cleanAttrs {
- switch elementName {
- case "a", "area", "base", "link":
- if htmlAttr.Key == "href" {
- if u, ok := p.validURL(htmlAttr.Val); ok {
- htmlAttr.Val = u
- tmpAttrs = append(tmpAttrs, htmlAttr)
- }
- break
- }
- tmpAttrs = append(tmpAttrs, htmlAttr)
- case "blockquote", "del", "ins", "q":
- if htmlAttr.Key == "cite" {
- if u, ok := p.validURL(htmlAttr.Val); ok {
- htmlAttr.Val = u
- tmpAttrs = append(tmpAttrs, htmlAttr)
- }
- break
- }
- tmpAttrs = append(tmpAttrs, htmlAttr)
- case "audio", "embed", "iframe", "img", "script", "source", "track", "video":
- if htmlAttr.Key == "src" {
- if u, ok := p.validURL(htmlAttr.Val); ok {
- if p.srcRewriter != nil {
- // Only rewrite if the URL re-parses; calling the rewriter
- // with a nil *url.URL would panic.
- if parsedURL, err := url.Parse(u); err == nil {
- p.srcRewriter(parsedURL)
- u = parsedURL.String()
- }
- }
- htmlAttr.Val = u
- tmpAttrs = append(tmpAttrs, htmlAttr)
- }
- break
- }
- tmpAttrs = append(tmpAttrs, htmlAttr)
- default:
- tmpAttrs = append(tmpAttrs, htmlAttr)
- }
- }
- cleanAttrs = tmpAttrs
- }
-
- if (p.requireNoFollow ||
- p.requireNoFollowFullyQualifiedLinks ||
- p.requireNoReferrer ||
- p.requireNoReferrerFullyQualifiedLinks ||
- p.addTargetBlankToFullyQualifiedLinks) &&
- len(cleanAttrs) > 0 {
-
- // Add rel="nofollow" and similar attributes if an "href" exists
- switch elementName {
- case "a", "area", "base", "link":
- var hrefFound bool
- var externalLink bool
- for _, htmlAttr := range cleanAttrs {
- if htmlAttr.Key == "href" {
- hrefFound = true
-
- u, err := url.Parse(htmlAttr.Val)
- if err != nil {
- continue
- }
- if u.Host != "" {
- externalLink = true
- }
-
- continue
- }
- }
-
- if hrefFound {
- var (
- noFollowFound bool
- noReferrerFound bool
- targetBlankFound bool
- )
-
- addNoFollow := (p.requireNoFollow ||
- externalLink && p.requireNoFollowFullyQualifiedLinks)
-
- addNoReferrer := (p.requireNoReferrer ||
- externalLink && p.requireNoReferrerFullyQualifiedLinks)
-
- addTargetBlank := (externalLink &&
- p.addTargetBlankToFullyQualifiedLinks)
-
- tmpAttrs := []html.Attribute{}
- for _, htmlAttr := range cleanAttrs {
-
- var appended bool
- if htmlAttr.Key == "rel" && (addNoFollow || addNoReferrer) {
-
- if addNoFollow && !strings.Contains(htmlAttr.Val, "nofollow") {
- htmlAttr.Val += " nofollow"
- }
- if addNoReferrer && !strings.Contains(htmlAttr.Val, "noreferrer") {
- htmlAttr.Val += " noreferrer"
- }
- noFollowFound = addNoFollow
- noReferrerFound = addNoReferrer
- tmpAttrs = append(tmpAttrs, htmlAttr)
- appended = true
- }
-
- if elementName == "a" && htmlAttr.Key == "target" {
- if htmlAttr.Val == "_blank" {
- targetBlankFound = true
- }
- if addTargetBlank && !targetBlankFound {
- htmlAttr.Val = "_blank"
- targetBlankFound = true
- tmpAttrs = append(tmpAttrs, htmlAttr)
- appended = true
- }
- }
-
- if !appended {
- tmpAttrs = append(tmpAttrs, htmlAttr)
- }
- }
- if noFollowFound || noReferrerFound || targetBlankFound {
- cleanAttrs = tmpAttrs
- }
-
- if (addNoFollow && !noFollowFound) || (addNoReferrer && !noReferrerFound) {
- rel := html.Attribute{}
- rel.Key = "rel"
- if addNoFollow {
- rel.Val = "nofollow"
- }
- if addNoReferrer {
- if rel.Val != "" {
- rel.Val += " "
- }
- rel.Val += "noreferrer"
- }
- cleanAttrs = append(cleanAttrs, rel)
- }
-
- if elementName == "a" && addTargetBlank && !targetBlankFound {
- target := html.Attribute{}
- target.Key = "target"
- target.Val = "_blank"
- targetBlankFound = true
- cleanAttrs = append(cleanAttrs, target)
- }
-
- if targetBlankFound {
- // target="_blank" has a security risk that allows the
- // opened window/tab to issue JavaScript calls against
- // window.opener, which in effect allow the destination
- // of the link to control the source:
- // https://dev.to/ben/the-targetblank-vulnerability-by-example
- //
- // To mitigate this risk, we need to add a specific rel
- // attribute if it is not already present.
- // rel="noopener"
- //
- // Unfortunately this is processing the rel twice (we
- // already looked at it earlier ^^) as we cannot be sure
- // of the ordering of the href and rel, and whether we
- // have fully satisfied that we need to do this. This
- // double processing only happens *if* target="_blank"
- // is true.
- var noOpenerAdded bool
- tmpAttrs := []html.Attribute{}
- for _, htmlAttr := range cleanAttrs {
- var appended bool
- if htmlAttr.Key == "rel" {
- if strings.Contains(htmlAttr.Val, "noopener") {
- noOpenerAdded = true
- tmpAttrs = append(tmpAttrs, htmlAttr)
- } else {
- htmlAttr.Val += " noopener"
- noOpenerAdded = true
- tmpAttrs = append(tmpAttrs, htmlAttr)
- }
-
- appended = true
- }
- if !appended {
- tmpAttrs = append(tmpAttrs, htmlAttr)
- }
- }
- if noOpenerAdded {
- cleanAttrs = tmpAttrs
- } else {
- // rel attr was not found, or else noopener would
- // have been added already
- rel := html.Attribute{}
- rel.Key = "rel"
- rel.Val = "noopener"
- cleanAttrs = append(cleanAttrs, rel)
- }
-
- }
- }
- default:
- }
- }
- }
-
- if p.requireCrossOriginAnonymous && len(cleanAttrs) > 0 {
- switch elementName {
- case "audio", "img", "link", "script", "video":
- var crossOriginFound bool
- for i, htmlAttr := range cleanAttrs {
- if htmlAttr.Key == "crossorigin" {
- crossOriginFound = true
- cleanAttrs[i].Val = "anonymous"
- }
- }
-
- if !crossOriginFound {
- crossOrigin := html.Attribute{}
- crossOrigin.Key = "crossorigin"
- crossOrigin.Val = "anonymous"
- cleanAttrs = append(cleanAttrs, crossOrigin)
- }
- }
- }
-
- if p.requireSandboxOnIFrame != nil && elementName == "iframe" {
- var sandboxFound bool
- for i, htmlAttr := range cleanAttrs {
- if htmlAttr.Key == "sandbox" {
- sandboxFound = true
- var cleanVals []string
- cleanValsSet := make(map[string]bool)
- for _, val := range strings.Fields(htmlAttr.Val) {
- if p.requireSandboxOnIFrame[val] {
- if !cleanValsSet[val] {
- cleanVals = append(cleanVals, val)
- cleanValsSet[val] = true
- }
- }
- }
- cleanAttrs[i].Val = strings.Join(cleanVals, " ")
- }
- }
-
- if !sandboxFound {
- sandbox := html.Attribute{}
- sandbox.Key = "sandbox"
- sandbox.Val = ""
- cleanAttrs = append(cleanAttrs, sandbox)
- }
- }
-
- return cleanAttrs
-}
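The aps argument and p.globalAttrs consulted above are populated through bluemonday's public builder API. A sketch exercising both branches of the policy check, the regexp-guarded one and the unconditional one:

```go
package main

import (
	"fmt"
	"regexp"

	"github.com/microcosm-cc/bluemonday"
)

func main() {
	p := bluemonday.NewPolicy()
	p.AllowElements("a")

	// Element-specific policy with a regexp: only fragment links pass.
	p.AllowAttrs("href").Matching(regexp.MustCompile(`^#`)).OnElements("a")

	// Global policy with no regexp: any title value is accepted.
	p.AllowAttrs("title").Globally()

	fmt.Println(p.Sanitize(`<a href="#s1" onclick="x()" title="t">s1</a>`))
	// <a href="#s1" title="t">s1</a>
}
```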
-
-func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.Attribute {
- sps := p.elsAndStyles[elementName]
- if len(sps) == 0 {
- sps = map[string][]stylePolicy{}
- // No specific policy was found for this element, so check the regex
- // matchers. If multiple matchers match, their policies are merged, so
- // it is best not to have overlapping matchers.
- for regex, policies := range p.elsMatchingAndStyles {
- if regex.MatchString(elementName) {
- for k, v := range policies {
- sps[k] = append(sps[k], v...)
- }
- }
- }
- }
-
- // Add a semicolon to the end to fix a parsing issue
- attr.Val = strings.TrimRight(attr.Val, " ")
- if len(attr.Val) > 0 && attr.Val[len(attr.Val)-1] != ';' {
- attr.Val = attr.Val + ";"
- }
- decs, err := parser.ParseDeclarations(attr.Val)
- if err != nil {
- attr.Val = ""
- return attr
- }
- clean := []string{}
- prefixes := []string{"-webkit-", "-moz-", "-ms-", "-o-", "mso-", "-xv-", "-atsc-", "-wap-", "-khtml-", "prince-", "-ah-", "-hp-", "-ro-", "-rim-", "-tc-"}
-
-decLoop:
- for _, dec := range decs {
- tempProperty := strings.ToLower(dec.Property)
- tempValue := removeUnicode(strings.ToLower(dec.Value))
- for _, i := range prefixes {
- tempProperty = strings.TrimPrefix(tempProperty, i)
- }
- if spl, ok := sps[tempProperty]; ok {
- for _, sp := range spl {
- if sp.handler != nil {
- if sp.handler(tempValue) {
- clean = append(clean, dec.Property+": "+dec.Value)
- continue decLoop
- }
- } else if len(sp.enum) > 0 {
- if stringInSlice(tempValue, sp.enum) {
- clean = append(clean, dec.Property+": "+dec.Value)
- continue decLoop
- }
- } else if sp.regexp != nil {
- if sp.regexp.MatchString(tempValue) {
- clean = append(clean, dec.Property+": "+dec.Value)
- continue decLoop
- }
- }
- }
- }
- if spl, ok := p.globalStyles[tempProperty]; ok {
- for _, sp := range spl {
- if sp.handler != nil {
- if sp.handler(tempValue) {
- clean = append(clean, dec.Property+": "+dec.Value)
- continue decLoop
- }
- } else if len(sp.enum) > 0 {
- if stringInSlice(tempValue, sp.enum) {
- clean = append(clean, dec.Property+": "+dec.Value)
- continue decLoop
- }
- } else if sp.regexp != nil {
- if sp.regexp.MatchString(tempValue) {
- clean = append(clean, dec.Property+": "+dec.Value)
- continue decLoop
- }
- }
- }
- }
- }
- if len(clean) > 0 {
- attr.Val = strings.Join(clean, "; ")
- } else {
- attr.Val = ""
- }
- return attr
-}
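The three branches of decLoop (handler, enum, regexp) correspond to the three ways a style policy can be declared. A sketch, assuming the AllowStyles builder methods of current bluemonday releases:

```go
package main

import (
	"fmt"
	"regexp"
	"strings"

	"github.com/microcosm-cc/bluemonday"
)

func main() {
	p := bluemonday.NewPolicy()
	p.AllowElements("span")
	p.AllowAttrs("style").OnElements("span")

	// Enum policy: the value must be one of the listed keywords.
	p.AllowStyles("text-align").MatchingEnum("left", "center", "right").OnElements("span")

	// Regexp policy: the value must match the expression.
	p.AllowStyles("color").Matching(regexp.MustCompile(`^#[0-9a-f]{6}$`)).Globally()

	// Handler policy: arbitrary validation logic.
	p.AllowStyles("font-family").MatchingHandler(func(v string) bool {
		return !strings.Contains(v, "expression")
	}).OnElements("span")

	fmt.Println(p.Sanitize(`<span style="color: #ff0000; top: 0">x</span>`))
	// <span style="color: #ff0000">x</span>
}
```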
-
-func (p *Policy) allowNoAttrs(elementName string) bool {
- _, ok := p.setOfElementsAllowedWithoutAttrs[elementName]
- if !ok {
- for _, r := range p.setOfElementsMatchingAllowedWithoutAttrs {
- if r.MatchString(elementName) {
- ok = true
- break
- }
- }
- }
- return ok
-}
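These sets are filled via the builder API; AllowNoAttrs is how a policy opts an element out of the attribute requirement. A sketch, assuming the builder methods of current releases:

```go
package main

import (
	"fmt"

	"github.com/microcosm-cc/bluemonday"
)

func main() {
	p := bluemonday.NewPolicy()

	// Permit <br> even though it never carries an allowed attribute;
	// without an allow-no-attrs rule, attribute-less tags are dropped.
	p.AllowNoAttrs().OnElements("br")

	fmt.Println(p.Sanitize("one<br>two")) // the <br> survives
}
```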
-
-func (p *Policy) validURL(rawurl string) (string, bool) {
- if p.requireParseableURLs {
- // URLs are considered valid once leading and trailing whitespace is trimmed
- rawurl = strings.TrimSpace(rawurl)
-
- // URLs cannot contain whitespace, unless it is a data URI
- if strings.Contains(rawurl, " ") ||
- strings.Contains(rawurl, "\t") ||
- strings.Contains(rawurl, "\n") {
- if !strings.HasPrefix(rawurl, `data:`) {
- return "", false
- }
-
- // Remove \r and \n from base64 encoded data to pass url.Parse.
- matched := dataURIbase64Prefix.FindString(rawurl)
- if matched != "" {
- rawurl = matched + strings.Replace(
- strings.Replace(
- rawurl[len(matched):],
- "\r",
- "",
- -1,
- ),
- "\n",
- "",
- -1,
- )
- }
- }
-
- // URLs are valid if they parse
- u, err := url.Parse(rawurl)
- if err != nil {
- return "", false
- }
-
- if u.Scheme != "" {
- urlPolicies, ok := p.allowURLSchemes[u.Scheme]
- if !ok {
- for _, r := range p.allowURLSchemeRegexps {
- if r.MatchString(u.Scheme) {
- return u.String(), true
- }
- }
-
- return "", false
- }
-
- if len(urlPolicies) == 0 {
- return u.String(), true
- }
-
- for _, urlPolicy := range urlPolicies {
- if urlPolicy(u) {
- return u.String(), true
- }
- }
-
- return "", false
- }
-
- if p.allowRelativeURLs {
- if u.String() != "" {
- return u.String(), true
- }
- }
-
- return "", false
- }
-
- return rawurl, true
-}
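A sketch of the policy knobs that drive validURL: RequireParseableURLs enables the checks, AllowURLSchemes registers a scheme with no predicates, and AllowURLSchemeWithCustomPolicy attaches a per-URL predicate (the host used here is illustrative):

```go
package main

import (
	"fmt"
	"net/url"

	"github.com/microcosm-cc/bluemonday"
)

func main() {
	p := bluemonday.NewPolicy()
	p.AllowElements("a")
	p.AllowAttrs("href").OnElements("a")

	p.RequireParseableURLs(true)
	p.AllowURLSchemes("mailto") // any mailto: URL is acceptable
	p.AllowURLSchemeWithCustomPolicy("https", func(u *url.URL) bool {
		return u.Host == "example.com" // predicate runs per parsed URL
	})

	fmt.Println(p.Sanitize(`<a href="https://example.com/x">ok</a>`))
	// <a href="https://example.com/x">ok</a>
	fmt.Println(p.Sanitize(`<a href="javascript:alert(1)">bad</a>`))
	// bad
}
```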
-
-func linkable(elementName string) bool {
- switch elementName {
- case "a", "area", "base", "link":
- // elements that allow .href
- return true
- case "blockquote", "del", "ins", "q":
- // elements that allow .cite
- return true
- case "audio", "embed", "iframe", "img", "input", "script", "track", "video":
- // elements that allow .src
- return true
- default:
- return false
- }
-}
-
-// stringInSlice returns true if needle exists in haystack (compared
-// case-insensitively)
-func stringInSlice(needle string, haystack []string) bool {
- for _, straw := range haystack {
- if strings.EqualFold(straw, needle) {
- return true
- }
- }
- return false
-}
-
-func isDataAttribute(val string) bool {
- if !dataAttribute.MatchString(val) {
- return false
- }
- rest := strings.Split(val, "data-")
- if len(rest) == 1 {
- return false
- }
- // data-xml* is invalid.
- if dataAttributeXMLPrefix.MatchString(rest[1]) {
- return false
- }
- // no uppercase or semi-colons allowed.
- if dataAttributeInvalidChars.MatchString(rest[1]) {
- return false
- }
- return true
-}
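A sketch showing the effect of these checks when data attributes are switched on via the public AllowDataAttributes toggle:

```go
package main

import (
	"fmt"

	"github.com/microcosm-cc/bluemonday"
)

func main() {
	p := bluemonday.NewPolicy()
	p.AllowElements("div")
	p.AllowDataAttributes()

	// data-id passes isDataAttribute; data-xmlid is rejected because
	// names beginning data-xml* are invalid.
	in := `<div data-id="1" data-xmlid="2">x</div>`
	fmt.Println(p.Sanitize(in)) // <div data-id="1">x</div>
}
```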
-
-func removeUnicode(value string) string {
- substitutedValue := value
- currentLoc := cssUnicodeChar.FindStringIndex(substitutedValue)
- for currentLoc != nil {
-
- character := substitutedValue[currentLoc[0]+1 : currentLoc[1]]
- character = strings.TrimSpace(character)
- if len(character) < 4 {
- character = strings.Repeat("0", 4-len(character)) + character
- } else {
- for len(character) > 4 {
- if character[0] != '0' {
- character = ""
- break
- } else {
- character = character[1:]
- }
- }
- }
- character = "\\u" + character
- translatedChar, err := strconv.Unquote(`"` + character + `"`)
- translatedChar = strings.TrimSpace(translatedChar)
- if err != nil {
- return ""
- }
- substitutedValue = substitutedValue[0:currentLoc[0]] + translatedChar + substitutedValue[currentLoc[1]:]
- currentLoc = cssUnicodeChar.FindStringIndex(substitutedValue)
- }
- return substitutedValue
-}
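Why this decoding matters: CSS permits backslash escapes, so a hostile style value can spell a blocked keyword in hex. A standalone sketch of the decoding step, reusing the same regexp as the package:

```go
package main

import (
	"fmt"
	"regexp"
	"strconv"
	"strings"
)

// The same pattern the sanitizer uses for CSS escapes such as `\72 `.
var cssUnicodeChar = regexp.MustCompile(`\\[0-9a-f]{1,6} ?`)

func main() {
	// `\72 \65 \64 ` is a CSS-escaped way of writing "red".
	in := `\72 \65 \64 `
	out := cssUnicodeChar.ReplaceAllStringFunc(in, func(m string) string {
		n, _ := strconv.ParseInt(strings.TrimSpace(m[1:]), 16, 32)
		return string(rune(n))
	})
	fmt.Println(out) // red
}
```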
-
-func (p *Policy) matchRegex(elementName string) (map[string][]attrPolicy, bool) {
- aps := make(map[string][]attrPolicy)
- matched := false
- for regex, attrs := range p.elsMatchingAndAttrs {
- if regex.MatchString(elementName) {
- matched = true
- for k, v := range attrs {
- aps[k] = append(aps[k], v...)
- }
- }
- }
- return aps, matched
-}
-
-// normaliseElementName takes an HTML element name like <script>, which is
-// user input, and returns a lower case version of it that is immune to
-// Unicode-to-ASCII conversion tricks (such as the dotted capital I in scrİpt,
-// which a strings.ToLower would convert to script). Instead this func
-// preserves all non-ASCII characters as their escaped equivalents, i.e.
-// \u0130, which reveals the trick when lower cased
-func normaliseElementName(str string) string {
- // the otherwise useful QuoteToASCII puts quote marks at the start and
- // end, so those are trimmed off
- return strings.TrimSuffix(
- strings.TrimPrefix(
- strings.ToLower(
- strconv.QuoteToASCII(str),
- ),
- `"`),
- `"`,
- )
-}
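A demonstration of the trick this guards against: with the dotted capital I (U+0130), a plain strings.ToLower collapses scrİpt to script, while escaping to ASCII first keeps the foreign rune visible after lower-casing:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

func main() {
	// Plain lower-casing erases the distinction.
	fmt.Println(strings.ToLower("scrİpt")) // script

	// Escaping first preserves the non-ASCII rune through lower-casing.
	q := strings.ToLower(strconv.QuoteToASCII("scrİpt"))
	fmt.Println(strings.Trim(q, `"`)) // scr\u0130pt
}
```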
-
-type stringWriterWriter interface {
- io.Writer
- io.StringWriter
-}