1 files changed, 333 insertions, 0 deletions
diff --git a/vendor/github.com/k3a/html2text/html2text.go b/vendor/github.com/k3a/html2text/html2text.go
new file mode 100644
index 000000000..f79fbe395
--- /dev/null
+++ b/vendor/github.com/k3a/html2text/html2text.go
@@ -0,0 +1,333 @@
+package html2text
+
+import (
+	"bytes"
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+// Deprecated: Line break constants. To select the line break of the output
+// please use HTML2TextWithOptions(text, WithUnixLineBreaks()) instead.
+const (
+	WIN_LBR  = "\r\n" // Windows-style line break (the default)
+	UNIX_LBR = "\n"   // Unix-style line break
+)
+
+var legacyLBR = WIN_LBR
+var badTagnamesRE = regexp.MustCompile(`^(head|script|style|a)($|\s+)`) // tags that open an unwanted block (matched against the lower-cased tag)
+var linkTagRE = regexp.MustCompile(`^(?i:a)(?:$|\s).*(?i:href)\s*=\s*('([^']*?)'|"([^"]*?)"|([^\s"'` + "`" + `=<>]+))`)
+var badLinkHrefRE = regexp.MustCompile(`javascript:`) // hrefs containing this are never printed; the match is case-sensitive
+var headersRE = regexp.MustCompile(`^(\/)?h[1-6]`)
+var numericEntityRE = regexp.MustCompile(`(?i)^#(x?[a-f0-9]+)$`) // name of a numeric entity; group 1 holds the digits with an optional "x" hex marker
+
+type options struct {
+	lbr            string // line break sequence written to the output
+	linksInnerText bool   // keep link inner text and append the href in angle brackets
+	listPrefix     string // written after the line break of each <li>; empty means none
+}
+
+// newOptions returns the option set holding the defaults: Windows line
+// breaks, linksInnerText switched off and no list prefix.
+func newOptions() *options {
+	defaults := options{lbr: WIN_LBR}
+	return &defaults
+}
+
+// Option is a functional option that adjusts how HTML2TextWithOptions converts its input
+type Option func(*options)
+
+// WithUnixLineBreaks makes the converter emit unix line breaks ("\n")
+// in place of the default windows ones ("\r\n").
+func WithUnixLineBreaks() Option {
+	useUnix := func(cfg *options) { cfg.lbr = UNIX_LBR }
+	return useUnix
+}
+
+// WithLinksInnerText makes the converter keep the inner text of link tags
+// and append the href URL in angle brackets after that text.
+// Example: click news <http://bit.ly/2n4wXRs>
+func WithLinksInnerText() Option {
+	keepText := func(cfg *options) { cfg.linksInnerText = true }
+	return keepText
+}
+
+// WithListSupportPrefix makes the converter start every <li> item on a new
+// line that begins with the given prefix.
+func WithListSupportPrefix(prefix string) Option {
+	setPrefix := func(cfg *options) { cfg.listPrefix = prefix }
+	return setPrefix
+}
+
+// WithListSupport enables list formatting with the " - " prefix.
+// It is a shorthand for WithListSupportPrefix(" - "): each <li>
+// item starts on its own line and is preceded by " - ".
+func WithListSupport() Option { return WithListSupportPrefix(" - ") }
+
+func parseHTMLEntity(entName string) (string, bool) {
+	if r, ok := entity[entName]; ok {
+		return string(r), true
+	}
+
+	if match := numericEntityRE.FindStringSubmatch(entName); len(match) == 2 {
+		var (
+			err    error
+			n      int64
+			digits = match[1]
+		)
+
+		if digits != "" && (digits[0] == 'x' || digits[0] == 'X') {
+			n, err = strconv.ParseInt(digits[1:], 16, 64)
+		} else {
+			n, err = strconv.ParseInt(digits, 10, 64)
+		}
+
+		if err == nil && (n == 9 || n == 10 || n == 13 || n > 31) {
+			return string(rune(n)), true
+		}
+	}
+
+	return "", false
+}
+
+// SetUnixLbr makes HTML2Text emit "\n" line breaks when b is true and "\r\n" (the default) otherwise.
+//
+// Deprecated: Please use HTML2TextWithOptions(text, WithUnixLineBreaks())
+func SetUnixLbr(b bool) {
+	lbr := WIN_LBR
+	if b {
+		lbr = UNIX_LBR
+	}
+	legacyLBR = lbr
+}
+
+// HTMLEntitiesToText decodes HTML entities inside a provided
+// string and returns decoded text
+func HTMLEntitiesToText(htmlEntsText string) string {
+	outBuf := bytes.NewBufferString("")
+	inEnt := false
+
+	for i, r := range htmlEntsText {
+		switch {
+		case r == ';' && inEnt:
+			inEnt = false
+			continue
+
+		case r == '&': //possible html entity
+			entName := ""
+			isEnt := false
+
+			// parse the entity name - max 10 chars
+			chars := 0
+			for _, er := range htmlEntsText[i+1:] {
+				if er == ';' {
+					isEnt = true
+					break
+				} else {
+					entName += string(er)
+				}
+
+				chars++
+				if chars == 10 {
+					break
+				}
+			}
+
+			if isEnt {
+				if ent, isEnt := parseHTMLEntity(entName); isEnt {
+					outBuf.WriteString(ent)
+					inEnt = true
+					continue
+				}
+			}
+		}
+
+		if !inEnt {
+			outBuf.WriteRune(r)
+		}
+	}
+
+	return outBuf.String()
+}
+
+// writeSpace appends a space to outBuf unless it is empty or already ends with one
+func writeSpace(outBuf *bytes.Buffer) {
+	if n := outBuf.Len(); n > 0 && outBuf.Bytes()[n-1] != ' ' {
+		outBuf.WriteByte(' ')
+	}
+}
+
+// HTML2Text converts html into a text form
+func HTML2Text(html string) string {
+	var opts []Option
+	if legacyLBR == UNIX_LBR {
+		opts = append(opts, WithUnixLineBreaks())
+	}
+	return HTML2TextWithOptions(html, opts...)
+}
+
+// HTML2TextWithOptions converts html into a text form with additional options
+func HTML2TextWithOptions(html string, reqOpts ...Option) string {
+	opts := newOptions()
+	for _, opt := range reqOpts {
+		opt(opts)
+	}
+
+	inLen := len(html)
+	tagStart := 0
+	inEnt := false
+	badTagStackDepth := 0 // if == 1 it means we are inside <head>...</head>
+	shouldOutput := true
+	// maintain a stack of <a> tag href links and output it after the tag's inner text (for opts.linksInnerText only)
+	hrefs := []string{}
+	// new line cannot be printed at the beginning or
+	// for <p> after a new line created by previous <p></p>
+	canPrintNewline := false
+
+	outBuf := bytes.NewBufferString("")
+
+	for i, r := range html {
+		if inLen > 0 && i == inLen-1 {
+			// prevent new line at the end of the document
+			canPrintNewline = false
+		}
+
+		switch {
+		// skip new lines and spaces adding a single space if not there yet
+		case r <= 0xD, r == 0x85, r == 0x2028, r == 0x2029, // new lines
+			r == ' ', r >= 0x2008 && r <= 0x200B: // spaces
+			if shouldOutput && badTagStackDepth == 0 && !inEnt {
+				//outBuf.WriteString(fmt.Sprintf("{DBG r:%c, inEnt:%t, tag:%s}", r, inEnt, html[tagStart:i]))
+				writeSpace(outBuf)
+			}
+			continue
+
+		case r == ';' && inEnt: // end of html entity
+			inEnt = false
+			continue
+
+		case r == '&' && shouldOutput: // possible html entity
+			entName := ""
+			isEnt := false
+
+			// parse the entity name - max 10 chars
+			chars := 0
+			for _, er := range html[i+1:] {
+				if er == ';' {
+					isEnt = true
+					break
+				} else {
+					entName += string(er)
+				}
+
+				chars++
+				if chars == 10 {
+					break
+				}
+			}
+
+			if isEnt {
+				if ent, isEnt := parseHTMLEntity(entName); isEnt {
+					outBuf.WriteString(ent)
+					inEnt = true
+					continue
+				}
+			}
+
+		case r == '<': // start of a tag
+			tagStart = i + 1
+			shouldOutput = false
+			continue
+
+		case r == '>': // end of a tag
+			shouldOutput = true
+			tag := html[tagStart:i]
+			tagNameLowercase := strings.ToLower(tag)
+
+			if tagNameLowercase == "/ul" || tagNameLowercase == "/ol" {
+				outBuf.WriteString(opts.lbr)
+			} else if tagNameLowercase == "li" || tagNameLowercase == "li/" {
+				if opts.listPrefix != "" {
+					outBuf.WriteString(opts.lbr + opts.listPrefix)
+				} else {
+					outBuf.WriteString(opts.lbr)
+				}
+			} else if headersRE.MatchString(tagNameLowercase) {
+				if canPrintNewline {
+					outBuf.WriteString(opts.lbr + opts.lbr)
+				}
+				canPrintNewline = false
+			} else if tagNameLowercase == "br" || tagNameLowercase == "br/" {
+				// new line
+				outBuf.WriteString(opts.lbr)
+			} else if tagNameLowercase == "p" || tagNameLowercase == "/p" {
+				if canPrintNewline {
+					outBuf.WriteString(opts.lbr + opts.lbr)
+				}
+				canPrintNewline = false
+			} else if opts.linksInnerText && tagNameLowercase == "/a" {
+				// end of link
+				// links can be empty can happen if the link matches the badLinkHrefRE
+				if len(hrefs) > 0 {
+					outBuf.WriteString(" <")
+					outBuf.WriteString(HTMLEntitiesToText(hrefs[0]))
+					outBuf.WriteString(">")
+					hrefs = hrefs[1:]
+				}
+			} else if opts.linksInnerText && linkTagRE.MatchString(tagNameLowercase) {
+				// parse link href
+				// add special handling for a tags
+				m := linkTagRE.FindStringSubmatch(tag)
+				if len(m) == 5 {
+					link := m[2]
+					if len(link) == 0 {
+						link = m[3]
+						if len(link) == 0 {
+							link = m[4]
+						}
+					}
+
+					if opts.linksInnerText && !badLinkHrefRE.MatchString(link) {
+						hrefs = append(hrefs, link)
+					}
+				}
+			} else if badTagnamesRE.MatchString(tagNameLowercase) {
+				// unwanted block
+				badTagStackDepth++
+
+				// if link inner text preservation is not enabled
+				// and the current tag is a link tag, parse its href and output that
+				if !opts.linksInnerText {
+					// parse link href
+					m := linkTagRE.FindStringSubmatch(tag)
+					if len(m) == 5 {
+						link := m[2]
+						if len(link) == 0 {
+							link = m[3]
+							if len(link) == 0 {
+								link = m[4]
+							}
+						}
+
+						if !badLinkHrefRE.MatchString(link) {
+							outBuf.WriteString(HTMLEntitiesToText(link))
+						}
+					}
+				}
+			} else if len(tagNameLowercase) > 0 && tagNameLowercase[0] == '/' &&
+				badTagnamesRE.MatchString(tagNameLowercase[1:]) {
+				// end of unwanted block
+				badTagStackDepth--
+			}
+			continue
+
+		} // switch end
+
+		if shouldOutput && badTagStackDepth == 0 && !inEnt {
+			canPrintNewline = true
+			outBuf.WriteRune(r)
+		}
+	}
+
+	return outBuf.String()
+}