From efd1a4f717afa83d3d3609f0d70e4da151a8dc9b Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Mon, 16 Sep 2024 14:00:23 +0200 Subject: [bugfix] Use better plaintext representation of status for filtering (#3301) * [bugfix] Use better plaintext representation of status for filtering * add new deps to readme * lint * update tests * update regexes * address review comments * remove now unused xxhash * whoops, wrong logger * Merge branch 'main' into status_filtering_bugfix * put cache in caches struct * pain --- vendor/github.com/k3a/html2text/html2text.go | 333 +++++++++++++++++++++++++++ 1 file changed, 333 insertions(+) create mode 100644 vendor/github.com/k3a/html2text/html2text.go (limited to 'vendor/github.com/k3a/html2text/html2text.go') diff --git a/vendor/github.com/k3a/html2text/html2text.go b/vendor/github.com/k3a/html2text/html2text.go new file mode 100644 index 000000000..f79fbe395 --- /dev/null +++ b/vendor/github.com/k3a/html2text/html2text.go @@ -0,0 +1,333 @@ +package html2text + +import ( + "bytes" + "regexp" + "strconv" + "strings" +) + +// Line break constants +// Deprecated: Please use HTML2TextWithOptions(text, WithUnixLineBreaks()) +const ( + WIN_LBR = "\r\n" + UNIX_LBR = "\n" +) + +var legacyLBR = WIN_LBR +var badTagnamesRE = regexp.MustCompile(`^(head|script|style|a)($|\s+)`) +var linkTagRE = regexp.MustCompile(`^(?i:a)(?:$|\s).*(?i:href)\s*=\s*('([^']*?)'|"([^"]*?)"|([^\s"'` + "`" + `=<>]+))`) +var badLinkHrefRE = regexp.MustCompile(`javascript:`) +var headersRE = regexp.MustCompile(`^(\/)?h[1-6]`) +var numericEntityRE = regexp.MustCompile(`(?i)^#(x?[a-f0-9]+)$`) + +type options struct { + lbr string + linksInnerText bool + listPrefix string +} + +func newOptions() *options { + // apply defaults: Windows line breaks ("\r\n"); the remaining fields keep their zero values + return &options{ + lbr: WIN_LBR, + } +} + +// Option is a functional option: a function that modifies the converter options in place +type Option func(*options) + +// WithUnixLineBreaks instructs the converter to use unix line breaks ("\n" instead of "\r\n" default) 
+func WithUnixLineBreaks() Option { + return func(o *options) { + o.lbr = UNIX_LBR + } +} + +// WithLinksInnerText instructs the converter to retain link tag inner text and append href URLs in angle brackets after the text +// Example: click news <http://example.com/news> +func WithLinksInnerText() Option { + return func(o *options) { + o.linksInnerText = true + } +} + +// WithListSupportPrefix formats