diff options
author | 2022-05-07 16:55:27 +0100 | |
---|---|---|
committer | 2022-05-07 17:55:27 +0200 | |
commit | 26b74aefaf5d2a3cd26bd57652fe96a6a20ed034 (patch) | |
tree | db316febba8e0ada7a9360b059011dcc7ea138a3 /internal/text/link.go | |
parent | [performance] improved logrus output switching performance (#544) (diff) | |
download | gotosocial-26b74aefaf5d2a3cd26bd57652fe96a6a20ed034.tar.xz |
[bugfix] Fix existing bio text showing as HTML (#531)
* fix existing bio text showing as HTML
- updated replaced mentions to include instance
- strips HTML from account source note in Verify handler
- update text formatter to use buffers for string writes
Signed-off-by: kim <grufwub@gmail.com>
* go away linter
Signed-off-by: kim <grufwub@gmail.com>
* change buf reset location, change html mention tags
Signed-off-by: kim <grufwub@gmail.com>
* reduce FindLinks code complexity
Signed-off-by: kim <grufwub@gmail.com>
* fix HTML to text conversion
Signed-off-by: kim <grufwub@gmail.com>
* Update internal/regexes/regexes.go
Co-authored-by: Mina Galić <mina.galic@puppet.com>
* use improved html2text lib with more options
Signed-off-by: kim <grufwub@gmail.com>
* fix to produce actual plaintext from html
Signed-off-by: kim <grufwub@gmail.com>
* fix span tags instead written as space
Signed-off-by: kim <grufwub@gmail.com>
* performance improvements to regex replacements, fix link replace logic for un-html-ing in the future
Signed-off-by: kim <grufwub@gmail.com>
* fix tag/mention replacements to use input string, fix link replace to not include scheme
Signed-off-by: kim <grufwub@gmail.com>
* use matched input string for link replace href text
Signed-off-by: kim <grufwub@gmail.com>
* remove unused code (to appease linter :sobs:)
Signed-off-by: kim <grufwub@gmail.com>
* improve hashtagFinger regex to be more compliant
Signed-off-by: kim <grufwub@gmail.com>
* update breakReplacer to include both unix and windows line endings
Signed-off-by: kim <grufwub@gmail.com>
* add NoteRaw field to Account to store plaintext account bio, add migration for this, set for sensitive accounts
Signed-off-by: kim <grufwub@gmail.com>
* drop unnecessary code
Signed-off-by: kim <grufwub@gmail.com>
* update text package tests to fix logic changes
Signed-off-by: kim <grufwub@gmail.com>
* add raw note content testing to account update and account verify
Signed-off-by: kim <grufwub@gmail.com>
* remove unused modules
Signed-off-by: kim <grufwub@gmail.com>
* fix emoji regex
Signed-off-by: kim <grufwub@gmail.com>
* fix replacement of hashtags
Signed-off-by: kim <grufwub@gmail.com>
* update code comment
Signed-off-by: kim <grufwub@gmail.com>
Co-authored-by: Mina Galić <mina.galic@puppet.com>
Diffstat (limited to 'internal/text/link.go')
-rw-r--r-- | internal/text/link.go | 84 |
1 file changed, 27 insertions(+), 57 deletions(-)
diff --git a/internal/text/link.go b/internal/text/link.go index d8d83df6d..f72c451f2 100644 --- a/internal/text/link.go +++ b/internal/text/link.go @@ -19,34 +19,28 @@ package text import ( + "bytes" "context" - "fmt" "net/url" + "strings" - "mvdan.cc/xurls/v2" + "github.com/superseriousbusiness/gotosocial/internal/regexes" ) -// schemes is the regex for schemes we accept when looking for links. -// Basically, we accept https or http. -var schemes = `(((http|https))://)` - // FindLinks parses the given string looking for recognizable URLs (including scheme). // It returns a list of those URLs, without changing the string, or an error if something goes wrong. // If no URLs are found within the given string, an empty slice and nil will be returned. -func FindLinks(in string) ([]*url.URL, error) { - rxStrict, err := xurls.StrictMatchingScheme(schemes) - if err != nil { - return nil, err - } - - urls := []*url.URL{} +func FindLinks(in string) []*url.URL { + var urls []*url.URL // bail already if we don't find anything - found := rxStrict.FindAllString(in, -1) + found := regexes.LinkScheme.FindAllString(in, -1) if len(found) == 0 { - return urls, nil + return nil } + urlmap := map[string]struct{}{} + // for each string we find, we want to parse it into a URL if we can // if we fail to parse it, just ignore this match and continue for _, f := range found { @@ -54,29 +48,18 @@ func FindLinks(in string) ([]*url.URL, error) { if err != nil { continue } - urls = append(urls, u) - } - // deduplicate the URLs - urlsDeduped := []*url.URL{} + // Calculate string + ustr := u.String() - for _, u := range urls { - if !contains(urlsDeduped, u) { - urlsDeduped = append(urlsDeduped, u) + if _, ok := urlmap[ustr]; !ok { + // Has not been encountered yet + urls = append(urls, u) + urlmap[ustr] = struct{}{} } } - return urlsDeduped, nil -} - -// contains checks if the given url is already within a slice of URLs -func contains(urls []*url.URL, url *url.URL) bool { - for _, u := range 
urls { - if u.String() == url.String() { - return true - } - } - return false + return urls } // ReplaceLinks replaces all detected links in a piece of text with their HTML (href) equivalents. @@ -84,33 +67,20 @@ func contains(urls []*url.URL, url *url.URL) bool { // href will end up double-formatted, if the text you pass here contains one or more hrefs already. // To avoid this, you should sanitize any HTML out of text before you pass it into this function. func (f *formatter) ReplaceLinks(ctx context.Context, in string) string { - rxStrict, err := xurls.StrictMatchingScheme(schemes) - if err != nil { - panic(err) - } - - replaced := rxStrict.ReplaceAllStringFunc(in, func(urlString string) string { + return regexes.ReplaceAllStringFunc(regexes.LinkScheme, in, func(urlString string, buf *bytes.Buffer) string { thisURL, err := url.Parse(urlString) if err != nil { return urlString // we can't parse it as a URL so don't replace it } - - shortString := thisURL.Hostname() - - if thisURL.Path != "" { - shortString += thisURL.Path - } - - if thisURL.Fragment != "" { - shortString = shortString + "#" + thisURL.Fragment - } - - if thisURL.RawQuery != "" { - shortString = shortString + "?" + thisURL.RawQuery - } - - replacement := fmt.Sprintf(`<a href="%s" rel="noopener">%s</a>`, urlString, shortString) - return replacement + // <a href="thisURL.String()" rel="noopener">urlString</a> + urlString = thisURL.String() + buf.WriteString(`<a href="`) + buf.WriteString(thisURL.String()) + buf.WriteString(`" rel="noopener">`) + urlString = strings.TrimPrefix(urlString, thisURL.Scheme) + urlString = strings.TrimPrefix(urlString, "://") + buf.WriteString(urlString) + buf.WriteString(`</a>`) + return buf.String() }) - return replaced } |