summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Daenney <daenney@users.noreply.github.com> 2025-03-24 14:13:32 +0100
committer: GitHub <noreply@github.com> 2025-03-24 14:13:32 +0100
commit: 1bf40e755c897dfedcfa1b54a5598475b99099d4 (patch)
tree: 2589cd2e5db9e73b67fdedab83355ca1d520bf77
parent: [feature] Parse funkwhale `Album` as Statusable to allow barebones interactin... (diff)
download: gotosocial-1bf40e755c897dfedcfa1b54a5598475b99099d4.tar.xz
feat: Relax URL matching (#3925)
* feat: Relax URL matching. Instead of only linkifying text with an explicit http or https scheme, use xurls.Relaxed, which also matches links with known TLDs. This means that text like 'banana.com' will also be matched, despite the missing http/https scheme. It also linkifies email addresses, which is handy. This should also ensure we catch links without a scheme for the purpose of spam checking.
-rw-r--r-- internal/filter/spam/statusable.go 2
-rw-r--r-- internal/regexes/regexes.go 14
-rw-r--r-- internal/text/markdown.go 2
-rw-r--r-- internal/text/plain.go 2
4 files changed, 7 insertions, 13 deletions
diff --git a/internal/filter/spam/statusable.go b/internal/filter/spam/statusable.go
index 60598f920..3e9e51697 100644
--- a/internal/filter/spam/statusable.go
+++ b/internal/filter/spam/statusable.go
@@ -375,7 +375,7 @@ func (f *Filter) errantLinks(
}
// Find + parse every http/https link in the status.
- rawLinks := regexes.LinkScheme.FindAllString(concat, -1)
+ rawLinks := regexes.URLLike.FindAllString(concat, -1)
links := make([]preppedLink, 0, len(rawLinks))
for _, rawLink := range rawLinks {
linkURI, err := url.Parse(rawLink)
diff --git a/internal/regexes/regexes.go b/internal/regexes/regexes.go
index 515f69a12..a94387e47 100644
--- a/internal/regexes/regexes.go
+++ b/internal/regexes/regexes.go
@@ -22,7 +22,7 @@ import (
"regexp"
"sync"
- "mvdan.cc/xurls/v2"
+ xurls "mvdan.cc/xurls/v2"
)
const (
@@ -40,7 +40,6 @@ const (
reports = "reports"
accepts = "accepts"
- schemes = `(http|https)://` // Allowed URI protocols for parsing links in text.
alphaNumeric = `\p{L}\p{M}*|\p{N}` // A single number or script character in any language, including chars with accents.
usernameGrp = `(?:` + alphaNumeric + `|\.|\-|\_)` // Non-capturing group that matches against a single valid username character.
domainGrp = `(?:` + alphaNumeric + `|\.|\-|\:)` // Non-capturing group that matches against a single valid domain character.
@@ -79,14 +78,9 @@ const (
)
var (
- // LinkScheme captures http/https schemes in URLs.
- LinkScheme = func() *regexp.Regexp {
- rgx, err := xurls.StrictMatchingScheme(schemes)
- if err != nil {
- panic(err)
- }
- return rgx
- }()
+ // URLLike captures anything that resembles a URL. This includes URLs
+ // with or without a scheme, and emails.
+ URLLike = xurls.Relaxed()
// MentionName captures the username and domain part from
// a mention string such as @whatever_user@example.org,
diff --git a/internal/text/markdown.go b/internal/text/markdown.go
index 7e75f2898..163996d77 100644
--- a/internal/text/markdown.go
+++ b/internal/text/markdown.go
@@ -139,7 +139,7 @@ func (f *Formatter) fromMarkdown(
},
// Turns URLs into links.
extension.NewLinkify(
- extension.WithLinkifyURLRegexp(regexes.LinkScheme),
+ extension.WithLinkifyURLRegexp(regexes.URLLike),
),
extension.Strikethrough,
),
diff --git a/internal/text/plain.go b/internal/text/plain.go
index ee4947bf7..17e2800ec 100644
--- a/internal/text/plain.go
+++ b/internal/text/plain.go
@@ -168,7 +168,7 @@ func (f *Formatter) fromPlain(
},
// Turns URLs into links.
extension.NewLinkify(
- extension.WithLinkifyURLRegexp(regexes.LinkScheme),
+ extension.WithLinkifyURLRegexp(regexes.URLLike),
),
),
)