diff options
Diffstat (limited to 'internal/regexes/regexes.go')
-rw-r--r-- | internal/regexes/regexes.go | 36 |
1 files changed, 34 insertions, 2 deletions
diff --git a/internal/regexes/regexes.go b/internal/regexes/regexes.go index 9302b544d..f05f9b391 100644 --- a/internal/regexes/regexes.go +++ b/internal/regexes/regexes.go @@ -19,8 +19,12 @@ package regexes import ( + "bytes" "fmt" "regexp" + "sync" + + "mvdan.cc/xurls/v2" ) const ( @@ -47,6 +51,16 @@ const ( ) var ( + schemes = `(http|https)://` + // LinkScheme captures http/https schemes in URLs. + LinkScheme = func() *regexp.Regexp { + rgx, err := xurls.StrictMatchingScheme(schemes) + if err != nil { + panic(err) + } + return rgx + }() + mentionName = `^@(\w+)(?:@([a-zA-Z0-9_\-\.:]+))?$` // MentionName captures the username and domain part from a mention string // such as @whatever_user@example.org, returning whatever_user and example.org (without the @ symbols) @@ -58,7 +72,7 @@ var ( MentionFinder = regexp.MustCompile(mentionFinder) // hashtag regex can be played with here: https://regex101.com/r/bPxeca/1 - hashtagFinder = fmt.Sprintf(`(?:^|\n|\s)(#[a-zA-Z0-9]{1,%d})(?:\b)`, maximumHashtagLength) + hashtagFinder = fmt.Sprintf(`(?:^|\s)(?:#*)(#[a-zA-Z0-9]{1,%d})(?:#|\b)`, maximumHashtagLength) // HashtagFinder finds possible hashtags in a string. // It returns just the string part of the hashtag, not the # symbol. HashtagFinder = regexp.MustCompile(hashtagFinder) @@ -68,7 +82,7 @@ var ( EmojiShortcode = regexp.MustCompile(fmt.Sprintf("^%s$", emojiShortcode)) // emoji regex can be played with here: https://regex101.com/r/478XGM/1 - emojiFinderString = fmt.Sprintf(`(?:\B)?:(%s):(?:\B)?`, emojiShortcode) + emojiFinderString = fmt.Sprintf(`(?:\b)?:(%s):(?:\b)?`, emojiShortcode) // EmojiFinder extracts emoji strings from a piece of text. EmojiFinder = regexp.MustCompile(emojiFinderString) @@ -134,3 +148,21 @@ var ( // from eg /users/example_username/blocks/01F7XT5JZW1WMVSW1KADS8PVDH BlockPath = regexp.MustCompile(blockPath) ) + +// bufpool is a memory pool of byte buffers for use in our regex utility functions. +var bufpool = sync.Pool{ + New: func() any { + buf := bytes.NewBuffer(make([]byte, 0, 512)) + return buf + }, +} + +// ReplaceAllStringFunc will call through to .ReplaceAllStringFunc in the provided regex, but provide you a clean byte buffer for optimized string writes. +func ReplaceAllStringFunc(rgx *regexp.Regexp, src string, repl func(match string, buf *bytes.Buffer) string) string { + buf := bufpool.Get().(*bytes.Buffer) //nolint + defer bufpool.Put(buf) + return rgx.ReplaceAllStringFunc(src, func(match string) string { + buf.Reset() // reset use + return repl(match, buf) + }) +} |