diff options
Diffstat (limited to 'internal')
| -rw-r--r-- | internal/processing/status/util.go | 9 | ||||
| -rw-r--r-- | internal/text/link.go | 115 | ||||
| -rw-r--r-- | internal/text/link_test.go | 155 | ||||
| -rw-r--r-- | internal/text/plain.go | 3 | 
4 files changed, 279 insertions, 3 deletions
diff --git a/internal/processing/status/util.go b/internal/processing/status/util.go index b4d115f8d..f85e05478 100644 --- a/internal/processing/status/util.go +++ b/internal/processing/status/util.go @@ -8,6 +8,7 @@ import (  	"github.com/superseriousbusiness/gotosocial/internal/db"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  	"github.com/superseriousbusiness/gotosocial/internal/id" +	"github.com/superseriousbusiness/gotosocial/internal/text"  	"github.com/superseriousbusiness/gotosocial/internal/util"  ) @@ -248,13 +249,15 @@ func (p *processor) processContent(form *apimodel.AdvancedStatusCreateForm, acco  		form.Format = apimodel.StatusFormatDefault  	} +	// remove any existing html from the status +	content := text.RemoveHTML(form.Status) +  	// parse content out of the status depending on what format has been submitted -	var content string  	switch form.Format {  	case apimodel.StatusFormatPlain: -		content = p.formatter.FromPlain(form.Status, status.GTSMentions, status.GTSTags) +		content = p.formatter.FromPlain(content, status.GTSMentions, status.GTSTags)  	case apimodel.StatusFormatMarkdown: -		content = p.formatter.FromMarkdown(form.Status, status.GTSMentions, status.GTSTags) +		content = p.formatter.FromMarkdown(content, status.GTSMentions, status.GTSTags)  	default:  		return fmt.Errorf("format %s not recognised as a valid status format", form.Format)  	} diff --git a/internal/text/link.go b/internal/text/link.go new file mode 100644 index 000000000..440571a83 --- /dev/null +++ b/internal/text/link.go @@ -0,0 +1,115 @@ +/* +   GoToSocial +   Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + +   This program is free software: you can redistribute it and/or modify +   it under the terms of the GNU Affero General Public License as published by +   the Free Software Foundation, either version 3 of the License, or +   (at your option) any later version. 
+ +   This program is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +   GNU Affero General Public License for more details. + +   You should have received a copy of the GNU Affero General Public License +   along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +package text + +import ( +	"fmt" +	"net/url" + +	"mvdan.cc/xurls/v2" +) + +// schemes is the regex for schemes we accept when looking for links. +// Basically, we accept https or http. +var schemes = `(((http|https))://)` + +// FindLinks parses the given string looking for recognizable URLs (including scheme). +// It returns a list of those URLs, without changing the string, or an error if something goes wrong. +// If no URLs are found within the given string, an empty slice and nil will be returned. +func FindLinks(in string) ([]*url.URL, error) { +	rxStrict, err := xurls.StrictMatchingScheme(schemes) +	if err != nil { +		return nil, err +	} + +	urls := []*url.URL{} + +	// bail already if we don't find anything +	found := rxStrict.FindAllString(in, -1) +	if len(found) == 0 { +		return urls, nil +	} + +	// for each string we find, we want to parse it into a URL if we can +	// if we fail to parse it, just ignore this match and continue +	for _, f := range found { +		u, err := url.Parse(f) +		if err != nil { +			continue +		} +		urls = append(urls, u) +	} + +	// deduplicate the URLs +	urlsDeduped := []*url.URL{} + +	for _, u := range urls { +		if !contains(urlsDeduped, u) { +			urlsDeduped = append(urlsDeduped, u) +		} +	} + +	return urlsDeduped, nil +} + +// contains checks if the given url is already within a slice of URLs +func contains(urls []*url.URL, url *url.URL) bool { +	for _, u := range urls { +		if u.String() == url.String() { +			return true +		} +	} +	return false +} + +// ReplaceLinks replaces all detected links in a piece of text with their 
HTML (href) equivalents. +// Note: because Go doesn't allow negative lookbehinds in regex, it's possible that an already-formatted +// href will end up double-formatted, if the text you pass here contains one or more hrefs already. +// To avoid this, you should sanitize any HTML out of text before you pass it into this function. +func ReplaceLinks(in string) string { +	rxStrict, err := xurls.StrictMatchingScheme(schemes) +	if err != nil { +		panic(err) +	} + +	replaced := rxStrict.ReplaceAllStringFunc(in, func(urlString string) string { +		thisURL, err := url.Parse(urlString) +		if err != nil { +			return urlString // we can't parse it as a URL so don't replace it +		} + +		shortString := thisURL.Hostname() + +		if thisURL.Path != "" { +			shortString = shortString + thisURL.Path +		} + +		if thisURL.Fragment != "" { +			shortString = shortString + "#" + thisURL.Fragment +		} + +		if thisURL.RawQuery != "" { +			shortString = shortString + "?" + thisURL.RawQuery +		} + +		replacement := fmt.Sprintf(`<a href="%s" rel="noopener">%s</a>`, urlString, shortString) +		return replacement +	}) +	return replaced +} diff --git a/internal/text/link_test.go b/internal/text/link_test.go new file mode 100644 index 000000000..636f26f7f --- /dev/null +++ b/internal/text/link_test.go @@ -0,0 +1,155 @@ +/* +   GoToSocial +   Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + +   This program is free software: you can redistribute it and/or modify +   it under the terms of the GNU Affero General Public License as published by +   the Free Software Foundation, either version 3 of the License, or +   (at your option) any later version. + +   This program is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +   GNU Affero General Public License for more details. 
+ +   You should have received a copy of the GNU Affero General Public License +   along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +package text_test + +import ( +	"testing" + +	"github.com/stretchr/testify/assert" +	"github.com/stretchr/testify/suite" +	"github.com/superseriousbusiness/gotosocial/internal/text" +) + +const text1 = ` +This is a text with some links in it. Here's link number one: https://example.org/link/to/something#fragment + +Here's link number two: http://test.example.org?q=bahhhhhhhhhhhh + +https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it + +really.cool.website <-- this one shouldn't be parsed as a link because it doesn't contain the scheme + +https://example.orghttps://google.com <-- this shouldn't work either, but it does?! OK +` + +const text2 = ` +this is one link: https://example.org + +this is the same link again: https://example.org + +these should be deduplicated +` + +const text3 = ` +here's a mailto link: mailto:whatever@test.org +` + +const text4 = ` +two similar links: + +https://example.org + +https://example.org/test +` + +const text5 = ` +what happens when we already have a link within an href? 
+ +<a href="https://example.org">https://example.org</a> +` + +type TextTestSuite struct { +	suite.Suite +} + +func (suite *TextTestSuite) TestParseURLsFromText1() { +	urls, err := text.FindLinks(text1) + +	assert.NoError(suite.T(), err) + +	assert.Equal(suite.T(), "https://example.org/link/to/something#fragment", urls[0].String()) +	assert.Equal(suite.T(), "http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String()) +	assert.Equal(suite.T(), "https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it", urls[2].String()) +	assert.Equal(suite.T(), "https://example.orghttps://google.com", urls[3].String()) +} + +func (suite *TextTestSuite) TestParseURLsFromText2() { +	urls, err := text.FindLinks(text2) +	assert.NoError(suite.T(), err) + +	// assert length 1 because the found links will be deduplicated +	assert.Len(suite.T(), urls, 1) +} + +func (suite *TextTestSuite) TestParseURLsFromText3() { +	urls, err := text.FindLinks(text3) +	assert.NoError(suite.T(), err) + +	// assert length 0 because `mailto:` isn't accepted +	assert.Len(suite.T(), urls, 0) +} + +func (suite *TextTestSuite) TestReplaceLinksFromText1() { +	replaced := text.ReplaceLinks(text1) +	assert.Equal(suite.T(), ` +This is a text with some links in it. Here's link number one: <a href="https://example.org/link/to/something#fragment" rel="noopener">example.org/link/to/something#fragment</a> + +Here's link number two: <a href="http://test.example.org?q=bahhhhhhhhhhhh" rel="noopener">test.example.org?q=bahhhhhhhhhhhh</a> + +<a href="https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it" rel="noopener">another.link.example.org/with/a/pretty/long/path/at/the/end/of/it</a> + +really.cool.website <-- this one shouldn't be parsed as a link because it doesn't contain the scheme + +<a href="https://example.orghttps://google.com" rel="noopener">example.orghttps//google.com</a> <-- this shouldn't work either, but it does?! 
OK +`, replaced) +} + +func (suite *TextTestSuite) TestReplaceLinksFromText2() { +	replaced := text.ReplaceLinks(text2) +	assert.Equal(suite.T(), ` +this is one link: <a href="https://example.org" rel="noopener">example.org</a> + +this is the same link again: <a href="https://example.org" rel="noopener">example.org</a> + +these should be deduplicated +`, replaced) +} + +func (suite *TextTestSuite) TestReplaceLinksFromText3() { +	// we know mailto links won't be replaced with hrefs -- we only accept https and http +	replaced := text.ReplaceLinks(text3) +	assert.Equal(suite.T(), ` +here's a mailto link: mailto:whatever@test.org +`, replaced) +} + +func (suite *TextTestSuite) TestReplaceLinksFromText4() { +	replaced := text.ReplaceLinks(text4) +	assert.Equal(suite.T(), ` +two similar links: + +<a href="https://example.org" rel="noopener">example.org</a> + +<a href="https://example.org/test" rel="noopener">example.org/test</a> +`, replaced) +} + +func (suite *TextTestSuite) TestReplaceLinksFromText5() { +	// we know this one doesn't work properly, which is why html should always be sanitized before being passed into the ReplaceLinks function +	replaced := text.ReplaceLinks(text5) +	assert.Equal(suite.T(), ` +what happens when we already have a link within an href? + +<a href="<a href="https://example.org" rel="noopener">example.org</a>"><a href="https://example.org" rel="noopener">example.org</a></a> +`, replaced) +} + +func TestTextTestSuite(t *testing.T) { +	suite.Run(t, new(TextTestSuite)) +} diff --git a/internal/text/plain.go b/internal/text/plain.go index 24ef16f8e..4f6659484 100644 --- a/internal/text/plain.go +++ b/internal/text/plain.go @@ -28,6 +28,9 @@ import (  func (f *formatter) FromPlain(plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string {  	content := preformat(plain) +	// format links nicely +	content = ReplaceLinks(content) +  	// format mentions nicely  	for _, menchie := range mentions {  		targetAccount := &gtsmodel.Account{}  | 
