From ea8ad8b346978b04b067eead8e1f2bbc3c1bfb45 Mon Sep 17 00:00:00 2001 From: Tobi Smethurst <31960611+tsmethurst@users.noreply.github.com> Date: Wed, 28 Jul 2021 11:42:26 +0200 Subject: Link parsing (#120) * add link parsing + formatting functionality * refinement + docs * add missing test * credit url library --- internal/text/link_test.go | 155 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 internal/text/link_test.go (limited to 'internal/text/link_test.go') diff --git a/internal/text/link_test.go b/internal/text/link_test.go new file mode 100644 index 000000000..636f26f7f --- /dev/null +++ b/internal/text/link_test.go @@ -0,0 +1,155 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +package text_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/text" +) + +const text1 = ` +This is a text with some links in it. Here's link number one: https://example.org/link/to/something#fragment + +Here's link number two: http://test.example.org?q=bahhhhhhhhhhhh + +https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it + +really.cool.website <-- this one shouldn't be parsed as a link because it doesn't contain the scheme + +https://example.orghttps://google.com <-- this shouldn't work either, but it does?! OK +` + +const text2 = ` +this is one link: https://example.org + +this is the same link again: https://example.org + +these should be deduplicated +` + +const text3 = ` +here's a mailto link: mailto:whatever@test.org +` + +const text4 = ` +two similar links: + +https://example.org + +https://example.org/test +` + +const text5 = ` +what happens when we already have a link within an href? + +https://example.org +` + +type TextTestSuite struct { + suite.Suite +} + +func (suite *TextTestSuite) TestParseURLsFromText1() { + urls, err := text.FindLinks(text1) + + assert.NoError(suite.T(), err) + + assert.Equal(suite.T(), "https://example.org/link/to/something#fragment", urls[0].String()) + assert.Equal(suite.T(), "http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String()) + assert.Equal(suite.T(), "https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it", urls[2].String()) + assert.Equal(suite.T(), "https://example.orghttps://google.com", urls[3].String()) +} + +func (suite *TextTestSuite) TestParseURLsFromText2() { + urls, err := text.FindLinks(text2) + assert.NoError(suite.T(), err) + + // assert length 1 because the found links will be deduplicated + assert.Len(suite.T(), urls, 1) +} + +func (suite *TextTestSuite) TestParseURLsFromText3() { + urls, err := text.FindLinks(text3) + assert.NoError(suite.T(), err) + + // assert length 0 because `mailto:` isn't accepted + assert.Len(suite.T(), urls, 0) +} + +func (suite *TextTestSuite) TestReplaceLinksFromText1() { + replaced := text.ReplaceLinks(text1) + assert.Equal(suite.T(), ` +This is a text with some links in it. Here's link number one: example.org/link/to/something#fragment + +Here's link number two: test.example.org?q=bahhhhhhhhhhhh + +another.link.example.org/with/a/pretty/long/path/at/the/end/of/it + +really.cool.website <-- this one shouldn't be parsed as a link because it doesn't contain the scheme + +example.orghttps//google.com <-- this shouldn't work either, but it does?! OK +`, replaced) +} + +func (suite *TextTestSuite) TestReplaceLinksFromText2() { + replaced := text.ReplaceLinks(text2) + assert.Equal(suite.T(), ` +this is one link: example.org + +this is the same link again: example.org + +these should be deduplicated +`, replaced) +} + +func (suite *TextTestSuite) TestReplaceLinksFromText3() { + // we know mailto links won't be replaced with hrefs -- we only accept https and http + replaced := text.ReplaceLinks(text3) + assert.Equal(suite.T(), ` +here's a mailto link: mailto:whatever@test.org +`, replaced) +} + +func (suite *TextTestSuite) TestReplaceLinksFromText4() { + replaced := text.ReplaceLinks(text4) + assert.Equal(suite.T(), ` +two similar links: + +example.org + +example.org/test +`, replaced) +} + +func (suite *TextTestSuite) TestReplaceLinksFromText5() { + // we know this one doesn't work properly, which is why html should always be sanitized before being passed into the ReplaceLinks function + replaced := text.ReplaceLinks(text5) + assert.Equal(suite.T(), ` +what happens when we already have a link within an href? + +example.org">example.org +`, replaced) +} + +func TestTextTestSuite(t *testing.T) { + suite.Run(t, new(TextTestSuite)) +} -- cgit v1.2.3