diff options
Diffstat (limited to 'internal')
26 files changed, 821 insertions, 1309 deletions
diff --git a/internal/api/client/statuses/statuscreate_test.go b/internal/api/client/statuses/statuscreate_test.go index ab45af351..6802558ec 100644 --- a/internal/api/client/statuses/statuscreate_test.go +++ b/internal/api/client/statuses/statuscreate_test.go @@ -219,7 +219,7 @@ func (suite *StatusCreateTestSuite) TestPostAnotherNewStatus() {  	err = json.Unmarshal(b, statusReply)  	suite.NoError(err) -	suite.Equal("<p><a href=\"http://localhost:8080/tags/test\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>test</span></a> alright, should be able to post <a href=\"http://localhost:8080/tags/links\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>links</span></a> with fragments in them now, let's see........<br/><br/><a href=\"https://docs.gotosocial.org/en/latest/user_guide/posts/#links\" rel=\"noopener nofollow noreferrer\" target=\"_blank\">docs.gotosocial.org/en/latest/user_guide/posts/#links</a><br/><br/><a href=\"http://localhost:8080/tags/gotosocial\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>gotosocial</span></a><br/><br/>(tobi remember to pull the docker image challenge)</p>", statusReply.Content) +	suite.Equal("<p><a href=\"http://localhost:8080/tags/test\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>test</span></a> alright, should be able to post <a href=\"http://localhost:8080/tags/links\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>links</span></a> with fragments in them now, let's see........<br><br><a href=\"https://docs.gotosocial.org/en/latest/user_guide/posts/#links\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://docs.gotosocial.org/en/latest/user_guide/posts/#links</a><br><br><a href=\"http://localhost:8080/tags/gotosocial\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" 
target=\"_blank\">#<span>gotosocial</span></a><br><br>(tobi remember to pull the docker image challenge)</p>", statusReply.Content)  }  func (suite *StatusCreateTestSuite) TestPostNewStatusWithEmoji() { @@ -252,7 +252,7 @@ func (suite *StatusCreateTestSuite) TestPostNewStatusWithEmoji() {  	suite.NoError(err)  	suite.Equal("", statusReply.SpoilerText) -	suite.Equal("<p>here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow: <br/> here's an emoji that isn't in the db: :test_emoji:</p>", statusReply.Content) +	suite.Equal("<p>here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow:<br>here's an emoji that isn't in the db: :test_emoji:</p>", statusReply.Content)  	suite.Len(statusReply.Emojis, 1)  	apiEmoji := statusReply.Emojis[0] @@ -371,7 +371,7 @@ func (suite *StatusCreateTestSuite) TestAttachNewMediaSuccess() {  	suite.NoError(err)  	suite.Equal("", statusResponse.SpoilerText) -	suite.Equal("<p>here's an image attachment</p>", statusResponse.Content) +	suite.Equal("<p>here's an image attachment</p>", statusResponse.Content)  	suite.False(statusResponse.Sensitive)  	suite.Equal(apimodel.VisibilityPublic, statusResponse.Visibility) diff --git a/internal/db/bundb/bundb.go b/internal/db/bundb/bundb.go index 0ab1d1b83..2f7a8a022 100644 --- a/internal/db/bundb/bundb.go +++ b/internal/db/bundb/bundb.go @@ -473,43 +473,40 @@ func sqlitePragmas(ctx context.Context, conn *DBConn) error {  	CONVERSION FUNCTIONS  */ -func (dbService *DBService) TagStringsToTags(ctx context.Context, tags []string, originAccountID string) ([]*gtsmodel.Tag, error) { +func (dbService *DBService) TagStringToTag(ctx context.Context, t string, originAccountID string) (*gtsmodel.Tag, error) {  	protocol := config.GetProtocol()  	host := config.GetHost() +	now := time.Now() -	newTags := []*gtsmodel.Tag{} -	for _, t := range tags { -		tag := &gtsmodel.Tag{} -		// we can use selectorinsert here to create the new tag if it doesn't exist already -		// inserted will be true if this is a 
new tag we just created -		if err := dbService.conn.NewSelect().Model(tag).Where("LOWER(?) = LOWER(?)", bun.Ident("name"), t).Scan(ctx); err != nil { -			if err == sql.ErrNoRows { -				// tag doesn't exist yet so populate it -				newID, err := id.NewRandomULID() -				if err != nil { -					return nil, err -				} -				tag.ID = newID -				tag.URL = fmt.Sprintf("%s://%s/tags/%s", protocol, host, t) -				tag.Name = t -				tag.FirstSeenFromAccountID = originAccountID -				tag.CreatedAt = time.Now() -				tag.UpdatedAt = time.Now() -				useable := true -				tag.Useable = &useable -				listable := true -				tag.Listable = &listable -			} else { -				return nil, fmt.Errorf("error getting tag with name %s: %s", t, err) -			} -		} +	tag := &gtsmodel.Tag{} +	// we can use selectorinsert here to create the new tag if it doesn't exist already +	// inserted will be true if this is a new tag we just created +	if err := dbService.conn.NewSelect().Model(tag).Where("LOWER(?) = LOWER(?)", bun.Ident("name"), t).Scan(ctx); err != nil && err != sql.ErrNoRows { +		return nil, fmt.Errorf("error getting tag with name %s: %s", t, err) +	} -		// bail already if the tag isn't useable -		if !*tag.Useable { -			continue +	if tag.ID == "" { +		// tag doesn't exist yet so populate it +		newID, err := id.NewRandomULID() +		if err != nil { +			return nil, err  		} -		tag.LastStatusAt = time.Now() -		newTags = append(newTags, tag) +		tag.ID = newID +		tag.URL = protocol + "://" + host + "/tags/" + t +		tag.Name = t +		tag.FirstSeenFromAccountID = originAccountID +		tag.CreatedAt = now +		tag.UpdatedAt = now +		useable := true +		tag.Useable = &useable +		listable := true +		tag.Listable = &listable +	} + +	// bail already if the tag isn't useable +	if !*tag.Useable { +		return nil, fmt.Errorf("tag %s is not useable", t)  	} -	return newTags, nil +	tag.LastStatusAt = now +	return tag, nil  } diff --git a/internal/db/db.go b/internal/db/db.go index aa1929da9..b66b21141 100644 --- a/internal/db/db.go +++ 
b/internal/db/db.go @@ -52,12 +52,12 @@ type DB interface {  		USEFUL CONVERSION FUNCTIONS  	*/ -	// TagStringsToTags takes a slice of deduplicated, lowercase tags in the form "somehashtag", which have been +	// TagStringToTag takes a lowercase tag in the form "somehashtag", which has been  	// used in a status. It takes the id of the account that wrote the status, and the id of the status itself, and then -	// returns a slice of *apimodel.Tag corresponding to the given tags. If the tag already exists in database, that tag +	// returns an *apimodel.Tag corresponding to the given tags. If the tag already exists in database, that tag  	// will be returned. Otherwise a pointer to a new tag struct will be created and returned.  	// -	// Note: this func doesn't/shouldn't do any manipulation of the tags in the DB, it's just for checking +	// Note: this func doesn't/shouldn't do any manipulation of tags in the DB, it's just for checking  	// if they exist in the db already, and conveniently returning them, or creating new tag structs. 
-	TagStringsToTags(ctx context.Context, tags []string, originAccountID string) ([]*gtsmodel.Tag, error) +	TagStringToTag(ctx context.Context, tag string, originAccountID string) (*gtsmodel.Tag, error)  } diff --git a/internal/processing/account/update.go b/internal/processing/account/update.go index 055d1f0e4..c7939034e 100644 --- a/internal/processing/account/update.go +++ b/internal/processing/account/update.go @@ -27,14 +27,12 @@ import (  	"github.com/superseriousbusiness/gotosocial/internal/ap"  	apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"  	"github.com/superseriousbusiness/gotosocial/internal/config" -	"github.com/superseriousbusiness/gotosocial/internal/db"  	"github.com/superseriousbusiness/gotosocial/internal/gtserror"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  	"github.com/superseriousbusiness/gotosocial/internal/log"  	"github.com/superseriousbusiness/gotosocial/internal/media"  	"github.com/superseriousbusiness/gotosocial/internal/messages"  	"github.com/superseriousbusiness/gotosocial/internal/text" -	"github.com/superseriousbusiness/gotosocial/internal/util"  	"github.com/superseriousbusiness/gotosocial/internal/validate"  ) @@ -47,14 +45,20 @@ func (p *processor) Update(ctx context.Context, account *gtsmodel.Account, form  		account.Bot = form.Bot  	} -	var updateEmojis bool +	account.Emojis = []*gtsmodel.Emoji{} +	account.EmojiIDs = []string{}  	if form.DisplayName != nil {  		if err := validate.DisplayName(*form.DisplayName); err != nil {  			return nil, gtserror.NewErrorBadRequest(err)  		}  		account.DisplayName = text.SanitizePlaintext(*form.DisplayName) -		updateEmojis = true + +		formatResult := p.formatter.FromPlainEmojiOnly(ctx, p.parseMention, account.ID, "", account.DisplayName) +		for _, emoji := range formatResult.Emojis { +			account.Emojis = append(account.Emojis, emoji) +			account.EmojiIDs = append(account.EmojiIDs, emoji.ID) +		}  	}  	if form.Note != nil { @@ -66,36 +70,19 @@ func 
(p *processor) Update(ctx context.Context, account *gtsmodel.Account, form  		account.NoteRaw = *form.Note  		// Process note to generate a valid HTML representation -		note, err := p.processNote(ctx, *form.Note, account) -		if err != nil { -			return nil, gtserror.NewErrorBadRequest(err) +		var f text.FormatFunc +		if account.StatusFormat == "markdown" { +			f = p.formatter.FromMarkdown +		} else { +			f = p.formatter.FromPlain  		} +		formatted := f(ctx, p.parseMention, account.ID, "", *form.Note)  		// Set updated HTML-ified note -		account.Note = note -		updateEmojis = true -	} - -	if updateEmojis { -		// account emojis -- treat the sanitized display name and raw -		// note like one long text for the purposes of deriving emojis -		accountEmojiShortcodes := util.DeriveEmojisFromText(account.DisplayName + "\n\n" + account.NoteRaw) -		account.Emojis = make([]*gtsmodel.Emoji, 0, len(accountEmojiShortcodes)) -		account.EmojiIDs = make([]string, 0, len(accountEmojiShortcodes)) - -		for _, shortcode := range accountEmojiShortcodes { -			emoji, err := p.db.GetEmojiByShortcodeDomain(ctx, shortcode, "") -			if err != nil { -				if err != db.ErrNoEntries { -					log.Errorf("error getting local emoji with shortcode %s: %s", shortcode, err) -				} -				continue -			} - -			if *emoji.VisibleInPicker && !*emoji.Disabled { -				account.Emojis = append(account.Emojis, emoji) -				account.EmojiIDs = append(account.EmojiIDs, emoji.ID) -			} +		account.Note = formatted.HTML +		for _, emoji := range formatted.Emojis { +			account.Emojis = append(account.Emojis, emoji) +			account.EmojiIDs = append(account.EmojiIDs, emoji.ID)  		}  	} @@ -240,35 +227,3 @@ func (p *processor) UpdateHeader(ctx context.Context, header *multipart.FileHead  	return processingMedia.LoadAttachment(ctx)  } - -func (p *processor) processNote(ctx context.Context, note string, account *gtsmodel.Account) (string, error) { -	if note == "" { -		return "", nil -	} - -	tagStrings := 
util.DeriveHashtagsFromText(note) -	tags, err := p.db.TagStringsToTags(ctx, tagStrings, account.ID) -	if err != nil { -		return "", err -	} - -	mentionStrings := util.DeriveMentionNamesFromText(note) -	mentions := []*gtsmodel.Mention{} -	for _, mentionString := range mentionStrings { -		mention, err := p.parseMention(ctx, mentionString, account.ID, "") -		if err != nil { -			continue -		} -		mentions = append(mentions, mention) -	} - -	// TODO: support emojis in account notes -	// emojiStrings := util.DeriveEmojisFromText(note) -	// emojis, err := p.db.EmojiStringsToEmojis(ctx, emojiStrings) - -	if account.StatusFormat == "markdown" { -		return p.formatter.FromMarkdown(ctx, note, mentions, tags, nil), nil -	} - -	return p.formatter.FromPlain(ctx, note, mentions, tags), nil -} diff --git a/internal/processing/account/update_test.go b/internal/processing/account/update_test.go index e4b046075..8ebce7888 100644 --- a/internal/processing/account/update_test.go +++ b/internal/processing/account/update_test.go @@ -76,8 +76,8 @@ func (suite *AccountUpdateTestSuite) TestAccountUpdateWithMention() {  	var (  		locked       = true  		displayName  = "new display name" -		note         = "#hello here i am!\n\ngo check out @1happyturtle, they have a cool account!\n" -		noteExpected = "<p><a href=\"http://localhost:8080/tags/hello\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hello</span></a> here i am!<br/><br/>go check out <span class=\"h-card\"><a href=\"http://localhost:8080/@1happyturtle\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>1happyturtle</span></a></span>, they have a cool account!</p>" +		note         = "#hello here i am!\n\ngo check out @1happyturtle, they have a cool account!" 
+		noteExpected = "<p><a href=\"http://localhost:8080/tags/hello\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hello</span></a> here i am!<br><br>go check out <span class=\"h-card\"><a href=\"http://localhost:8080/@1happyturtle\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>1happyturtle</span></a></span>, they have a cool account!</p>"  	)  	form := &apimodel.UpdateCredentialsRequest{ diff --git a/internal/processing/status/create.go b/internal/processing/status/create.go index 1a6177ef4..9e9d24c84 100644 --- a/internal/processing/status/create.go +++ b/internal/processing/status/create.go @@ -76,18 +76,6 @@ func (p *processor) Create(ctx context.Context, account *gtsmodel.Account, appli  		return nil, gtserror.NewErrorInternalError(err)  	} -	if err := p.ProcessMentions(ctx, form, account.ID, newStatus); err != nil { -		return nil, gtserror.NewErrorInternalError(err) -	} - -	if err := p.ProcessTags(ctx, form, account.ID, newStatus); err != nil { -		return nil, gtserror.NewErrorInternalError(err) -	} - -	if err := p.ProcessEmojis(ctx, form, account.ID, newStatus); err != nil { -		return nil, gtserror.NewErrorInternalError(err) -	} -  	if err := p.ProcessContent(ctx, form, account.ID, newStatus); err != nil {  		return nil, gtserror.NewErrorInternalError(err)  	} diff --git a/internal/processing/status/status.go b/internal/processing/status/status.go index 379b0661f..56b8b23eb 100644 --- a/internal/processing/status/status.go +++ b/internal/processing/status/status.go @@ -67,9 +67,6 @@ type Processor interface {  	ProcessReplyToID(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, thisAccountID string, status *gtsmodel.Status) gtserror.WithCode  	ProcessMediaIDs(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, thisAccountID string, status *gtsmodel.Status) gtserror.WithCode  	ProcessLanguage(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, 
accountDefaultLanguage string, status *gtsmodel.Status) error -	ProcessMentions(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error -	ProcessTags(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error -	ProcessEmojis(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error  	ProcessContent(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error  } diff --git a/internal/processing/status/util.go b/internal/processing/status/util.go index 2847d1c42..1115219cd 100644 --- a/internal/processing/status/util.go +++ b/internal/processing/status/util.go @@ -28,8 +28,7 @@ import (  	"github.com/superseriousbusiness/gotosocial/internal/db"  	"github.com/superseriousbusiness/gotosocial/internal/gtserror"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" -	"github.com/superseriousbusiness/gotosocial/internal/log" -	"github.com/superseriousbusiness/gotosocial/internal/util" +	"github.com/superseriousbusiness/gotosocial/internal/text"  )  func (p *processor) ProcessVisibility(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountDefaultVis gtsmodel.Visibility, status *gtsmodel.Status) error { @@ -212,80 +211,6 @@ func (p *processor) ProcessLanguage(ctx context.Context, form *apimodel.Advanced  	return nil  } -func (p *processor) ProcessMentions(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error { -	mentionedAccountNames := util.DeriveMentionNamesFromText(form.Status) -	mentions := []*gtsmodel.Mention{} -	mentionIDs := []string{} - -	for _, mentionedAccountName := range mentionedAccountNames { -		gtsMention, err := p.parseMention(ctx, mentionedAccountName, accountID, status.ID) -		if err != nil { -			log.Errorf("ProcessMentions: error parsing mention %s from status: %s", 
mentionedAccountName, err) -			continue -		} - -		if err := p.db.Put(ctx, gtsMention); err != nil { -			log.Errorf("ProcessMentions: error putting mention in db: %s", err) -		} - -		mentions = append(mentions, gtsMention) -		mentionIDs = append(mentionIDs, gtsMention.ID) -	} - -	// add full populated gts menchies to the status for passing them around conveniently -	status.Mentions = mentions -	// add just the ids of the mentioned accounts to the status for putting in the db -	status.MentionIDs = mentionIDs - -	return nil -} - -func (p *processor) ProcessTags(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error { -	tags := []string{} -	gtsTags, err := p.db.TagStringsToTags(ctx, util.DeriveHashtagsFromText(form.Status), accountID) -	if err != nil { -		return fmt.Errorf("error generating hashtags from status: %s", err) -	} -	for _, tag := range gtsTags { -		if err := p.db.Put(ctx, tag); err != nil { -			if !errors.Is(err, db.ErrAlreadyExists) { -				return fmt.Errorf("error putting tags in db: %s", err) -			} -		} -		tags = append(tags, tag.ID) -	} -	// add full populated gts tags to the status for passing them around conveniently -	status.Tags = gtsTags -	// add just the ids of the used tags to the status for putting in the db -	status.TagIDs = tags -	return nil -} - -func (p *processor) ProcessEmojis(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error { -	// for each emoji shortcode in the text, check if it's an enabled -	// emoji on this instance, and if so, add it to the status -	emojiShortcodes := util.DeriveEmojisFromText(form.SpoilerText + "\n\n" + form.Status) -	status.Emojis = make([]*gtsmodel.Emoji, 0, len(emojiShortcodes)) -	status.EmojiIDs = make([]string, 0, len(emojiShortcodes)) - -	for _, shortcode := range emojiShortcodes { -		emoji, err := p.db.GetEmojiByShortcodeDomain(ctx, shortcode, "") -		if err != nil { -			if err != 
db.ErrNoEntries { -				log.Errorf("error getting local emoji with shortcode %s: %s", shortcode, err) -			} -			continue -		} - -		if *emoji.VisibleInPicker && !*emoji.Disabled { -			status.Emojis = append(status.Emojis, emoji) -			status.EmojiIDs = append(status.EmojiIDs, emoji.ID) -		} -	} - -	return nil -} -  func (p *processor) ProcessContent(ctx context.Context, form *apimodel.AdvancedStatusCreateForm, accountID string, status *gtsmodel.Status) error {  	// if there's nothing in the status at all we can just return early  	if form.Status == "" { @@ -311,16 +236,43 @@ func (p *processor) ProcessContent(ctx context.Context, form *apimodel.AdvancedS  	}  	// parse content out of the status depending on what format has been submitted -	var formatted string +	var f text.FormatFunc  	switch form.Format {  	case apimodel.StatusFormatPlain: -		formatted = p.formatter.FromPlain(ctx, form.Status, status.Mentions, status.Tags) +		f = p.formatter.FromPlain  	case apimodel.StatusFormatMarkdown: -		formatted = p.formatter.FromMarkdown(ctx, form.Status, status.Mentions, status.Tags, status.Emojis) +		f = p.formatter.FromMarkdown  	default:  		return fmt.Errorf("format %s not recognised as a valid status format", form.Format)  	} +	formatted := f(ctx, p.parseMention, accountID, status.ID, form.Status) + +	// add full populated gts {mentions, tags, emojis} to the status for passing them around conveniently +	// add just their ids to the status for putting in the db +	status.Mentions = formatted.Mentions +	status.MentionIDs = make([]string, 0, len(formatted.Mentions)) +	for _, gtsmention := range formatted.Mentions { +		status.MentionIDs = append(status.MentionIDs, gtsmention.ID) +	} + +	status.Tags = formatted.Tags +	status.TagIDs = make([]string, 0, len(formatted.Tags)) +	for _, gtstag := range formatted.Tags { +		status.TagIDs = append(status.TagIDs, gtstag.ID) +	} + +	status.Emojis = formatted.Emojis +	status.EmojiIDs = make([]string, 0, len(formatted.Emojis)) +	for _, 
gtsemoji := range formatted.Emojis { +		status.EmojiIDs = append(status.EmojiIDs, gtsemoji.ID) +	} + +	spoilerformatted := p.formatter.FromPlainEmojiOnly(ctx, p.parseMention, accountID, status.ID, form.SpoilerText) +	for _, gtsemoji := range spoilerformatted.Emojis { +		status.Emojis = append(status.Emojis, gtsemoji) +		status.EmojiIDs = append(status.EmojiIDs, gtsemoji.ID) +	} -	status.Content = formatted +	status.Content = formatted.HTML  	return nil  } diff --git a/internal/processing/status/util_test.go b/internal/processing/status/util_test.go index d4be4337b..acd823188 100644 --- a/internal/processing/status/util_test.go +++ b/internal/processing/status/util_test.go @@ -29,22 +29,23 @@ import (  )  const ( -	statusText1                = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" -	statusText1ExpectedFull    = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br/><br/>Text</p>" -	statusText1ExpectedPartial = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/>#Hashtag<br/><br/>Text</p>" -	statusText2                = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\n#hashTAG" -	status2TextExpectedFull    = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" 
target=\"_blank\">#<span>Hashtag</span></a><br/><br/><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashTAG</span></a></p>" -	status2TextExpectedPartial = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/>#Hashtag<br/><br/>#hashTAG</p>" +	statusText1         = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" +	statusText1Expected = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text</p>" +	statusText2         = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\n#hashTAG" +	status2TextExpected = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashTAG</span></a></p>"  )  type UtilTestSuite struct {  	StatusStandardTestSuite  } -func (suite *UtilTestSuite) TestProcessMentions1() { +func (suite *UtilTestSuite) TestProcessContent1() { +	/* +		TEST PREPARATION +	*/ +	// we need to partially process the status first since processContent expects a status with some stuff already set on it  	
creatingAccount := suite.testAccounts["local_account_1"]  	mentionedAccount := suite.testAccounts["remote_account_1"] -  	form := &apimodel.AdvancedStatusCreateForm{  		StatusCreateRequest: apimodel.StatusCreateRequest{  			Status:      statusText1, @@ -70,8 +71,13 @@ func (suite *UtilTestSuite) TestProcessMentions1() {  		ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ",  	} -	err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status) +	/* +		ACTUAL TEST +	*/ + +	err := suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status)  	suite.NoError(err) +	suite.Equal(statusText1Expected, status.Content)  	suite.Len(status.Mentions, 1)  	newMention := status.Mentions[0] @@ -88,63 +94,16 @@ func (suite *UtilTestSuite) TestProcessMentions1() {  	suite.Equal(newMention.ID, status.MentionIDs[0])  } -func (suite *UtilTestSuite) TestProcessContentFull1() { -	/* -		TEST PREPARATION -	*/ -	// we need to partially process the status first since processContent expects a status with some stuff already set on it -	creatingAccount := suite.testAccounts["local_account_1"] -	form := &apimodel.AdvancedStatusCreateForm{ -		StatusCreateRequest: apimodel.StatusCreateRequest{ -			Status:      statusText1, -			MediaIDs:    []string{}, -			Poll:        nil, -			InReplyToID: "", -			Sensitive:   false, -			SpoilerText: "", -			Visibility:  apimodel.VisibilityPublic, -			ScheduledAt: "", -			Language:    "en", -			Format:      apimodel.StatusFormatPlain, -		}, -		AdvancedVisibilityFlagsForm: apimodel.AdvancedVisibilityFlagsForm{ -			Federated: nil, -			Boostable: nil, -			Replyable: nil, -			Likeable:  nil, -		}, -	} - -	status := &gtsmodel.Status{ -		ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ", -	} - -	err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status) -	suite.NoError(err) -	suite.Empty(status.Content) // shouldn't be set yet - -	err = suite.status.ProcessTags(context.Background(), form, creatingAccount.ID, status) -	
suite.NoError(err) -	suite.Empty(status.Content) // shouldn't be set yet - -	/* -		ACTUAL TEST -	*/ - -	err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status) -	suite.NoError(err) -	suite.Equal(statusText1ExpectedFull, status.Content) -} - -func (suite *UtilTestSuite) TestProcessContentPartial1() { +func (suite *UtilTestSuite) TestProcessContent2() {  	/*  		TEST PREPARATION  	*/  	// we need to partially process the status first since processContent expects a status with some stuff already set on it  	creatingAccount := suite.testAccounts["local_account_1"] +	mentionedAccount := suite.testAccounts["remote_account_1"]  	form := &apimodel.AdvancedStatusCreateForm{  		StatusCreateRequest: apimodel.StatusCreateRequest{ -			Status:      statusText1, +			Status:      statusText2,  			MediaIDs:    []string{},  			Poll:        nil,  			InReplyToID: "", @@ -167,50 +126,14 @@ func (suite *UtilTestSuite) TestProcessContentPartial1() {  		ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ",  	} -	err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status) -	suite.NoError(err) -	suite.Empty(status.Content) // shouldn't be set yet -  	/*  		ACTUAL TEST  	*/ -	err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status) +	err := suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status)  	suite.NoError(err) -	suite.Equal(statusText1ExpectedPartial, status.Content) -} -func (suite *UtilTestSuite) TestProcessMentions2() { -	creatingAccount := suite.testAccounts["local_account_1"] -	mentionedAccount := suite.testAccounts["remote_account_1"] - -	form := &apimodel.AdvancedStatusCreateForm{ -		StatusCreateRequest: apimodel.StatusCreateRequest{ -			Status:      statusText2, -			MediaIDs:    []string{}, -			Poll:        nil, -			InReplyToID: "", -			Sensitive:   false, -			SpoilerText: "", -			Visibility:  apimodel.VisibilityPublic, -			ScheduledAt: "", -			Language:    "en", -			
Format:      apimodel.StatusFormatPlain, -		}, -		AdvancedVisibilityFlagsForm: apimodel.AdvancedVisibilityFlagsForm{ -			Federated: nil, -			Boostable: nil, -			Replyable: nil, -			Likeable:  nil, -		}, -	} - -	status := &gtsmodel.Status{ -		ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ", -	} - -	err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status) -	suite.NoError(err) +	suite.Equal(status2TextExpected, status.Content)  	suite.Len(status.Mentions, 1)  	newMention := status.Mentions[0] @@ -227,96 +150,6 @@ func (suite *UtilTestSuite) TestProcessMentions2() {  	suite.Equal(newMention.ID, status.MentionIDs[0])  } -func (suite *UtilTestSuite) TestProcessContentFull2() { -	/* -		TEST PREPARATION -	*/ -	// we need to partially process the status first since processContent expects a status with some stuff already set on it -	creatingAccount := suite.testAccounts["local_account_1"] -	form := &apimodel.AdvancedStatusCreateForm{ -		StatusCreateRequest: apimodel.StatusCreateRequest{ -			Status:      statusText2, -			MediaIDs:    []string{}, -			Poll:        nil, -			InReplyToID: "", -			Sensitive:   false, -			SpoilerText: "", -			Visibility:  apimodel.VisibilityPublic, -			ScheduledAt: "", -			Language:    "en", -			Format:      apimodel.StatusFormatPlain, -		}, -		AdvancedVisibilityFlagsForm: apimodel.AdvancedVisibilityFlagsForm{ -			Federated: nil, -			Boostable: nil, -			Replyable: nil, -			Likeable:  nil, -		}, -	} - -	status := &gtsmodel.Status{ -		ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ", -	} - -	err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status) -	suite.NoError(err) -	suite.Empty(status.Content) // shouldn't be set yet - -	err = suite.status.ProcessTags(context.Background(), form, creatingAccount.ID, status) -	suite.NoError(err) -	suite.Empty(status.Content) // shouldn't be set yet - -	/* -		ACTUAL TEST -	*/ - -	err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status) -	
suite.NoError(err) - -	suite.Equal(status2TextExpectedFull, status.Content) -} - -func (suite *UtilTestSuite) TestProcessContentPartial2() { -	/* -		TEST PREPARATION -	*/ -	// we need to partially process the status first since processContent expects a status with some stuff already set on it -	creatingAccount := suite.testAccounts["local_account_1"] -	form := &apimodel.AdvancedStatusCreateForm{ -		StatusCreateRequest: apimodel.StatusCreateRequest{ -			Status:      statusText2, -			MediaIDs:    []string{}, -			Poll:        nil, -			InReplyToID: "", -			Sensitive:   false, -			SpoilerText: "", -			Visibility:  apimodel.VisibilityPublic, -			ScheduledAt: "", -			Language:    "en", -			Format:      apimodel.StatusFormatPlain, -		}, -		AdvancedVisibilityFlagsForm: apimodel.AdvancedVisibilityFlagsForm{ -			Federated: nil, -			Boostable: nil, -			Replyable: nil, -			Likeable:  nil, -		}, -	} - -	status := &gtsmodel.Status{ -		ID: "01FCTDD78JJMX3K9KPXQ7ZQ8BJ", -	} - -	err := suite.status.ProcessMentions(context.Background(), form, creatingAccount.ID, status) -	suite.NoError(err) -	suite.Empty(status.Content) - -	err = suite.status.ProcessContent(context.Background(), form, creatingAccount.ID, status) -	suite.NoError(err) - -	suite.Equal(status2TextExpectedPartial, status.Content) -} -  func TestUtilTestSuite(t *testing.T) {  	suite.Run(t, new(UtilTestSuite))  } diff --git a/internal/text/common.go b/internal/text/common.go deleted file mode 100644 index 2293ca3fe..000000000 --- a/internal/text/common.go +++ /dev/null @@ -1,112 +0,0 @@ -/* -   GoToSocial -   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org - -   This program is free software: you can redistribute it and/or modify -   it under the terms of the GNU Affero General Public License as published by -   the Free Software Foundation, either version 3 of the License, or -   (at your option) any later version. 
- -   This program is distributed in the hope that it will be useful, -   but WITHOUT ANY WARRANTY; without even the implied warranty of -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -   GNU Affero General Public License for more details. - -   You should have received a copy of the GNU Affero General Public License -   along with this program.  If not, see <http://www.gnu.org/licenses/>. -*/ - -package text - -import ( -	"bytes" -	"context" -	"strings" -	"unicode" - -	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" -	"github.com/superseriousbusiness/gotosocial/internal/log" -	"github.com/superseriousbusiness/gotosocial/internal/regexes" -	"github.com/superseriousbusiness/gotosocial/internal/util" -) - -func (f *formatter) ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string { -	spans := util.FindHashtagSpansInText(in) - -	if len(spans) == 0 { -		return in -	} - -	var b strings.Builder -	i := 0 - -spans: -	for _, t := range spans { -		b.WriteString(in[i:t.First]) -		i = t.Second -		tagAsEntered := in[t.First+1 : t.Second] - -		for _, tag := range tags { -			if strings.EqualFold(tagAsEntered, tag.Name) { -				// replace the #tag with the formatted tag content -				// `<a href="tag.URL" class="mention hashtag" rel="tag">#<span>tagAsEntered</span></a> -				b.WriteString(`<a href="`) -				b.WriteString(tag.URL) -				b.WriteString(`" class="mention hashtag" rel="tag">#<span>`) -				b.WriteString(tagAsEntered) -				b.WriteString(`</span></a>`) -				continue spans -			} -		} - -		b.WriteString(in[t.First:t.Second]) -	} - -	// Get the last bits. 
-	i = spans[len(spans)-1].Second -	b.WriteString(in[i:]) - -	return b.String() -} - -func (f *formatter) ReplaceMentions(ctx context.Context, in string, mentions []*gtsmodel.Mention) string { -	return regexes.ReplaceAllStringFunc(regexes.MentionFinder, in, func(match string, buf *bytes.Buffer) string { -		// we have a match, trim any spaces -		matchTrimmed := strings.TrimSpace(match) - -		// check through mentions to find what we're matching -		for _, menchie := range mentions { -			if strings.EqualFold(matchTrimmed, menchie.NameString) { -				// make sure we have an account attached to this mention -				if menchie.TargetAccount == nil { -					a, err := f.db.GetAccountByID(ctx, menchie.TargetAccountID) -					if err != nil { -						log.Errorf("error getting account with id %s from the db: %s", menchie.TargetAccountID, err) -						return match -					} -					menchie.TargetAccount = a -				} - -				// The mention's target is our target -				targetAccount := menchie.TargetAccount - -				// Add any dropped space from match -				if unicode.IsSpace(rune(match[0])) { -					buf.WriteByte(match[0]) -				} - -				// replace the mention with the formatted mention content -				// <span class="h-card"><a href="targetAccount.URL" class="u-url mention">@<span>targetAccount.Username</span></a></span> -				buf.WriteString(`<span class="h-card"><a href="`) -				buf.WriteString(targetAccount.URL) -				buf.WriteString(`" class="u-url mention">@<span>`) -				buf.WriteString(targetAccount.Username) -				buf.WriteString(`</span></a></span>`) -				return buf.String() -			} -		} - -		// the match wasn't in the list of mentions for whatever reason, so just return the match as we found it so nothing changes -		return match -	}) -} diff --git a/internal/text/common_test.go b/internal/text/common_test.go deleted file mode 100644 index 3949226ca..000000000 --- a/internal/text/common_test.go +++ /dev/null @@ -1,106 +0,0 @@ -/* -   GoToSocial -   Copyright (C) 2021-2023 GoToSocial Authors 
admin@gotosocial.org - -   This program is free software: you can redistribute it and/or modify -   it under the terms of the GNU Affero General Public License as published by -   the Free Software Foundation, either version 3 of the License, or -   (at your option) any later version. - -   This program is distributed in the hope that it will be useful, -   but WITHOUT ANY WARRANTY; without even the implied warranty of -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -   GNU Affero General Public License for more details. - -   You should have received a copy of the GNU Affero General Public License -   along with this program.  If not, see <http://www.gnu.org/licenses/>. -*/ - -package text_test - -import ( -	"context" -	"testing" -	"time" - -	"github.com/stretchr/testify/suite" -	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" -) - -const ( -	replaceMentionsString                 = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" -	replaceMentionsExpected               = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\n#Hashtag\n\nText" -	replaceHashtagsExpected               = "Another test @foss_satan@fossbros-anonymous.io\n\n<a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag\">#<span>Hashtag</span></a>\n\nText" -	replaceHashtagsAfterMentionsExpected  = "Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\n<a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag\">#<span>Hashtag</span></a>\n\nText" -	replaceMentionsWithLinkString         = "Another test @foss_satan@fossbros-anonymous.io\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060" -	replaceMentionsWithLinkStringExpected = "Another test <span 
class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\">@<span>foss_satan</span></a></span>\n\nhttp://fossbros-anonymous.io/@foss_satan/statuses/6675ee73-fccc-4562-a46a-3e8cd9798060" -	replaceMentionsWithLinkSelfString     = "Mentioning myself: @the_mighty_zork\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR" -	replaceMemtionsWithLinkSelfExpected   = "Mentioning myself: <span class=\"h-card\"><a href=\"http://localhost:8080/@the_mighty_zork\" class=\"u-url mention\">@<span>the_mighty_zork</span></a></span>\n\nand linking to my own status: https://localhost:8080/@the_mighty_zork/statuses/01FGXKJRX2PMERJQ9EQF8Y6HCR" -) - -type CommonTestSuite struct { -	TextStandardTestSuite -} - -func (suite *CommonTestSuite) TestReplaceMentions() { -	foundMentions := []*gtsmodel.Mention{ -		suite.testMentions["zork_mention_foss_satan"], -	} - -	f := suite.formatter.ReplaceMentions(context.Background(), replaceMentionsString, foundMentions) -	suite.Equal(replaceMentionsExpected, f) -} - -func (suite *CommonTestSuite) TestReplaceHashtags() { -	foundTags := []*gtsmodel.Tag{ -		suite.testTags["Hashtag"], -	} - -	f := suite.formatter.ReplaceTags(context.Background(), replaceMentionsString, foundTags) - -	suite.Equal(replaceHashtagsExpected, f) -} - -func (suite *CommonTestSuite) TestReplaceHashtagsAfterReplaceMentions() { -	foundTags := []*gtsmodel.Tag{ -		suite.testTags["Hashtag"], -	} - -	f := suite.formatter.ReplaceTags(context.Background(), replaceMentionsExpected, foundTags) - -	suite.Equal(replaceHashtagsAfterMentionsExpected, f) -} - -func (suite *CommonTestSuite) TestReplaceMentionsWithLink() { -	foundMentions := []*gtsmodel.Mention{ -		suite.testMentions["zork_mention_foss_satan"], -	} - -	f := suite.formatter.ReplaceMentions(context.Background(), replaceMentionsWithLinkString, foundMentions) -	suite.Equal(replaceMentionsWithLinkStringExpected, f) -} - -func (suite 
*CommonTestSuite) TestReplaceMentionsWithLinkSelf() { -	mentioningAccount := suite.testAccounts["local_account_1"] - -	foundMentions := []*gtsmodel.Mention{ -		{ -			ID:               "01FGXKN5F815DVFVD53PN9NYM6", -			CreatedAt:        time.Now(), -			UpdatedAt:        time.Now(), -			StatusID:         "01FGXKP0S5THQXFC1D9R141DDR", -			OriginAccountID:  mentioningAccount.ID, -			TargetAccountID:  mentioningAccount.ID, -			NameString:       "@the_mighty_zork", -			TargetAccountURI: mentioningAccount.URI, -			TargetAccountURL: mentioningAccount.URL, -		}, -	} - -	f := suite.formatter.ReplaceMentions(context.Background(), replaceMentionsWithLinkSelfString, foundMentions) -	suite.Equal(replaceMemtionsWithLinkSelfExpected, f) -} - -func TestCommonTestSuite(t *testing.T) { -	suite.Run(t, new(CommonTestSuite)) -} diff --git a/internal/text/emojionly.go b/internal/text/emojionly.go new file mode 100644 index 000000000..1a3c0e968 --- /dev/null +++ b/internal/text/emojionly.go @@ -0,0 +1,71 @@ +/* +   GoToSocial +   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org + +   This program is free software: you can redistribute it and/or modify +   it under the terms of the GNU Affero General Public License as published by +   the Free Software Foundation, either version 3 of the License, or +   (at your option) any later version. + +   This program is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +   GNU Affero General Public License for more details. + +   You should have received a copy of the GNU Affero General Public License +   along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+*/ + +package text + +import ( +	"bytes" +	"context" + +	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +	"github.com/superseriousbusiness/gotosocial/internal/log" +	"github.com/yuin/goldmark" +	"github.com/yuin/goldmark/parser" +	"github.com/yuin/goldmark/renderer/html" +	"github.com/yuin/goldmark/util" +) + +func (f *formatter) FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult { +	result := &FormatResult{ +		Mentions: []*gtsmodel.Mention{}, +		Tags:     []*gtsmodel.Tag{}, +		Emojis:   []*gtsmodel.Emoji{}, +	} +	// parse markdown text into html, using custom renderer to add hashtag/mention links +	md := goldmark.New( +		goldmark.WithRendererOptions( +			html.WithXHTML(), +			html.WithHardWraps(), +		), +		goldmark.WithParser( +			parser.NewParser( +				parser.WithBlockParsers( +					util.Prioritized(newPlaintextParser(), 500), +				), +			), +		), +		goldmark.WithExtensions( +			&customRenderer{f, ctx, pmf, authorID, statusID, true, result}, +		), +	) + +	var htmlContentBytes bytes.Buffer +	err := md.Convert([]byte(plain), &htmlContentBytes) +	if err != nil { +		log.Errorf("error formatting plaintext to HTML: %s", err) +	} +	result.HTML = htmlContentBytes.String() + +	// clean anything dangerous out of the HTML +	result.HTML = SanitizeHTML(result.HTML) + +	// shrink ray +	result.HTML = minifyHTML(result.HTML) + +	return result +} diff --git a/internal/text/formatter.go b/internal/text/formatter.go index cb4de402b..bdad6c0f8 100644 --- a/internal/text/formatter.go +++ b/internal/text/formatter.go @@ -26,20 +26,19 @@ import (  )  // Formatter wraps some logic and functions for parsing statuses and other text input into nice html. +// Each of the member functions returns a struct containing the formatted HTML and any tags, mentions, and +// emoji that were found in the text.  type Formatter interface {  	// FromPlain parses an HTML text from a plaintext. 
-	FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string +	FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult  	// FromMarkdown parses an HTML text from a markdown-formatted text. -	FromMarkdown(ctx context.Context, md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string - -	// ReplaceTags takes a piece of text and a slice of tags, and returns the same text with the tags nicely formatted as hrefs. -	ReplaceTags(ctx context.Context, in string, tags []*gtsmodel.Tag) string -	// ReplaceMentions takes a piece of text and a slice of mentions, and returns the same text with the mentions nicely formatted as hrefs. -	ReplaceMentions(ctx context.Context, in string, mentions []*gtsmodel.Mention) string -	// ReplaceLinks takes a piece of text, finds all recognizable links in that text, and replaces them with hrefs. -	ReplaceLinks(ctx context.Context, in string) string +	FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, md string) *FormatResult +	// FromPlainEmojiOnly parses an HTML text from a plaintext, only parsing emojis and not mentions etc. 
+	FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult  } +type FormatFunc func(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, text string) *FormatResult +  type formatter struct {  	db db.DB  } @@ -50,3 +49,10 @@ func NewFormatter(db db.DB) Formatter {  		db: db,  	}  } + +type FormatResult struct { +	HTML     string +	Mentions []*gtsmodel.Mention +	Tags     []*gtsmodel.Tag +	Emojis   []*gtsmodel.Emoji +} diff --git a/internal/text/formatter_test.go b/internal/text/formatter_test.go index 438a69c78..32ae74488 100644 --- a/internal/text/formatter_test.go +++ b/internal/text/formatter_test.go @@ -19,9 +19,13 @@  package text_test  import ( +	"context"  	"github.com/stretchr/testify/suite" +	"github.com/superseriousbusiness/gotosocial/internal/concurrency"  	"github.com/superseriousbusiness/gotosocial/internal/db"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +	"github.com/superseriousbusiness/gotosocial/internal/messages" +	"github.com/superseriousbusiness/gotosocial/internal/processing"  	"github.com/superseriousbusiness/gotosocial/internal/text"  	"github.com/superseriousbusiness/gotosocial/testrig"  ) @@ -29,7 +33,8 @@ import (  type TextStandardTestSuite struct {  	// standard suite interfaces  	suite.Suite -	db db.DB +	db           db.DB +	parseMention gtsmodel.ParseMentionFunc  	// standard suite models  	testTokens       map[string]*gtsmodel.Token @@ -41,6 +46,7 @@ type TextStandardTestSuite struct {  	testStatuses     map[string]*gtsmodel.Status  	testTags         map[string]*gtsmodel.Tag  	testMentions     map[string]*gtsmodel.Mention +	testEmojis       map[string]*gtsmodel.Emoji  	// module being tested  	formatter text.Formatter @@ -56,6 +62,7 @@ func (suite *TextStandardTestSuite) SetupSuite() {  	suite.testStatuses = testrig.NewTestStatuses()  	suite.testTags = testrig.NewTestTags()  	suite.testMentions = 
testrig.NewTestMentions() +	suite.testEmojis = testrig.NewTestEmojis()  }  func (suite *TextStandardTestSuite) SetupTest() { @@ -63,6 +70,11 @@ func (suite *TextStandardTestSuite) SetupTest() {  	testrig.InitTestConfig()  	suite.db = testrig.NewTestDB() + +	fedWorker := concurrency.NewWorkerPool[messages.FromFederator](-1, -1) +	federator := testrig.NewTestFederator(suite.db, testrig.NewTestTransportController(testrig.NewMockHTTPClient(nil, "../../testrig/media"), suite.db, fedWorker), nil, nil, fedWorker) +	suite.parseMention = processing.GetParseMentionFunc(suite.db, federator) +  	suite.formatter = text.NewFormatter(suite.db)  	testrig.StandardDBSetup(suite.db, nil) @@ -71,3 +83,11 @@ func (suite *TextStandardTestSuite) SetupTest() {  func (suite *TextStandardTestSuite) TearDownTest() {  	testrig.StandardDBTeardown(suite.db)  } + +func (suite *TextStandardTestSuite) FromMarkdown(text string) *text.FormatResult { +	return suite.formatter.FromMarkdown(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text) +} + +func (suite *TextStandardTestSuite) FromPlain(text string) *text.FormatResult { +	return suite.formatter.FromPlain(context.Background(), suite.parseMention, suite.testAccounts["local_account_1"].ID, "status_ID", text) +} diff --git a/internal/text/markdownextension.go b/internal/text/goldmark_extension.go index 2d8eae907..11e4fde28 100644 --- a/internal/text/markdownextension.go +++ b/internal/text/goldmark_extension.go @@ -17,8 +17,10 @@ package text  import (  	"context" -	"unicode" +	"fmt" +	"strings" +	"github.com/superseriousbusiness/gotosocial/internal/db"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  	"github.com/superseriousbusiness/gotosocial/internal/log"  	"github.com/superseriousbusiness/gotosocial/internal/regexes" @@ -46,8 +48,14 @@ type hashtag struct {  	Segment text.Segment  } +type emoji struct { +	ast.BaseInline +	Segment text.Segment +} +  var kindMention = 
ast.NewNodeKind("Mention")  var kindHashtag = ast.NewNodeKind("Hashtag") +var kindEmoji = ast.NewNodeKind("Emoji")  func (n *mention) Kind() ast.NodeKind {  	return kindMention @@ -57,14 +65,21 @@ func (n *hashtag) Kind() ast.NodeKind {  	return kindHashtag  } -// Dump is used by goldmark for debugging. It is implemented only minimally because -// it is not used in our code. +func (n *emoji) Kind() ast.NodeKind { +	return kindEmoji +} + +// Dump can be used for debugging.  func (n *mention) Dump(source []byte, level int) { -	ast.DumpHelper(n, source, level, nil, nil) +	fmt.Printf("%sMention: %s\n", strings.Repeat("    ", level), string(n.Segment.Value(source)))  }  func (n *hashtag) Dump(source []byte, level int) { -	ast.DumpHelper(n, source, level, nil, nil) +	fmt.Printf("%sHashtag: %s\n", strings.Repeat("    ", level), string(n.Segment.Value(source))) +} + +func (n *emoji) Dump(source []byte, level int) { +	fmt.Printf("%sEmoji: %s\n", strings.Repeat("    ", level), string(n.Segment.Value(source)))  }  // newMention and newHashtag create a goldmark ast.Node from a goldmark text.Segment. @@ -83,6 +98,13 @@ func newHashtag(s text.Segment) *hashtag {  	}  } +func newEmoji(s text.Segment) *emoji { +	return &emoji{ +		BaseInline: ast.BaseInline{}, +		Segment:    s, +	} +} +  // mentionParser and hashtagParser fulfil the goldmark parser.InlineParser interface.  
type mentionParser struct {  } @@ -90,6 +112,9 @@ type mentionParser struct {  type hashtagParser struct {  } +type emojiParser struct { +} +  func (p *mentionParser) Trigger() []byte {  	return []byte{'@'}  } @@ -98,11 +123,15 @@ func (p *hashtagParser) Trigger() []byte {  	return []byte{'#'}  } +func (p *emojiParser) Trigger() []byte { +	return []byte{':'} +} +  func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {  	before := block.PrecendingCharacter()  	line, segment := block.PeekLine() -	if !unicode.IsSpace(before) { +	if !util.IsMentionOrHashtagBoundary(before) {  		return nil  	} @@ -124,59 +153,88 @@ func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont  	line, segment := block.PeekLine()  	s := string(line) -	if !util.IsHashtagBoundary(before) { +	if !util.IsMentionOrHashtagBoundary(before) || len(s) == 1 {  		return nil  	}  	for i, r := range s {  		switch {  		case r == '#' && i == 0: +			// ignore initial #  			continue -		case !util.IsPermittedInHashtag(r) && !util.IsHashtagBoundary(r): +		case !util.IsPlausiblyInHashtag(r) && !util.IsMentionOrHashtagBoundary(r):  			// Fake hashtag, don't trust it  			return nil -		case util.IsHashtagBoundary(r): +		case util.IsMentionOrHashtagBoundary(r): +			if i <= 1 { +				// empty +				return nil +			}  			// End of hashtag  			block.Advance(i)  			return newHashtag(segment.WithStop(segment.Start + i))  		}  	} -	// If we don't find invalid characters before the end of the line then it's good -	block.Advance(len(s)) +	// If we don't find invalid characters before the end of the line then it's all hashtag, babey +	block.Advance(segment.Len())  	return newHashtag(segment)  } +func (p *emojiParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node { +	line, segment := block.PeekLine() + +	// unideal for performance but makes use of existing regex +	loc := regexes.EmojiFinder.FindIndex(line) +	switch { +	case loc == nil: +		
fallthrough +	case loc[0] != 0: // fail if not found at start +		return nil +	default: +		block.Advance(loc[1]) +		return newEmoji(segment.WithStop(segment.Start + loc[1])) +	} +} +  // customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces. -// It is created in FromMarkdown to be used a goldmark extension, and the fields are used -// when rendering mentions and tags. +// It is created in FromMarkdown and FromPlain to be used as a goldmark extension, and the +// fields are used to report tags and mentions to the caller for use as metadata.  type customRenderer struct { -	f        *formatter -	ctx      context.Context -	mentions []*gtsmodel.Mention -	tags     []*gtsmodel.Tag +	f            *formatter +	ctx          context.Context +	parseMention gtsmodel.ParseMentionFunc +	accountID    string +	statusID     string +	emojiOnly    bool +	result       *FormatResult  }  func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {  	reg.Register(kindMention, r.renderMention)  	reg.Register(kindHashtag, r.renderHashtag) +	reg.Register(kindEmoji, r.renderEmoji)  }  func (r *customRenderer) Extend(m goldmark.Markdown) { +	// 1000 is set as the lowest priority, but it's arbitrary  	m.Parser().AddOptions(parser.WithInlineParsers( -		// 500 is pretty arbitrary here, it was copied from example goldmark extension code. 
-		// https://github.com/yuin/goldmark/blob/75d8cce5b78c7e1d5d9c4ca32c1164f0a1e57b53/extension/strikethrough.go#L111 -		mdutil.Prioritized(&mentionParser{}, 500), -		mdutil.Prioritized(&hashtagParser{}, 500), +		mdutil.Prioritized(&emojiParser{}, 1000),  	)) +	if !r.emojiOnly { +		m.Parser().AddOptions(parser.WithInlineParsers( +			mdutil.Prioritized(&mentionParser{}, 1000), +			mdutil.Prioritized(&hashtagParser{}, 1000), +		)) +	}  	m.Renderer().AddOptions(renderer.WithNodeRenderers( -		mdutil.Prioritized(r, 500), +		mdutil.Prioritized(r, 1000),  	))  }  // renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML.  func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {  	if !entering { -		return ast.WalkContinue, nil +		return ast.WalkSkipChildren, nil  	}  	n, ok := node.(*mention) // this function is only registered for kindMention @@ -185,18 +243,18 @@ func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node a  	}  	text := string(n.Segment.Value(source)) -	html := r.f.ReplaceMentions(r.ctx, text, r.mentions) +	html := r.replaceMention(text)  	// we don't have much recourse if this fails  	if _, err := w.WriteString(html); err != nil { -		log.Errorf("error outputting markdown text: %s", err) +		log.Errorf("error writing HTML: %s", err)  	} -	return ast.WalkContinue, nil +	return ast.WalkSkipChildren, nil  }  func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {  	if !entering { -		return ast.WalkContinue, nil +		return ast.WalkSkipChildren, nil  	}  	n, ok := node.(*hashtag) // this function is only registered for kindHashtag @@ -205,11 +263,50 @@ func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node a  	}  	text := string(n.Segment.Value(source)) -	html := r.f.ReplaceTags(r.ctx, text, r.tags) +	html := r.replaceHashtag(text) +	_, 
err := w.WriteString(html)  	// we don't have much recourse if this fails -	if _, err := w.WriteString(html); err != nil { -		log.Errorf("error outputting markdown text: %s", err) +	if err != nil { +		log.Errorf("error writing HTML: %s", err) +	} +	return ast.WalkSkipChildren, nil +} + +// renderEmoji doesn't turn an emoji into HTML, but adds it to the metadata. +func (r *customRenderer) renderEmoji(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { +	if !entering { +		return ast.WalkSkipChildren, nil +	} + +	n, ok := node.(*emoji) // this function is only registered for kindEmoji +	if !ok { +		log.Errorf("type assertion failed") +	} +	text := string(n.Segment.Value(source)) +	shortcode := text[1 : len(text)-1] + +	emoji, err := r.f.db.GetEmojiByShortcodeDomain(r.ctx, shortcode, "") +	if err != nil { +		if err != db.ErrNoEntries { +			log.Errorf("error getting local emoji with shortcode %s: %s", shortcode, err) +		} +	} else if *emoji.VisibleInPicker && !*emoji.Disabled { +		listed := false +		for _, e := range r.result.Emojis { +			if e.Shortcode == emoji.Shortcode { +				listed = true +				break +			} +		} +		if !listed { +			r.result.Emojis = append(r.result.Emojis, emoji) +		} +	} + +	// we don't have much recourse if this fails +	if _, err := w.WriteString(text); err != nil { +		log.Errorf("error writing HTML: %s", err)  	} -	return ast.WalkContinue, nil +	return ast.WalkSkipChildren, nil  } diff --git a/internal/text/goldmark_plaintext.go b/internal/text/goldmark_plaintext.go new file mode 100644 index 000000000..84916b1d1 --- /dev/null +++ b/internal/text/goldmark_plaintext.go @@ -0,0 +1,64 @@ +/* +   GoToSocial +   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org + +   This program is free software: you can redistribute it and/or modify +   it under the terms of the GNU Affero General Public License as published by +   the Free Software Foundation, either version 3 of the License, or +   (at your 
option) any later version. + +   This program is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +   GNU Affero General Public License for more details. + +   You should have received a copy of the GNU Affero General Public License +   along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +package text + +import ( +	"github.com/yuin/goldmark/ast" +	"github.com/yuin/goldmark/parser" +	"github.com/yuin/goldmark/text" +) + +// plaintextParser implements goldmark.parser.BlockParser +type plaintextParser struct { +} + +var defaultPlaintextParser = &plaintextParser{} + +func newPlaintextParser() parser.BlockParser { +	return defaultPlaintextParser +} + +func (b *plaintextParser) Trigger() []byte { +	return nil +} + +func (b *plaintextParser) Open(parent ast.Node, reader text.Reader, pc parser.Context) (ast.Node, parser.State) { +	_, segment := reader.PeekLine() +	node := ast.NewParagraph() +	node.Lines().Append(segment) +	reader.Advance(segment.Len() - 1) +	return node, parser.NoChildren +} + +func (b *plaintextParser) Continue(node ast.Node, reader text.Reader, pc parser.Context) parser.State { +	_, segment := reader.PeekLine() +	node.Lines().Append(segment) +	reader.Advance(segment.Len() - 1) +	return parser.Continue | parser.NoChildren +} + +func (b *plaintextParser) Close(node ast.Node, reader text.Reader, pc parser.Context) {} + +func (b *plaintextParser) CanInterruptParagraph() bool { +	return false +} + +func (b *plaintextParser) CanAcceptIndentedLine() bool { +	return true +} diff --git a/internal/text/link.go b/internal/text/link.go deleted file mode 100644 index 2b2b45e73..000000000 --- a/internal/text/link.go +++ /dev/null @@ -1,86 +0,0 @@ -/* -   GoToSocial -   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org - -   This program is free software: you can redistribute it and/or modify -   it 
under the terms of the GNU Affero General Public License as published by -   the Free Software Foundation, either version 3 of the License, or -   (at your option) any later version. - -   This program is distributed in the hope that it will be useful, -   but WITHOUT ANY WARRANTY; without even the implied warranty of -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -   GNU Affero General Public License for more details. - -   You should have received a copy of the GNU Affero General Public License -   along with this program.  If not, see <http://www.gnu.org/licenses/>. -*/ - -package text - -import ( -	"bytes" -	"context" -	"net/url" -	"strings" - -	"github.com/superseriousbusiness/gotosocial/internal/regexes" -) - -// FindLinks parses the given string looking for recognizable URLs (including scheme). -// It returns a list of those URLs, without changing the string, or an error if something goes wrong. -// If no URLs are found within the given string, an empty slice and nil will be returned. -func FindLinks(in string) []*url.URL { -	var urls []*url.URL - -	// bail already if we don't find anything -	found := regexes.LinkScheme.FindAllString(in, -1) -	if len(found) == 0 { -		return nil -	} - -	urlmap := map[string]struct{}{} - -	// for each string we find, we want to parse it into a URL if we can -	// if we fail to parse it, just ignore this match and continue -	for _, f := range found { -		u, err := url.Parse(f) -		if err != nil { -			continue -		} - -		// Calculate string -		ustr := u.String() - -		if _, ok := urlmap[ustr]; !ok { -			// Has not been encountered yet -			urls = append(urls, u) -			urlmap[ustr] = struct{}{} -		} -	} - -	return urls -} - -// ReplaceLinks replaces all detected links in a piece of text with their HTML (href) equivalents. 
-// Note: because Go doesn't allow negative lookbehinds in regex, it's possible that an already-formatted -// href will end up double-formatted, if the text you pass here contains one or more hrefs already. -// To avoid this, you should sanitize any HTML out of text before you pass it into this function. -func (f *formatter) ReplaceLinks(ctx context.Context, in string) string { -	return regexes.ReplaceAllStringFunc(regexes.LinkScheme, in, func(urlString string, buf *bytes.Buffer) string { -		thisURL, err := url.Parse(urlString) -		if err != nil { -			return urlString // we can't parse it as a URL so don't replace it -		} -		// <a href="thisURL.String()" rel="noopener">urlString</a> -		urlString = thisURL.String() -		buf.WriteString(`<a href="`) -		buf.WriteString(thisURL.String()) -		buf.WriteString(`" rel="noopener">`) -		urlString = strings.TrimPrefix(urlString, thisURL.Scheme) -		urlString = strings.TrimPrefix(urlString, "://") -		buf.WriteString(urlString) -		buf.WriteString(`</a>`) -		return buf.String() -	}) -} diff --git a/internal/text/link_test.go b/internal/text/link_test.go deleted file mode 100644 index dfb4656b8..000000000 --- a/internal/text/link_test.go +++ /dev/null @@ -1,157 +0,0 @@ -/* -   GoToSocial -   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org - -   This program is free software: you can redistribute it and/or modify -   it under the terms of the GNU Affero General Public License as published by -   the Free Software Foundation, either version 3 of the License, or -   (at your option) any later version. - -   This program is distributed in the hope that it will be useful, -   but WITHOUT ANY WARRANTY; without even the implied warranty of -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -   GNU Affero General Public License for more details. - -   You should have received a copy of the GNU Affero General Public License -   along with this program.  If not, see <http://www.gnu.org/licenses/>. 
-*/ - -package text_test - -import ( -	"context" -	"testing" - -	"github.com/stretchr/testify/assert" -	"github.com/stretchr/testify/suite" -	"github.com/superseriousbusiness/gotosocial/internal/text" -) - -const text1 = ` -This is a text with some links in it. Here's link number one: https://example.org/link/to/something#fragment - -Here's link number two: http://test.example.org?q=bahhhhhhhhhhhh - -https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it - -really.cool.website <-- this one shouldn't be parsed as a link because it doesn't contain the scheme - -https://example.orghttps://google.com <-- this shouldn't work either, but it does?! OK -` - -const text2 = ` -this is one link: https://example.org - -this is the same link again: https://example.org - -these should be deduplicated -` - -const text3 = ` -here's a mailto link: mailto:whatever@test.org -` - -const text4 = ` -two similar links: - -https://example.org - -https://example.org/test -` - -const text5 = ` -what happens when we already have a link within an href? 
- -<a href="https://example.org">https://example.org</a> -` - -type LinkTestSuite struct { -	TextStandardTestSuite -} - -func (suite *LinkTestSuite) TestParseSimple() { -	f := suite.formatter.FromPlain(context.Background(), simple, nil, nil) -	suite.Equal(simpleExpected, f) -} - -func (suite *LinkTestSuite) TestParseURLsFromText1() { -	urls := text.FindLinks(text1) - -	suite.Equal("https://example.org/link/to/something#fragment", urls[0].String()) -	suite.Equal("http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String()) -	suite.Equal("https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it", urls[2].String()) -	suite.Equal("https://example.orghttps://google.com", urls[3].String()) -} - -func (suite *LinkTestSuite) TestParseURLsFromText2() { -	urls := text.FindLinks(text2) - -	// assert length 1 because the found links will be deduplicated -	assert.Len(suite.T(), urls, 1) -} - -func (suite *LinkTestSuite) TestParseURLsFromText3() { -	urls := text.FindLinks(text3) - -	// assert length 0 because `mailto:` isn't accepted -	assert.Len(suite.T(), urls, 0) -} - -func (suite *LinkTestSuite) TestReplaceLinksFromText1() { -	replaced := suite.formatter.ReplaceLinks(context.Background(), text1) -	suite.Equal(` -This is a text with some links in it. Here's link number one: <a href="https://example.org/link/to/something#fragment" rel="noopener">example.org/link/to/something#fragment</a> - -Here's link number two: <a href="http://test.example.org?q=bahhhhhhhhhhhh" rel="noopener">test.example.org?q=bahhhhhhhhhhhh</a> - -<a href="https://another.link.example.org/with/a/pretty/long/path/at/the/end/of/it" rel="noopener">another.link.example.org/with/a/pretty/long/path/at/the/end/of/it</a> - -really.cool.website <-- this one shouldn't be parsed as a link because it doesn't contain the scheme - -<a href="https://example.orghttps://google.com" rel="noopener">example.orghttps://google.com</a> <-- this shouldn't work either, but it does?! 
OK -`, replaced) -} - -func (suite *LinkTestSuite) TestReplaceLinksFromText2() { -	replaced := suite.formatter.ReplaceLinks(context.Background(), text2) -	suite.Equal(` -this is one link: <a href="https://example.org" rel="noopener">example.org</a> - -this is the same link again: <a href="https://example.org" rel="noopener">example.org</a> - -these should be deduplicated -`, replaced) -} - -func (suite *LinkTestSuite) TestReplaceLinksFromText3() { -	// we know mailto links won't be replaced with hrefs -- we only accept https and http -	replaced := suite.formatter.ReplaceLinks(context.Background(), text3) -	suite.Equal(` -here's a mailto link: mailto:whatever@test.org -`, replaced) -} - -func (suite *LinkTestSuite) TestReplaceLinksFromText4() { -	replaced := suite.formatter.ReplaceLinks(context.Background(), text4) -	suite.Equal(` -two similar links: - -<a href="https://example.org" rel="noopener">example.org</a> - -<a href="https://example.org/test" rel="noopener">example.org/test</a> -`, replaced) -} - -func (suite *LinkTestSuite) TestReplaceLinksFromText5() { -	// we know this one doesn't work properly, which is why html should always be sanitized before being passed into the ReplaceLinks function -	replaced := suite.formatter.ReplaceLinks(context.Background(), text5) -	suite.Equal(` -what happens when we already have a link within an href? 
- -<a href="<a href="https://example.org" rel="noopener">example.org</a>"><a href="https://example.org" rel="noopener">example.org</a></a> -`, replaced) -} - -func TestLinkTestSuite(t *testing.T) { -	suite.Run(t, new(LinkTestSuite)) -} diff --git a/internal/text/markdown.go b/internal/text/markdown.go index dbe86d110..232f0f723 100644 --- a/internal/text/markdown.go +++ b/internal/text/markdown.go @@ -21,32 +21,19 @@ package text  import (  	"bytes"  	"context" -	"strings"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  	"github.com/superseriousbusiness/gotosocial/internal/log" -	"github.com/tdewolff/minify/v2" -	minifyHtml "github.com/tdewolff/minify/v2/html"  	"github.com/yuin/goldmark"  	"github.com/yuin/goldmark/extension"  	"github.com/yuin/goldmark/renderer/html"  ) -var ( -	m *minify.M -) - -func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag, emojis []*gtsmodel.Emoji) string { - -	// Temporarily replace all found emoji shortcodes in the markdown text with -	// their ID so that they're not parsed as anything by the markdown parser - -	// this fixes cases where emojis with some underscores in them are parsed as -	// words with emphasis, eg `:_some_emoji:` becomes `:<em>some</em>emoji:` -	// -	// Since the IDs of the emojis are just uppercase letters + numbers they should -	// be safe to pass through the markdown parser without unexpected effects. 
-	for _, e := range emojis { -		markdownText = strings.ReplaceAll(markdownText, ":"+e.Shortcode+":", ":"+e.ID+":") +func (f *formatter) FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, markdownText string) *FormatResult { +	result := &FormatResult{ +		Mentions: []*gtsmodel.Mention{}, +		Tags:     []*gtsmodel.Tag{}, +		Emojis:   []*gtsmodel.Emoji{},  	}  	// parse markdown text into html, using custom renderer to add hashtag/mention links @@ -57,7 +44,7 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti  			html.WithUnsafe(), // allows raw HTML  		),  		goldmark.WithExtensions( -			&customRenderer{f, ctx, mentions, tags}, +			&customRenderer{f, ctx, pmf, authorID, statusID, false, result},  			extension.Linkify, // turns URLs into links  			extension.Strikethrough,  		), @@ -66,30 +53,15 @@ func (f *formatter) FromMarkdown(ctx context.Context, markdownText string, menti  	var htmlContentBytes bytes.Buffer  	err := md.Convert([]byte(markdownText), &htmlContentBytes)  	if err != nil { -		log.Errorf("error rendering markdown to HTML: %s", err) -	} -	htmlContent := htmlContentBytes.String() - -	// Replace emoji IDs in the parsed html content with their shortcodes again -	for _, e := range emojis { -		htmlContent = strings.ReplaceAll(htmlContent, ":"+e.ID+":", ":"+e.Shortcode+":") +		log.Errorf("error formatting markdown to HTML: %s", err)  	} +	result.HTML = htmlContentBytes.String() -	// clean anything dangerous out of the html -	htmlContent = SanitizeHTML(htmlContent) +	// clean anything dangerous out of the HTML +	result.HTML = SanitizeHTML(result.HTML) -	if m == nil { -		m = minify.New() -		m.Add("text/html", &minifyHtml.Minifier{ -			KeepEndTags: true, -			KeepQuotes:  true, -		}) -	} - -	minified, err := m.String("text/html", htmlContent) -	if err != nil { -		log.Errorf("error minifying markdown text: %s", err) -	} +	// shrink ray +	result.HTML = minifyHTML(result.HTML) -	return 
minified +	return result  } diff --git a/internal/text/markdown_test.go b/internal/text/markdown_test.go index 384f4389c..80547f8b3 100644 --- a/internal/text/markdown_test.go +++ b/internal/text/markdown_test.go @@ -19,11 +19,9 @@  package text_test  import ( -	"context"  	"testing"  	"github.com/stretchr/testify/suite" -	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  )  var withCodeBlock = `# Title @@ -77,6 +75,16 @@ const (  	mdWithStrikethroughExpected     = "<p>I have <del>mdae</del> made an error</p>"  	mdWithLink                      = "Check out this code, i heard it was written by a sloth https://github.com/superseriousbusiness/gotosocial"  	mdWithLinkExpected              = "<p>Check out this code, i heard it was written by a sloth <a href=\"https://github.com/superseriousbusiness/gotosocial\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">https://github.com/superseriousbusiness/gotosocial</a></p>" +	mdObjectInCodeBlock             = "@foss_satan@fossbros-anonymous.io this is how to mention a user\n```\n@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! :rainbow:\n```\nhope that helps" +	mdObjectInCodeBlockExpected     = "<p><span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span> this is how to mention a user</p><pre><code>@the_mighty_zork hey bud! nice #ObjectOrientedProgramming software you've been writing lately! 
:rainbow:\n</code></pre><p>hope that helps</p>" +	mdItalicHashtag                 = "_#hashtag_" +	mdItalicHashtagExpected         = "<p><em><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>" +	mdItalicHashtags                = "_#hashtag #hashtag #hashtag_" +	mdItalicHashtagsExpected        = "<p><em><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a> <a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hashtag</span></a></em></p>" +	// BEWARE: sneaky unicode business going on. +	// the first ö is one rune, the second ö is an o with a combining diacritic. 
+	mdUnnormalizedHashtag         = "#hellöthere #hellöthere" +	mdUnnormalizedHashtagExpected = "<p><a href=\"http://localhost:8080/tags/hell%C3%B6there\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hellöthere</span></a> <a href=\"http://localhost:8080/tags/hell%C3%B6there\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>hellöthere</span></a></p>"  )  type MarkdownTestSuite struct { @@ -84,101 +92,112 @@ type MarkdownTestSuite struct {  }  func (suite *MarkdownTestSuite) TestParseSimple() { -	s := suite.formatter.FromMarkdown(context.Background(), simpleMarkdown, nil, nil, nil) -	suite.Equal(simpleMarkdownExpected, s) +	formatted := suite.FromMarkdown(simpleMarkdown) +	suite.Equal(simpleMarkdownExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithCodeBlock() { -	s := suite.formatter.FromMarkdown(context.Background(), withCodeBlock, nil, nil, nil) -	suite.Equal(withCodeBlockExpected, s) +	formatted := suite.FromMarkdown(withCodeBlock) +	suite.Equal(withCodeBlockExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithInlineCode() { -	s := suite.formatter.FromMarkdown(context.Background(), withInlineCode, nil, nil, nil) -	suite.Equal(withInlineCodeExpected, s) +	formatted := suite.FromMarkdown(withInlineCode) +	suite.Equal(withInlineCodeExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithInlineCode2() { -	s := suite.formatter.FromMarkdown(context.Background(), withInlineCode2, nil, nil, nil) -	suite.Equal(withInlineCode2Expected, s) +	formatted := suite.FromMarkdown(withInlineCode2) +	suite.Equal(withInlineCode2Expected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithHashtag() { -	foundTags := []*gtsmodel.Tag{ -		suite.testTags["Hashtag"], -	} - -	s := suite.formatter.FromMarkdown(context.Background(), withHashtag, nil, foundTags, nil) -	suite.Equal(withHashtagExpected, s) +	formatted := 
suite.FromMarkdown(withHashtag) +	suite.Equal(withHashtagExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithHTML() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithHTML, nil, nil, nil) -	suite.Equal(mdWithHTMLExpected, s) +	formatted := suite.FromMarkdown(mdWithHTML) +	suite.Equal(mdWithHTMLExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithCheekyHTML() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithCheekyHTML, nil, nil, nil) -	suite.Equal(mdWithCheekyHTMLExpected, s) +	formatted := suite.FromMarkdown(mdWithCheekyHTML) +	suite.Equal(mdWithCheekyHTMLExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithHashtagInitial() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithHashtagInitial, nil, []*gtsmodel.Tag{ -		suite.testTags["Hashtag"], -		suite.testTags["welcome"], -	}, nil) -	suite.Equal(mdWithHashtagInitialExpected, s) +	formatted := suite.FromMarkdown(mdWithHashtagInitial) +	suite.Equal(mdWithHashtagInitialExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseCodeBlockWithNewlines() { -	s := suite.formatter.FromMarkdown(context.Background(), mdCodeBlockWithNewlines, nil, nil, nil) -	suite.Equal(mdCodeBlockWithNewlinesExpected, s) +	formatted := suite.FromMarkdown(mdCodeBlockWithNewlines) +	suite.Equal(mdCodeBlockWithNewlinesExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithFootnote() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithFootnote, nil, nil, nil) -	suite.Equal(mdWithFootnoteExpected, s) +	formatted := suite.FromMarkdown(mdWithFootnote) +	suite.Equal(mdWithFootnoteExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseWithBlockquote() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithBlockQuote, nil, nil, nil) -	suite.Equal(mdWithBlockQuoteExpected, s) +	formatted := suite.FromMarkdown(mdWithBlockQuote) +	suite.Equal(mdWithBlockQuoteExpected, 
formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseHashtagWithCodeBlock() { -	s := suite.formatter.FromMarkdown(context.Background(), mdHashtagAndCodeBlock, nil, []*gtsmodel.Tag{ -		suite.testTags["Hashtag"], -	}, nil) -	suite.Equal(mdHashtagAndCodeBlockExpected, s) +	formatted := suite.FromMarkdown(mdHashtagAndCodeBlock) +	suite.Equal(mdHashtagAndCodeBlockExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseMentionWithCodeBlock() { -	s := suite.formatter.FromMarkdown(context.Background(), mdMentionAndCodeBlock, []*gtsmodel.Mention{ -		suite.testMentions["local_user_2_mention_zork"], -	}, nil, nil) -	suite.Equal(mdMentionAndCodeBlockExpected, s) +	formatted := suite.FromMarkdown(mdMentionAndCodeBlock) +	suite.Equal(mdMentionAndCodeBlockExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseSmartypants() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithSmartypants, []*gtsmodel.Mention{ -		suite.testMentions["local_user_2_mention_zork"], -	}, nil, nil) -	suite.Equal(mdWithSmartypantsExpected, s) +	formatted := suite.FromMarkdown(mdWithSmartypants) +	suite.Equal(mdWithSmartypantsExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseAsciiHeart() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithAsciiHeart, nil, nil, nil) -	suite.Equal(mdWithAsciiHeartExpected, s) +	formatted := suite.FromMarkdown(mdWithAsciiHeart) +	suite.Equal(mdWithAsciiHeartExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseStrikethrough() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithStrikethrough, nil, nil, nil) -	suite.Equal(mdWithStrikethroughExpected, s) +	formatted := suite.FromMarkdown(mdWithStrikethrough) +	suite.Equal(mdWithStrikethroughExpected, formatted.HTML)  }  func (suite *MarkdownTestSuite) TestParseLink() { -	s := suite.formatter.FromMarkdown(context.Background(), mdWithLink, nil, nil, nil) -	suite.Equal(mdWithLinkExpected, s) +	formatted := 
suite.FromMarkdown(mdWithLink) +	suite.Equal(mdWithLinkExpected, formatted.HTML) +} + +func (suite *MarkdownTestSuite) TestParseObjectInCodeBlock() { +	formatted := suite.FromMarkdown(mdObjectInCodeBlock) +	suite.Equal(mdObjectInCodeBlockExpected, formatted.HTML) +	suite.Len(formatted.Mentions, 1) +	suite.Equal("@foss_satan@fossbros-anonymous.io", formatted.Mentions[0].NameString) +	suite.Empty(formatted.Tags) +	suite.Empty(formatted.Emojis) +} + +func (suite *MarkdownTestSuite) TestParseItalicHashtag() { +	formatted := suite.FromMarkdown(mdItalicHashtag) +	suite.Equal(mdItalicHashtagExpected, formatted.HTML) +} + +func (suite *MarkdownTestSuite) TestParseItalicHashtags() { +	formatted := suite.FromMarkdown(mdItalicHashtags) +	suite.Equal(mdItalicHashtagsExpected, formatted.HTML) +} + +func (suite *MarkdownTestSuite) TestParseUnnormalizedHashtag() { +	formatted := suite.FromMarkdown(mdUnnormalizedHashtag) +	suite.Equal(mdUnnormalizedHashtagExpected, formatted.HTML)  }  func TestMarkdownTestSuite(t *testing.T) { diff --git a/internal/text/minify.go b/internal/text/minify.go new file mode 100644 index 000000000..62562c7ca --- /dev/null +++ b/internal/text/minify.go @@ -0,0 +1,45 @@ +/* +   GoToSocial +   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org + +   This program is free software: you can redistribute it and/or modify +   it under the terms of the GNU Affero General Public License as published by +   the Free Software Foundation, either version 3 of the License, or +   (at your option) any later version. + +   This program is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +   GNU Affero General Public License for more details. + +   You should have received a copy of the GNU Affero General Public License +   along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+*/ + +package text + +import ( +	"github.com/superseriousbusiness/gotosocial/internal/log" +	"github.com/tdewolff/minify/v2" +	"github.com/tdewolff/minify/v2/html" +) + +var ( +	m *minify.M +) + +func minifyHTML(content string) string { +	if m == nil { +		m = minify.New() +		m.Add("text/html", &html.Minifier{ +			KeepEndTags: true, +			KeepQuotes:  true, +		}) +	} + +	minified, err := m.String("text/html", content) +	if err != nil { +		log.Errorf("error minifying HTML: %s", err) +	} +	return minified +} diff --git a/internal/text/plain.go b/internal/text/plain.go index a64a14f06..3549200c6 100644 --- a/internal/text/plain.go +++ b/internal/text/plain.go @@ -19,40 +19,56 @@  package text  import ( +	"bytes"  	"context" -	"html" -	"strings"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +	"github.com/superseriousbusiness/gotosocial/internal/log" +	"github.com/yuin/goldmark" +	"github.com/yuin/goldmark/extension" +	"github.com/yuin/goldmark/parser" +	"github.com/yuin/goldmark/renderer/html" +	"github.com/yuin/goldmark/util"  ) -// breakReplacer replaces new-lines with HTML breaks. 
-var breakReplacer = strings.NewReplacer( -	"\r\n", "<br/>", -	"\n", "<br/>", -) - -func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string { -	// trim any crap -	content := strings.TrimSpace(plain) - -	// clean 'er up -	content = html.EscapeString(content) - -	// format links nicely -	content = f.ReplaceLinks(ctx, content) +func (f *formatter) FromPlain(ctx context.Context, pmf gtsmodel.ParseMentionFunc, authorID string, statusID string, plain string) *FormatResult { +	result := &FormatResult{ +		Mentions: []*gtsmodel.Mention{}, +		Tags:     []*gtsmodel.Tag{}, +		Emojis:   []*gtsmodel.Emoji{}, +	} -	// format tags nicely -	content = f.ReplaceTags(ctx, content, tags) +	// parse markdown text into html, using custom renderer to add hashtag/mention links +	md := goldmark.New( +		goldmark.WithRendererOptions( +			html.WithXHTML(), +			html.WithHardWraps(), +		), +		goldmark.WithParser( +			parser.NewParser( +				parser.WithBlockParsers( +					util.Prioritized(newPlaintextParser(), 500), +				), +			), +		), +		goldmark.WithExtensions( +			&customRenderer{f, ctx, pmf, authorID, statusID, false, result}, +			extension.Linkify, // turns URLs into links +		), +	) -	// format mentions nicely -	content = f.ReplaceMentions(ctx, content, mentions) +	var htmlContentBytes bytes.Buffer +	err := md.Convert([]byte(plain), &htmlContentBytes) +	if err != nil { +		log.Errorf("error formatting plaintext to HTML: %s", err) +	} +	result.HTML = htmlContentBytes.String() -	// replace newlines with breaks -	content = breakReplacer.Replace(content) +	// clean anything dangerous out of the HTML +	result.HTML = SanitizeHTML(result.HTML) -	// wrap the whole thing in a pee -	content = `<p>` + content + `</p>` +	// shrink ray +	result.HTML = minifyHTML(result.HTML) -	return SanitizeHTML(content) +	return result  } diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go index 6b850cb45..3693ada9a 100644 --- 
a/internal/text/plain_test.go +++ b/internal/text/plain_test.go @@ -19,22 +19,21 @@  package text_test  import ( -	"context"  	"testing" +	"github.com/stretchr/testify/assert"  	"github.com/stretchr/testify/suite" -	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"  )  const ( -	simple           = "this is a plain and simple status" -	simpleExpected   = "<p>this is a plain and simple status</p>" -	withTag          = "here's a simple status that uses hashtag #welcome!" -	withTagExpected  = "<p>here's a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>" -	withHTML         = "<div>blah this should just be html escaped blah</div>" -	withHTMLExpected = "<p><div>blah this should just be html escaped blah</div></p>" -	moreComplex      = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText" -	moreComplexFull  = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br/><br/><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br/><br/>Text</p>" +	simple              = "this is a plain and simple status" +	simpleExpected      = "<p>this is a plain and simple status</p>" +	withTag             = "here's a simple status that uses hashtag #welcome!" 
+	withTagExpected     = "<p>here's a simple status that uses hashtag <a href=\"http://localhost:8080/tags/welcome\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>welcome</span></a>!</p>" +	withHTML            = "<div>blah this should just be html escaped blah</div>" +	withHTMLExpected    = "<p><div>blah this should just be html escaped blah</div></p>" +	moreComplex         = "Another test @foss_satan@fossbros-anonymous.io\n\n#Hashtag\n\nText\n\n:rainbow:" +	moreComplexExpected = "<p>Another test <span class=\"h-card\"><a href=\"http://fossbros-anonymous.io/@foss_satan\" class=\"u-url mention\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">@<span>foss_satan</span></a></span><br><br><a href=\"http://localhost:8080/tags/Hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a><br><br>Text<br><br>:rainbow:</p>"  )  type PlainTestSuite struct { @@ -42,35 +41,105 @@ type PlainTestSuite struct {  }  func (suite *PlainTestSuite) TestParseSimple() { -	f := suite.formatter.FromPlain(context.Background(), simple, nil, nil) -	suite.Equal(simpleExpected, f) +	formatted := suite.FromPlain(simple) +	suite.Equal(simpleExpected, formatted.HTML)  }  func (suite *PlainTestSuite) TestParseWithTag() { -	foundTags := []*gtsmodel.Tag{ -		suite.testTags["welcome"], -	} - -	f := suite.formatter.FromPlain(context.Background(), withTag, nil, foundTags) -	suite.Equal(withTagExpected, f) +	formatted := suite.FromPlain(withTag) +	suite.Equal(withTagExpected, formatted.HTML)  }  func (suite *PlainTestSuite) TestParseWithHTML() { -	f := suite.formatter.FromPlain(context.Background(), withHTML, nil, nil) -	suite.Equal(withHTMLExpected, f) +	formatted := suite.FromPlain(withHTML) +	suite.Equal(withHTMLExpected, formatted.HTML)  }  func (suite *PlainTestSuite) TestParseMoreComplex() { -	foundTags := []*gtsmodel.Tag{ -		suite.testTags["Hashtag"], -	} +	formatted := 
suite.FromPlain(moreComplex) +	suite.Equal(moreComplexExpected, formatted.HTML) +} + +func (suite *PlainTestSuite) TestLinkNoMention() { +	statusText := `here's a link to a post by zork + +https://example.com/@the_mighty_zork/statuses/01FGVP55XMF2K6316MQRX6PFG1 + +that link shouldn't come out formatted as a mention!` + +	menchies := suite.FromPlain(statusText).Mentions +	suite.Empty(menchies) +} + +func (suite *PlainTestSuite) TestDeriveMentionsEmpty() { +	statusText := `` +	menchies := suite.FromPlain(statusText).Mentions +	assert.Len(suite.T(), menchies, 0) +} + +func (suite *PlainTestSuite) TestDeriveHashtagsOK() { +	statusText := `weeeeeeee #testing123 #also testing + +# testing this one shouldn't work + +			#thisshouldwork #dupe #dupe!! #dupe + +	here's a link with a fragment: https://example.org/whatever#ahhh +	here's another link with a fragment: https://example.org/whatever/#ahhh -	foundMentions := []*gtsmodel.Mention{ -		suite.testMentions["zork_mention_foss_satan"], -	} +(#ThisShouldAlsoWork) #this_should_be_split + +#111111 thisalsoshouldn'twork#### ## + +#alimentación, #saúde, #lävistää, #ö, #네 +#ThisOneIsThirtyOneCharactersLon...  
...ng +#ThisOneIsThirteyCharactersLong +` + +	tags := suite.FromPlain(statusText).Tags +	assert.Len(suite.T(), tags, 13) +	assert.Equal(suite.T(), "testing123", tags[0].Name) +	assert.Equal(suite.T(), "also", tags[1].Name) +	assert.Equal(suite.T(), "thisshouldwork", tags[2].Name) +	assert.Equal(suite.T(), "dupe", tags[3].Name) +	assert.Equal(suite.T(), "ThisShouldAlsoWork", tags[4].Name) +	assert.Equal(suite.T(), "this", tags[5].Name) +	assert.Equal(suite.T(), "111111", tags[6].Name) +	assert.Equal(suite.T(), "alimentación", tags[7].Name) +	assert.Equal(suite.T(), "saúde", tags[8].Name) +	assert.Equal(suite.T(), "lävistää", tags[9].Name) +	assert.Equal(suite.T(), "ö", tags[10].Name) +	assert.Equal(suite.T(), "네", tags[11].Name) +	assert.Equal(suite.T(), "ThisOneIsThirteyCharactersLong", tags[12].Name) + +	statusText = `#올빼미 hej` +	tags = suite.FromPlain(statusText).Tags +	assert.Equal(suite.T(), "올빼미", tags[0].Name) +} + +func (suite *PlainTestSuite) TestDeriveMultiple() { +	statusText := `Another test @foss_satan@fossbros-anonymous.io + +	#Hashtag + +	Text` + +	f := suite.FromPlain(statusText) + +	assert.Len(suite.T(), f.Mentions, 1) +	assert.Equal(suite.T(), "@foss_satan@fossbros-anonymous.io", f.Mentions[0].NameString) + +	assert.Len(suite.T(), f.Tags, 1) +	assert.Equal(suite.T(), "Hashtag", f.Tags[0].Name) + +	assert.Len(suite.T(), f.Emojis, 0) +} -	f := suite.formatter.FromPlain(context.Background(), moreComplex, foundMentions, foundTags) -	suite.Equal(moreComplexFull, f) +func (suite *PlainTestSuite) TestZalgoHashtag() { +	statusText := `yo who else loves #praying to #z̸͉̅a̸͚͋l̵͈̊g̸̫͌ỏ̷̪?` +	f := suite.FromPlain(statusText) +	assert.Len(suite.T(), f.Tags, 1) +	assert.Equal(suite.T(), "praying", f.Tags[0].Name)  }  func TestPlainTestSuite(t *testing.T) { diff --git a/internal/text/replace.go b/internal/text/replace.go new file mode 100644 index 000000000..5deab5d4d --- /dev/null +++ b/internal/text/replace.go @@ -0,0 +1,141 @@ +/* +   GoToSocial +   Copyright 
(C) 2021-2023 GoToSocial Authors admin@gotosocial.org + +   This program is free software: you can redistribute it and/or modify +   it under the terms of the GNU Affero General Public License as published by +   the Free Software Foundation, either version 3 of the License, or +   (at your option) any later version. + +   This program is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +   GNU Affero General Public License for more details. + +   You should have received a copy of the GNU Affero General Public License +   along with this program.  If not, see <http://www.gnu.org/licenses/>. +*/ + +package text + +import ( +	"errors" +	"github.com/superseriousbusiness/gotosocial/internal/db" +	"github.com/superseriousbusiness/gotosocial/internal/log" +	"github.com/superseriousbusiness/gotosocial/internal/util" +	"golang.org/x/text/unicode/norm" +	"strings" +) + +const ( +	maximumHashtagLength = 30 +) + +// given a mention or a hashtag string, the methods in this file will attempt to parse it, +// add it to the database, and render it as HTML. If any of these steps fails, the method +// will just return the original string and log an error. 
+ +// replaceMention takes a string in the form @username@domain.com or @localusername +func (r *customRenderer) replaceMention(text string) string { +	menchie, err := r.parseMention(r.ctx, text, r.accountID, r.statusID) +	if err != nil { +		log.Errorf("error parsing mention %s from status: %s", text, err) +		return text +	} + +	if r.statusID != "" { +		if err := r.f.db.Put(r.ctx, menchie); err != nil { +			log.Errorf("error putting mention in db: %s", err) +			return text +		} +	} + +	// only append if it's not been listed yet +	listed := false +	for _, m := range r.result.Mentions { +		if menchie.ID == m.ID { +			listed = true +			break +		} +	} +	if !listed { +		r.result.Mentions = append(r.result.Mentions, menchie) +	} + +	// make sure we have an account attached to this mention +	if menchie.TargetAccount == nil { +		a, err := r.f.db.GetAccountByID(r.ctx, menchie.TargetAccountID) +		if err != nil { +			log.Errorf("error getting account with id %s from the db: %s", menchie.TargetAccountID, err) +			return text +		} +		menchie.TargetAccount = a +	} + +	// The mention's target is our target +	targetAccount := menchie.TargetAccount + +	var b strings.Builder + +	// replace the mention with the formatted mention content +	// <span class="h-card"><a href="targetAccount.URL" class="u-url mention">@<span>targetAccount.Username</span></a></span> +	b.WriteString(`<span class="h-card"><a href="`) +	b.WriteString(targetAccount.URL) +	b.WriteString(`" class="u-url mention">@<span>`) +	b.WriteString(targetAccount.Username) +	b.WriteString(`</span></a></span>`) +	return b.String() +} + +// replaceMention takes a string in the form #HashedTag, and will normalize it before +// adding it to the db and turning it into HTML. +func (r *customRenderer) replaceHashtag(text string) string { +	// this normalization is specifically to avoid cases where visually-identical +	// hashtags are stored with different unicode representations (e.g. with combining +	// diacritics). 
It allows a tasteful number of combining diacritics to be used, +	// as long as they can be combined with parent characters to form regular letter +	// symbols. +	normalized := norm.NFC.String(text[1:]) + +	for i, r := range normalized { +		if i >= maximumHashtagLength || !util.IsPermittedInHashtag(r) { +			return text +		} +	} + +	tag, err := r.f.db.TagStringToTag(r.ctx, normalized, r.accountID) +	if err != nil { +		log.Errorf("error generating hashtags from status: %s", err) +		return text +	} + +	// only append if it's not been listed yet +	listed := false +	for _, t := range r.result.Tags { +		if tag.ID == t.ID { +			listed = true +			break +		} +	} +	if !listed { +		err = r.f.db.Put(r.ctx, tag) +		if err != nil { +			if !errors.Is(err, db.ErrAlreadyExists) { +				log.Errorf("error putting tags in db: %s", err) +				return text +			} +		} +		r.result.Tags = append(r.result.Tags, tag) +	} + +	var b strings.Builder +	// replace the #tag with the formatted tag content +	// `<a href="tag.URL" class="mention hashtag" rel="tag">#<span>tagAsEntered</span></a> +	b.WriteString(`<a href="`) +	b.WriteString(tag.URL) +	b.WriteString(`" class="mention hashtag" rel="tag">#<span>`) +	b.WriteString(normalized) +	b.WriteString(`</span></a>`) + +	return b.String() +} diff --git a/internal/util/statustools.go b/internal/util/statustools.go index 80a091623..a4bb15007 100644 --- a/internal/util/statustools.go +++ b/internal/util/statustools.go @@ -20,115 +20,19 @@ package util  import (  	"unicode" -	"unicode/utf8" - -	"github.com/superseriousbusiness/gotosocial/internal/regexes" -) - -const ( -	maximumHashtagLength = 30  ) -// DeriveMentionNamesFromText takes a plaintext (ie., not html-formatted) text, -// and applies a regex to it to return a deduplicated list of account names -// mentioned in that text, in the format "@user@example.org" or "@username" for -// local users. 
-func DeriveMentionNamesFromText(text string) []string { -	mentionedAccounts := []string{} -	for _, m := range regexes.MentionFinder.FindAllStringSubmatch(text, -1) { -		mentionedAccounts = append(mentionedAccounts, m[1]) -	} -	return UniqueStrings(mentionedAccounts) -} - -type Pair[A, B any] struct { -	First  A -	Second B -} - -// Byte index in original string -// `First` includes `#`. -type Span = Pair[int, int] - -// Takes a plaintext (ie., not HTML-formatted) text, -// and returns a slice of unique hashtags. -func DeriveHashtagsFromText(text string) []string { -	tagsMap := make(map[string]bool) -	tags := []string{} - -	for _, v := range FindHashtagSpansInText(text) { -		t := text[v.First+1 : v.Second] -		if _, value := tagsMap[t]; !value { -			tagsMap[t] = true -			tags = append(tags, t) -		} -	} - -	return tags -} - -// Takes a plaintext (ie., not HTML-formatted) text, -// and returns a list of pairs of indices into the original string, where -// hashtags are located. -func FindHashtagSpansInText(text string) []Span { -	tags := []Span{} -	start := 0 -	// Keep one rune of lookbehind. -	prev := ' ' -	inTag := false - -	for i, r := range text { -		if r == '#' && IsHashtagBoundary(prev) { -			// Start of hashtag. -			inTag = true -			start = i -		} else if inTag && !IsPermittedInHashtag(r) && !IsHashtagBoundary(r) { -			// Inside the hashtag, but it was a phoney, gottem. -			inTag = false -		} else if inTag && IsHashtagBoundary(r) { -			// End of hashtag. -			inTag = false -			appendTag(&tags, text, start, i) -		} else if irl := i + utf8.RuneLen(r); inTag && irl == len(text) { -			// End of text. -			appendTag(&tags, text, start, irl) -		} - -		prev = r -	} - -	return tags -} - -func appendTag(tags *[]Span, text string, start int, end int) { -	l := end - start - 1 -	// This check could be moved out into the parsing loop if necessary! 
-	if 0 < l && l <= maximumHashtagLength { -		*tags = append(*tags, Span{First: start, Second: end}) -	} -} - -// DeriveEmojisFromText takes a plaintext (ie., not html-formatted) text, -// and applies a regex to it to return a deduplicated list of emojis -// used in that text, without the surrounding `::` -func DeriveEmojisFromText(text string) []string { -	emojis := []string{} -	for _, m := range regexes.EmojiFinder.FindAllStringSubmatch(text, -1) { -		emojis = append(emojis, m[1]) -	} -	return UniqueStrings(emojis) +func IsPlausiblyInHashtag(r rune) bool { +	// Marks are allowed during parsing, prior to normalization, but not after, +	// since they may be combined into letters during normalization. +	return unicode.IsLetter(r) || unicode.IsNumber(r) || unicode.IsMark(r)  }  func IsPermittedInHashtag(r rune) bool {  	return unicode.IsLetter(r) || unicode.IsNumber(r)  } -// Decides where to break before or after a hashtag. -func IsHashtagBoundary(r rune) bool { -	return r == '#' || // `###lol` should work -		unicode.IsSpace(r) || // All kinds of Unicode whitespace. -		unicode.IsControl(r) || // All kinds of control characters, like tab. -		// Most kinds of punctuation except "Pc" ("Punctuation, connecting", like `_`). -		// But `someurl/#fragment` should not match, neither should HTML entities like `#`. 
-		('/' != r && '&' != r && !unicode.Is(unicode.Categories["Pc"], r) && unicode.IsPunct(r)) +// Decides where to break before or after a #hashtag or @mention +func IsMentionOrHashtagBoundary(r rune) bool { +	return unicode.IsSpace(r) || unicode.IsPunct(r)  } diff --git a/internal/util/statustools_test.go b/internal/util/statustools_test.go deleted file mode 100644 index bb01557c5..000000000 --- a/internal/util/statustools_test.go +++ /dev/null @@ -1,173 +0,0 @@ -/* -   GoToSocial -   Copyright (C) 2021-2023 GoToSocial Authors admin@gotosocial.org - -   This program is free software: you can redistribute it and/or modify -   it under the terms of the GNU Affero General Public License as published by -   the Free Software Foundation, either version 3 of the License, or -   (at your option) any later version. - -   This program is distributed in the hope that it will be useful, -   but WITHOUT ANY WARRANTY; without even the implied warranty of -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the -   GNU Affero General Public License for more details. - -   You should have received a copy of the GNU Affero General Public License -   along with this program.  If not, see <http://www.gnu.org/licenses/>. -*/ - -package util_test - -import ( -	"testing" - -	"github.com/stretchr/testify/assert" -	"github.com/stretchr/testify/suite" -	"github.com/superseriousbusiness/gotosocial/internal/util" -) - -type StatusTestSuite struct { -	suite.Suite -} - -func (suite *StatusTestSuite) TestLinkNoMention() { -	statusText := `here's a link to a post by zork: - -https://localhost:8080/@the_mighty_zork/statuses/01FGVP55XMF2K6316MQRX6PFG1 - -that link shouldn't come out formatted as a mention!` - -	menchies := util.DeriveMentionNamesFromText(statusText) -	suite.Empty(menchies) -} - -func (suite *StatusTestSuite) TestDeriveMentionsOK() { -	statusText := `@dumpsterqueer@example.org testing testing - -	is this thing on? - -	@someone_else@testing.best-horse.com can you confirm? 
@hello@test.lgbt - -	@thisisalocaluser! - -	here is a duplicate mention: @hello@test.lgbt @hello@test.lgbt - -	@account1@whatever.com @account2@whatever.com - -	` - -	menchies := util.DeriveMentionNamesFromText(statusText) -	assert.Len(suite.T(), menchies, 6) -	assert.Equal(suite.T(), "@dumpsterqueer@example.org", menchies[0]) -	assert.Equal(suite.T(), "@someone_else@testing.best-horse.com", menchies[1]) -	assert.Equal(suite.T(), "@hello@test.lgbt", menchies[2]) -	assert.Equal(suite.T(), "@thisisalocaluser", menchies[3]) -	assert.Equal(suite.T(), "@account1@whatever.com", menchies[4]) -	assert.Equal(suite.T(), "@account2@whatever.com", menchies[5]) -} - -func (suite *StatusTestSuite) TestDeriveMentionsEmpty() { -	statusText := `` -	menchies := util.DeriveMentionNamesFromText(statusText) -	assert.Len(suite.T(), menchies, 0) -} - -func (suite *StatusTestSuite) TestDeriveHashtagsOK() { -	statusText := `weeeeeeee #testing123 #also testing - -# testing this one shouldn't work - -			#thisshouldwork #dupe #dupe!! #dupe - -	here's a link with a fragment: https://example.org/whatever#ahhh -	here's another link with a fragment: https://example.org/whatever/#ahhh - -(#ThisShouldAlsoWork) #not_this_though - -#111111 thisalsoshouldn'twork#### ## - -#alimentación, #saúde, #lävistää, #ö, #네 -#ThisOneIsThirtyOneCharactersLon...  
...ng -#ThisOneIsThirteyCharactersLong -` - -	tags := util.DeriveHashtagsFromText(statusText) -	assert.Len(suite.T(), tags, 12) -	assert.Equal(suite.T(), "testing123", tags[0]) -	assert.Equal(suite.T(), "also", tags[1]) -	assert.Equal(suite.T(), "thisshouldwork", tags[2]) -	assert.Equal(suite.T(), "dupe", tags[3]) -	assert.Equal(suite.T(), "ThisShouldAlsoWork", tags[4]) -	assert.Equal(suite.T(), "111111", tags[5]) -	assert.Equal(suite.T(), "alimentación", tags[6]) -	assert.Equal(suite.T(), "saúde", tags[7]) -	assert.Equal(suite.T(), "lävistää", tags[8]) -	assert.Equal(suite.T(), "ö", tags[9]) -	assert.Equal(suite.T(), "네", tags[10]) -	assert.Equal(suite.T(), "ThisOneIsThirteyCharactersLong", tags[11]) - -	statusText = `#올빼미 hej` -	tags = util.DeriveHashtagsFromText(statusText) -	assert.Equal(suite.T(), "올빼미", tags[0]) -} - -func (suite *StatusTestSuite) TestHashtagSpansOK() { -	statusText := `#0 #3   #8aa` - -	spans := util.FindHashtagSpansInText(statusText) -	assert.Equal(suite.T(), 0, spans[0].First) -	assert.Equal(suite.T(), 2, spans[0].Second) -	assert.Equal(suite.T(), 3, spans[1].First) -	assert.Equal(suite.T(), 5, spans[1].Second) -	assert.Equal(suite.T(), 8, spans[2].First) -	assert.Equal(suite.T(), 12, spans[2].Second) -} - -func (suite *StatusTestSuite) TestDeriveEmojiOK() { -	statusText := `:test: :another: - -Here's some normal text with an :emoji: at the end - -:spaces shouldnt work: - -:emoji1::emoji2: - -:anotheremoji:emoji2: -:anotheremoji::anotheremoji::anotheremoji::anotheremoji: -:underscores_ok_too: -` - -	tags := util.DeriveEmojisFromText(statusText) -	assert.Len(suite.T(), tags, 7) -	assert.Equal(suite.T(), "test", tags[0]) -	assert.Equal(suite.T(), "another", tags[1]) -	assert.Equal(suite.T(), "emoji", tags[2]) -	assert.Equal(suite.T(), "emoji1", tags[3]) -	assert.Equal(suite.T(), "emoji2", tags[4]) -	assert.Equal(suite.T(), "anotheremoji", tags[5]) -	assert.Equal(suite.T(), "underscores_ok_too", tags[6]) -} - -func (suite *StatusTestSuite) 
TestDeriveMultiple() { -	statusText := `Another test @foss_satan@fossbros-anonymous.io - -	#HashTag - -	Text` - -	ms := util.DeriveMentionNamesFromText(statusText) -	hs := util.DeriveHashtagsFromText(statusText) -	es := util.DeriveEmojisFromText(statusText) - -	assert.Len(suite.T(), ms, 1) -	assert.Equal(suite.T(), "@foss_satan@fossbros-anonymous.io", ms[0]) - -	assert.Len(suite.T(), hs, 1) -	assert.Contains(suite.T(), hs, "HashTag") - -	assert.Len(suite.T(), es, 0) -} - -func TestStatusTestSuite(t *testing.T) { -	suite.Run(t, new(StatusTestSuite)) -}  | 
