diff options
author | 2023-07-31 15:47:35 +0200 | |
---|---|---|
committer | 2023-07-31 15:47:35 +0200 | |
commit | 2796a2e82f16ade9872008878cf88299bd66b4e7 (patch) | |
tree | 76f7b69cc1da57ca10b71c57abf1892575bea100 /internal/text/normalize.go | |
parent | [performance] cache follow, follow request and block ID lists (#2027) (diff) | |
download | gotosocial-2796a2e82f16ade9872008878cf88299bd66b4e7.tar.xz |
[feature] Hashtag federation (in/out), hashtag client API endpoints (#2032)
* update go-fed
* do the things
* remove unused columns from tags
* update to latest lingo from main
* further tag shenanigans
* serve stub page at tag endpoint
* we did it lads
* tests, oh tests, ohhh tests, oh tests (doo doo doo doo)
* swagger docs
* document hashtag usage + federation
* instanceGet
* don't bother parsing tag href
* rename whereStartsWith -> whereStartsLike
* remove GetOrCreateTag
* dont cache status tag timelineability
Diffstat (limited to 'internal/text/normalize.go')
-rw-r--r-- | internal/text/normalize.go | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/internal/text/normalize.go b/internal/text/normalize.go new file mode 100644 index 000000000..14caf6311 --- /dev/null +++ b/internal/text/normalize.go @@ -0,0 +1,60 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package text + +import ( + "strings" + + "github.com/superseriousbusiness/gotosocial/internal/util" + "golang.org/x/text/unicode/norm" +) + +const ( + maximumHashtagLength = 100 +) + +// NormalizeHashtag normalizes the given hashtag text by +// removing the initial '#' symbol, and then decomposing +// and canonically recomposing chars + combining diacritics +// in the text to single unicode characters, following +// Normalization Form C (https://unicode.org/reports/tr15/). +// +// Finally, it will do a check on the normalized string to +// ensure that it's below maximumHashtagLength chars, and +// contains only unicode letters and numbers. If this passes, +// returned bool will be true. +func NormalizeHashtag(text string) (string, bool) { + // This normalization is specifically to avoid cases + // where visually-identical hashtags are stored with + // different unicode representations (e.g. with combining + // diacritics). It allows a tasteful number of combining + // diacritics to be used, as long as they can be combined + // with parent characters to form regular letter symbols. + normalized := norm.NFC.String(strings.TrimPrefix(text, "#")) + + // Validate normalized. + ok := true + for i, r := range normalized { + if i >= maximumHashtagLength || !util.IsPermittedInHashtag(r) { + ok = false + break + } + } + + return normalized, ok +} |