summaryrefslogtreecommitdiff
path: root/internal/typeutils
diff options
context:
space:
mode:
authorLibravatar tobi <31960611+tsmethurst@users.noreply.github.com>2023-11-21 15:13:30 +0100
committerLibravatar GitHub <noreply@github.com>2023-11-21 15:13:30 +0100
commitcfefbc08d822cd85787d95dc2ee253e3368826d8 (patch)
treeaf6d6257dddca1645ab5f8e34a1c79ac80d82e0e /internal/typeutils
parent[docs] Annotate split-domain setup (#2372) (diff)
downloadgotosocial-cfefbc08d822cd85787d95dc2ee253e3368826d8.tar.xz
[feature] Federate status language in and out (#2366)
* [feature] Federate status language in + out * go fmt * tests, little fix * improve comments * unnest a bit * avoid unnecessary nil check * use more descriptive variable for contentMap * prefer instance languages when selecting from contentMap * update docs to reflect lang selection * rename rdfLangString -> rdfLangs * update comments to mention Pollable * iter through slice instead of map
Diffstat (limited to 'internal/typeutils')
-rw-r--r--internal/typeutils/astointernal.go15
-rw-r--r--internal/typeutils/astointernal_test.go9
-rw-r--r--internal/typeutils/internaltoas.go10
-rw-r--r--internal/typeutils/internaltoas_test.go44
-rw-r--r--internal/typeutils/util.go101
-rw-r--r--internal/typeutils/util_test.go114
-rw-r--r--internal/typeutils/wrap_test.go3
7 files changed, 273 insertions, 23 deletions
diff --git a/internal/typeutils/astointernal.go b/internal/typeutils/astointernal.go
index 707f51629..c7908ad24 100644
--- a/internal/typeutils/astointernal.go
+++ b/internal/typeutils/astointernal.go
@@ -244,9 +244,15 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab
}
// status.Content
+ // status.Language
//
- // The (html-formatted) content of this status.
- status.Content = ap.ExtractContent(statusable)
+ // Many implementations set both content
+ // and contentMap; we can use these to
+ // infer the language of the status.
+ status.Content, status.Language = ContentToContentLanguage(
+ ctx,
+ ap.ExtractContent(statusable),
+ )
// status.Attachments
//
@@ -396,9 +402,6 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab
return &s
}()
- // language
- // TODO: we might be able to extract this from the contentMap field
-
// ActivityStreamsType
status.ActivityStreamsType = statusable.GetTypeName()
@@ -707,7 +710,7 @@ func (c *Converter) ASFlagToReport(ctx context.Context, flaggable ap.Flaggable)
// For Mastodon, this will just be a string, or nothing.
// In Misskey's case, it may also contain the URLs of
// one or more reported statuses, so extract these too.
- content := ap.ExtractContent(flaggable)
+ content := ap.ExtractContent(flaggable).Content
statusURIs := []*url.URL{}
inlineURLs := misskeyReportInlineURLs(content)
statusURIs = append(statusURIs, inlineURLs...)
diff --git a/internal/typeutils/astointernal_test.go b/internal/typeutils/astointernal_test.go
index 10ea422fa..851d57efc 100644
--- a/internal/typeutils/astointernal_test.go
+++ b/internal/typeutils/astointernal_test.go
@@ -45,6 +45,10 @@ func (suite *ASToInternalTestSuite) jsonToType(in string) vocab.Type {
suite.FailNow(err.Error())
}
+ if statusable, ok := t.(ap.Statusable); ok {
+ ap.NormalizeIncomingContent(statusable, m)
+ }
+
return t
}
@@ -103,7 +107,8 @@ func (suite *ASToInternalTestSuite) TestParsePublicStatus() {
suite.NoError(err)
suite.Equal("reading: Punishment and Reward in the Corporate University", status.ContentWarning)
- suite.Equal(`<p>&gt; So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
+ suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
+ suite.Equal("en", status.Language)
}
func (suite *ASToInternalTestSuite) TestParsePublicStatusNoURL() {
@@ -117,7 +122,7 @@ func (suite *ASToInternalTestSuite) TestParsePublicStatusNoURL() {
suite.NoError(err)
suite.Equal("reading: Punishment and Reward in the Corporate University", status.ContentWarning)
- suite.Equal(`<p>&gt; So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
+ suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)
// on statuses with no URL in them (like ones we get from pleroma sometimes) we should use the AP URI of the status as URL
suite.Equal("http://fossbros-anonymous.io/users/foss_satan/statuses/108138763199405167", status.URL)
diff --git a/internal/typeutils/internaltoas.go b/internal/typeutils/internaltoas.go
index 16467be40..ff502296b 100644
--- a/internal/typeutils/internaltoas.go
+++ b/internal/typeutils/internaltoas.go
@@ -607,9 +607,17 @@ func (c *Converter) StatusToAS(ctx context.Context, s *gtsmodel.Status) (ap.Stat
// conversation
// TODO
- // content -- the actual post itself
+ // content -- the actual post
+ // itself, plus the language
contentProp := streams.NewActivityStreamsContentProperty()
contentProp.AppendXMLSchemaString(s.Content)
+
+ if s.Language != "" {
+ contentProp.AppendRDFLangString(map[string]string{
+ s.Language: s.Content,
+ })
+ }
+
status.SetActivityStreamsContent(contentProp)
// attachments
diff --git a/internal/typeutils/internaltoas_test.go b/internal/typeutils/internaltoas_test.go
index 01dde66fb..878040dcc 100644
--- a/internal/typeutils/internaltoas_test.go
+++ b/internal/typeutils/internaltoas_test.go
@@ -340,6 +340,9 @@ func (suite *InternalToASTestSuite) TestStatusToAS() {
"attributedTo": "http://localhost:8080/users/the_mighty_zork",
"cc": "http://localhost:8080/users/the_mighty_zork/followers",
"content": "hello everyone!",
+ "contentMap": {
+ "en": "hello everyone!"
+ },
"id": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",
"published": "2021-10-20T12:40:37+02:00",
"replies": {
@@ -379,16 +382,21 @@ func (suite *InternalToASTestSuite) TestStatusWithTagsToASWithIDs() {
// http://joinmastodon.org/ns, https://www.w3.org/ns/activitystreams --
// will appear, so trim them out of the string for consistency
trimmed := strings.SplitAfter(string(bytes), `"attachment":`)[1]
- suite.Equal(` {
- "blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj",
- "mediaType": "image/jpeg",
- "name": "Black and white image of some 50's style text saying: Welcome On Board",
- "type": "Document",
- "url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg"
- },
+ suite.Equal(` [
+ {
+ "blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj",
+ "mediaType": "image/jpeg",
+ "name": "Black and white image of some 50's style text saying: Welcome On Board",
+ "type": "Document",
+ "url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg"
+ }
+ ],
"attributedTo": "http://localhost:8080/users/admin",
"cc": "http://localhost:8080/users/admin/followers",
"content": "hello world! #welcome ! first post on the instance :rainbow: !",
+ "contentMap": {
+ "en": "hello world! #welcome ! first post on the instance :rainbow: !"
+ },
"id": "http://localhost:8080/users/admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R",
"published": "2021-10-20T11:36:45Z",
"replies": {
@@ -446,16 +454,21 @@ func (suite *InternalToASTestSuite) TestStatusWithTagsToASFromDB() {
// http://joinmastodon.org/ns, https://www.w3.org/ns/activitystreams --
// will appear, so trim them out of the string for consistency
trimmed := strings.SplitAfter(string(bytes), `"attachment":`)[1]
- suite.Equal(` {
- "blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj",
- "mediaType": "image/jpeg",
- "name": "Black and white image of some 50's style text saying: Welcome On Board",
- "type": "Document",
- "url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg"
- },
+ suite.Equal(` [
+ {
+ "blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj",
+ "mediaType": "image/jpeg",
+ "name": "Black and white image of some 50's style text saying: Welcome On Board",
+ "type": "Document",
+ "url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg"
+ }
+ ],
"attributedTo": "http://localhost:8080/users/admin",
"cc": "http://localhost:8080/users/admin/followers",
"content": "hello world! #welcome ! first post on the instance :rainbow: !",
+ "contentMap": {
+ "en": "hello world! #welcome ! first post on the instance :rainbow: !"
+ },
"id": "http://localhost:8080/users/admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R",
"published": "2021-10-20T11:36:45Z",
"replies": {
@@ -519,6 +532,9 @@ func (suite *InternalToASTestSuite) TestStatusToASWithMentions() {
"http://localhost:8080/users/the_mighty_zork"
],
"content": "hi @the_mighty_zork welcome to the instance!",
+ "contentMap": {
+ "en": "hi @the_mighty_zork welcome to the instance!"
+ },
"id": "http://localhost:8080/users/admin/statuses/01FF25D5Q0DH7CHD57CTRS6WK0",
"inReplyTo": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",
"published": "2021-11-20T13:32:16Z",
diff --git a/internal/typeutils/util.go b/internal/typeutils/util.go
index a19588221..8a8d4123b 100644
--- a/internal/typeutils/util.go
+++ b/internal/typeutils/util.go
@@ -31,6 +31,8 @@ import (
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/language"
+ "github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/text"
)
@@ -184,3 +186,102 @@ func placeholdUnknownAttachments(arr []apimodel.Attachment) (string, []apimodel.
return text.SanitizeToHTML(aside.String()), arr
}
+
+// ContentToContentLanguage tries to
+// extract a content string and language
+// tag string from the given intermediary
+// content.
+//
+// Selection rules, checked in order:
+//
+//   - `contentMap` is nil or empty: return
+//     `content` as-is, with no language.
+//   - `content` is set: return it, with the
+//     tag of a `contentMap` entry whose value
+//     equals it (no tag if nothing matches).
+//   - `contentMap` has exactly one entry:
+//     return that entry.
+//   - `contentMap` has several entries: return
+//     the entry for the first configured instance
+//     language with a non-empty value, else an
+//     arbitrary entry (map order is not defined).
+//
+// A selected tag is passed through language.Parse
+// and replaced by its normalized form; if parsing
+// fails, a warning is logged and the tag is
+// returned unchanged.
+//
+// Either/both of the returned strings may
+// be empty, depending on how things go.
+func ContentToContentLanguage(
+	ctx context.Context,
+	content gtsmodel.Content,
+) (
+	string, // content
+	string, // language
+) {
+	var (
+		contentStr string
+		langTagStr string
+	)
+
+	switch contentMap := content.ContentMap; {
+	// Simplest case: no usable `contentMap`
+	// (nil, or present but empty). Return
+	// `content`, even if empty.
+	//
+	// Checking len rather than nil matters:
+	// an empty map would otherwise reach the
+	// default case, which leaves langTagStr
+	// set to the last instance language even
+	// though no content was found for it.
+	case len(contentMap) == 0:
+		return content.Content, ""
+
+	// `content` and `contentMap` set.
+	// Try to infer "primary" language.
+	case content.Content != "":
+		// Assume `content` is intended
+		// primary content, and look for
+		// corresponding language tag.
+		contentStr = content.Content
+
+		for t, c := range contentMap {
+			if contentStr == c {
+				langTagStr = t
+				break
+			}
+		}
+
+	// `content` not set; `contentMap`
+	// is set with only one value.
+	// This must be the "primary" lang.
+	case len(contentMap) == 1:
+		// Use an empty loop to
+		// get the values we want.
+		// nolint:revive
+		for langTagStr, contentStr = range contentMap {
+		}
+
+	// Only `contentMap` is set, with more
+	// than one value. Map order is not
+	// guaranteed so we can't know the
+	// "primary" language.
+	//
+	// Try to select content using our
+	// instance's configured languages.
+	//
+	// In case of no hits, just take the
+	// first tag and content in the map.
+	default:
+		instanceLangs := config.GetInstanceLanguages()
+		for _, langTagStr = range instanceLangs.TagStrs() {
+			if contentStr = contentMap[langTagStr]; contentStr != "" {
+				// Hit!
+				break
+			}
+		}
+
+		// If nothing found, just take
+		// the first entry we can get by
+		// breaking after the first iter.
+		// This also overwrites whatever
+		// tag the loop above left behind.
+		if contentStr == "" {
+			for langTagStr, contentStr = range contentMap {
+				break
+			}
+		}
+	}
+
+	if langTagStr != "" {
+		// Found a lang tag for this content,
+		// make sure it's valid / parseable.
+		lang, err := language.Parse(langTagStr)
+		if err != nil {
+			// Best-effort: log and keep
+			// the tag as we received it.
+			log.Warnf(
+				ctx,
+				"could not parse %s as BCP47 language tag in status contentMap: %v",
+				langTagStr, err,
+			)
+		} else {
+			// Inferred the language!
+			// Use normalized version.
+			langTagStr = lang.TagStr
+		}
+	}
+
+	return contentStr, langTagStr
+}
diff --git a/internal/typeutils/util_test.go b/internal/typeutils/util_test.go
index e6610574b..0f852d399 100644
--- a/internal/typeutils/util_test.go
+++ b/internal/typeutils/util_test.go
@@ -18,7 +18,12 @@
package typeutils
import (
+ "context"
"testing"
+
+ "github.com/superseriousbusiness/gotosocial/internal/config"
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/language"
)
func TestMisskeyReportContentURLs1(t *testing.T) {
@@ -44,3 +49,112 @@ misskey-formatted`
t.Fatalf("wanted 0 urls, got %d", l)
}
}
+
+// TestContentToContentLanguage runs ContentToContentLanguage
+// against a table of content / contentMap combinations, setting
+// the instance languages for each case, and checks the returned
+// content and language tag.
+func TestContentToContentLanguage(t *testing.T) {
+	type testcase struct {
+		content           gtsmodel.Content
+		instanceLanguages language.Languages
+		expectedContent   string
+		expectedLang      string
+	}
+
+	// Each case overwrites the global instance
+	// languages; put back whatever was configured
+	// so this doesn't leak into other tests.
+	origLangs := config.GetInstanceLanguages()
+	t.Cleanup(func() {
+		config.SetInstanceLanguages(origLangs)
+	})
+
+	ctx := context.Background()
+
+	for i, tc := range []testcase{
+		// No contentMap: content returned, no language.
+		{
+			content: gtsmodel.Content{
+				Content:    "hello world",
+				ContentMap: nil,
+			},
+			expectedContent: "hello world",
+			expectedLang:    "",
+		},
+		// Empty (non-nil) contentMap behaves like no contentMap.
+		{
+			content: gtsmodel.Content{
+				Content:    "hello world",
+				ContentMap: map[string]string{},
+			},
+			expectedContent: "hello world",
+			expectedLang:    "",
+		},
+		// Only a single-entry contentMap: use that entry.
+		{
+			content: gtsmodel.Content{
+				Content: "",
+				ContentMap: map[string]string{
+					"en": "hello world",
+				},
+			},
+			expectedContent: "hello world",
+			expectedLang:    "en",
+		},
+		// content matches one contentMap value: use its tag.
+		{
+			content: gtsmodel.Content{
+				Content: "bonjour le monde",
+				ContentMap: map[string]string{
+					"en": "hello world",
+					"fr": "bonjour le monde",
+				},
+			},
+			expectedContent: "bonjour le monde",
+			expectedLang:    "fr",
+		},
+		// content matches nothing in contentMap: no language.
+		{
+			content: gtsmodel.Content{
+				Content: "bonjour le monde",
+				ContentMap: map[string]string{
+					"en": "hello world",
+				},
+			},
+			expectedContent: "bonjour le monde",
+			expectedLang:    "",
+		},
+		// Multi-entry contentMap only: first instance language wins.
+		{
+			content: gtsmodel.Content{
+				Content: "",
+				ContentMap: map[string]string{
+					"en": "hello world",
+					"ru": "Привет, мир!",
+					"nl": "hallo wereld!",
+					"ca": "Hola món!",
+				},
+			},
+			instanceLanguages: language.Languages{
+				{TagStr: "en"},
+				{TagStr: "ca"},
+			},
+			expectedContent: "hello world",
+			expectedLang:    "en",
+		},
+		// Same map, instance language order reversed.
+		{
+			content: gtsmodel.Content{
+				Content: "",
+				ContentMap: map[string]string{
+					"en": "hello world",
+					"ru": "Привет, мир!",
+					"nl": "hallo wereld!",
+					"ca": "Hola món!",
+				},
+			},
+			instanceLanguages: language.Languages{
+				{TagStr: "ca"},
+				{TagStr: "en"},
+			},
+			expectedContent: "Hola món!",
+			expectedLang:    "ca",
+		},
+	} {
+		langs, err := language.InitLangs(tc.instanceLanguages.TagStrs())
+		if err != nil {
+			t.Fatal(err)
+		}
+		config.SetInstanceLanguages(langs)
+
+		// Named gotLang rather than `language` so
+		// the imported package is not shadowed.
+		gotContent, gotLang := ContentToContentLanguage(ctx, tc.content)
+		if gotContent != tc.expectedContent {
+			t.Errorf(
+				"test %d expected content '%s' got '%s'",
+				i, tc.expectedContent, gotContent,
+			)
+		}
+
+		if gotLang != tc.expectedLang {
+			t.Errorf(
+				"test %d expected language '%s' got '%s'",
+				i, tc.expectedLang, gotLang,
+			)
+		}
+	}
+}
diff --git a/internal/typeutils/wrap_test.go b/internal/typeutils/wrap_test.go
index 9d6d95983..453073ed6 100644
--- a/internal/typeutils/wrap_test.go
+++ b/internal/typeutils/wrap_test.go
@@ -85,6 +85,9 @@ func (suite *WrapTestSuite) TestWrapNoteInCreate() {
"attributedTo": "http://localhost:8080/users/the_mighty_zork",
"cc": "http://localhost:8080/users/the_mighty_zork/followers",
"content": "hello everyone!",
+ "contentMap": {
+ "en": "hello everyone!"
+ },
"id": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",
"published": "2021-10-20T12:40:37+02:00",
"replies": {