diff options
Diffstat (limited to 'internal')
| -rw-r--r-- | internal/ap/ap_test.go | 6 | ||||
| -rw-r--r-- | internal/ap/extract.go | 37 | ||||
| -rw-r--r-- | internal/ap/extractcontent_test.go | 5 | ||||
| -rw-r--r-- | internal/ap/normalize.go | 285 | ||||
| -rw-r--r-- | internal/ap/normalize_test.go | 82 | ||||
| -rw-r--r-- | internal/ap/serialize.go | 139 | ||||
| -rw-r--r-- | internal/gtsmodel/status.go | 11 | ||||
| -rw-r--r-- | internal/typeutils/astointernal.go | 15 | ||||
| -rw-r--r-- | internal/typeutils/astointernal_test.go | 9 | ||||
| -rw-r--r-- | internal/typeutils/internaltoas.go | 10 | ||||
| -rw-r--r-- | internal/typeutils/internaltoas_test.go | 44 | ||||
| -rw-r--r-- | internal/typeutils/util.go | 101 | ||||
| -rw-r--r-- | internal/typeutils/util_test.go | 114 | ||||
| -rw-r--r-- | internal/typeutils/wrap_test.go | 3 | 
14 files changed, 695 insertions, 166 deletions
diff --git a/internal/ap/ap_test.go b/internal/ap/ap_test.go index 6a5073c63..583a37c53 100644 --- a/internal/ap/ap_test.go +++ b/internal/ap/ap_test.go @@ -93,6 +93,12 @@ func noteWithMentions1() vocab.ActivityStreamsNote {  	content := streams.NewActivityStreamsContentProperty()  	content.AppendXMLSchemaString("hey @f0x and @dumpsterqueer") + +	rdfLangString := make(map[string]string) +	rdfLangString["en"] = "hey @f0x and @dumpsterqueer" +	rdfLangString["fr"] = "bonjour @f0x et @dumpsterqueer" +	content.AppendRDFLangString(rdfLangString) +  	note.SetActivityStreamsContent(content)  	return note diff --git a/internal/ap/extract.go b/internal/ap/extract.go index 424f77409..3d92fa2ba 100644 --- a/internal/ap/extract.go +++ b/internal/ap/extract.go @@ -631,27 +631,34 @@ func ExtractPublicKey(i WithPublicKey) (  	return nil, nil, nil, gtserror.New("couldn't find public key")  } -// ExtractContent returns a string representation of the -// given interface's Content property, or an empty string -// if no Content is found. -func ExtractContent(i WithContent) string { -	contentProperty := i.GetActivityStreamsContent() -	if contentProperty == nil { -		return "" +// ExtractContent returns an intermediary representation of +// the given interface's Content and/or ContentMap property. +func ExtractContent(i WithContent) gtsmodel.Content { +	content := gtsmodel.Content{} + +	contentProp := i.GetActivityStreamsContent() +	if contentProp == nil { +		// No content at all. +		return content  	} -	for iter := contentProperty.Begin(); iter != contentProperty.End(); iter = iter.Next() { +	for iter := contentProp.Begin(); iter != contentProp.End(); iter = iter.Next() {  		switch { -		// Content may be parsed as IRI, depending on -		// how it's formatted, so account for this. -		case iter.IsXMLSchemaString(): -			return iter.GetXMLSchemaString() -		case iter.IsIRI(): -			return iter.GetIRI().String() +		case iter.IsRDFLangString() && +			len(content.ContentMap) == 0: +			content.ContentMap = iter.GetRDFLangString() + +		case iter.IsXMLSchemaString() && +			content.Content == "": +			content.Content = iter.GetXMLSchemaString() + +		case iter.IsIRI() && +			content.Content == "": +			content.Content = iter.GetIRI().String()  		}  	} -	return "" +	return content  }  // ExtractAttachments attempts to extract barebones MediaAttachment objects from given AS interface type. diff --git a/internal/ap/extractcontent_test.go b/internal/ap/extractcontent_test.go index 590d1b931..c899a10e1 100644 --- a/internal/ap/extractcontent_test.go +++ b/internal/ap/extractcontent_test.go @@ -30,10 +30,11 @@ type ExtractContentTestSuite struct {  func (suite *ExtractContentTestSuite) TestExtractContent1() {  	note := suite.noteWithMentions1 -  	content := ap.ExtractContent(note) -	suite.Equal("hey @f0x and @dumpsterqueer", content) +	suite.Equal("hey @f0x and @dumpsterqueer", content.Content) +	suite.Equal("bonjour @f0x et @dumpsterqueer", content.ContentMap["fr"]) +	suite.Equal("hey @f0x and @dumpsterqueer", content.ContentMap["en"])  }  func TestExtractContentTestSuite(t *testing.T) { diff --git a/internal/ap/normalize.go b/internal/ap/normalize.go index 192a2d740..a27527b84 100644 --- a/internal/ap/normalize.go +++ b/internal/ap/normalize.go @@ -20,11 +20,12 @@ package ap  import (  	"github.com/superseriousbusiness/activity/pub"  	"github.com/superseriousbusiness/activity/streams" +	"github.com/superseriousbusiness/gotosocial/internal/gtserror"  	"github.com/superseriousbusiness/gotosocial/internal/text"  )  /* -	NORMALIZE INCOMING +	INCOMING NORMALIZATION  	The below functions should be called to normalize the content  	of messages *COMING INTO* GoToSocial via the federation API,  	either as the result of delivery from a remote instance to this @@ -84,39 +85,84 @@ func NormalizeIncomingActivity(activity pub.Activity, rawJSON map[string]interfa  	}  } -// NormalizeIncomingContent replaces the Content of the given item -// with the sanitized version of the raw 'content' value from the -// raw json object map. +// normalizeContent normalizes the given content +// string by sanitizing its HTML and minimizing it.  // -// noop if there was no content in the json object map or the -// content was not a plain string. -func NormalizeIncomingContent(item WithContent, rawJSON map[string]interface{}) { -	rawContent, ok := rawJSON["content"] -	if !ok { -		// No content in rawJSON. -		// TODO: In future we might also -		// look for "contentMap" property. -		return +// Noop for non-string content. +func normalizeContent(rawContent interface{}) string { +	if rawContent == nil { +		// Nothing to fix. +		return ""  	}  	content, ok := rawContent.(string)  	if !ok { -		// Not interested in content arrays. -		return +		// Not interested in +		// content slices etc. +		return "" +	} + +	if content == "" { +		// Nothing to fix. +		return ""  	} -	// Content should be HTML encoded by default: +	// Content entries should be HTML encoded by default:  	// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-content  	//  	// TODO: sanitize differently based on mediaType.  	// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-mediatype  	content = text.SanitizeToHTML(content)  	content = text.MinifyHTML(content) +	return content +} -	// Set normalized content property from the raw string; -	// this replaces any existing content property on the item. +// NormalizeIncomingContent replaces the Content property of the given +// item with the normalized versions of the raw 'content' and 'contentMap' +// values from the raw json object map. +// +// noop if there was no 'content' or 'contentMap' in the json object map. +func NormalizeIncomingContent(item WithContent, rawJSON map[string]interface{}) { +	var ( +		rawContent    = rawJSON["content"] +		rawContentMap = rawJSON["contentMap"] +	) + +	if rawContent == nil && +		rawContentMap == nil { +		// Nothing to normalize, +		// leave no content on item. +		return +	} + +	// Create wrapper for normalized content.  	contentProp := streams.NewActivityStreamsContentProperty() -	contentProp.AppendXMLSchemaString(content) + +	// Fix 'content' if applicable. +	content := normalizeContent(rawContent) +	if content != "" { +		contentProp.AppendXMLSchemaString(content) +	} + +	// Fix 'contentMap' if applicable. +	contentMap, ok := rawContentMap.(map[string]interface{}) +	if ok { +		rdfLangs := make(map[string]string, len(contentMap)) + +		for lang, rawContent := range contentMap { +			content := normalizeContent(rawContent) +			if content != "" { +				rdfLangs[lang] = content +			} +		} + +		if len(rdfLangs) != 0 { +			contentProp.AppendRDFLangString(rdfLangs) +		} +	} + +	// Replace any existing content property +	// on the item with normalized version.  	item.SetActivityStreamsContent(contentProp)  } @@ -299,3 +345,204 @@ func NormalizeIncomingPollOptions(item WithOneOf, rawJSON map[string]interface{}  		NormalizeIncomingName(choiceable, rawChoice)  	}  } + +/* +	OUTGOING NORMALIZATION +	The below functions should be called to normalize the content +	of messages *GOING OUT OF* GoToSocial via the federation API, +	either as the result of delivery to a remote instance from this +	instance, or as a result of a remote instance doing an http call +	to us to dereference something. +*/ + +// NormalizeOutgoingAttachmentProp replaces single-entry Attachment objects with +// single-entry arrays, for better compatibility with other AP implementations. +// +// Ie: +// +//	"attachment": { +//	  ... +//	} +// +// becomes: +// +//	"attachment": [ +//	  { +//	    ... +//	  } +//	] +// +// Noop for items with no attachments, or with attachments that are already a slice. +func NormalizeOutgoingAttachmentProp(item WithAttachment, rawJSON map[string]interface{}) { +	attachment, ok := rawJSON["attachment"] +	if !ok { +		// No 'attachment', +		// nothing to change. +		return +	} + +	if _, ok := attachment.([]interface{}); ok { +		// Already slice, +		// nothing to change. +		return +	} + +	// Coerce single-object to slice. +	rawJSON["attachment"] = []interface{}{attachment} +} + +// NormalizeOutgoingContentProp normalizes go-fed's funky formatting of content and +// contentMap properties to a format better understood by other AP implementations. +// +// Ie., incoming "content" property like this: +// +//	"content": [ +//	  "hello world!", +//	  { +//	    "en": "hello world!" +//	  } +//	] +// +// Is unpacked to: +// +//	"content": "hello world!", +//	"contentMap": { +//	  "en": "hello world!" +//	} +// +// Noop if neither content nor contentMap are set. +func NormalizeOutgoingContentProp(item WithContent, rawJSON map[string]interface{}) { +	contentProp := item.GetActivityStreamsContent() +	if contentProp == nil { +		// Nothing to do, +		// bail early. +		return +	} + +	contentPropLen := contentProp.Len() +	if contentPropLen == 0 { +		// Nothing to do, +		// bail early. +		return +	} + +	var ( +		content    string +		contentMap map[string]string +	) + +	for iter := contentProp.Begin(); iter != contentProp.End(); iter = iter.Next() { +		switch { +		case iter.IsRDFLangString() && +			contentMap == nil: +			contentMap = iter.GetRDFLangString() + +		case content == "" && +			iter.IsXMLSchemaString(): +			content = iter.GetXMLSchemaString() +		} +	} + +	if content != "" { +		rawJSON["content"] = content +	} else { +		delete(rawJSON, "content") +	} + +	if contentMap != nil { +		rawJSON["contentMap"] = contentMap +	} else { +		delete(rawJSON, "contentMap") +	} +} + +// NormalizeOutgoingObjectProp normalizes each Object entry in the rawJSON of the given +// item by calling custom serialization / normalization functions on them in turn. +// +// This function also unnests single-entry arrays, so that: +// +//	"object": [ +//	  { +//	    ... +//	  } +//	] +// +// Becomes: +// +//	"object": { +//	  ... +//	} +// +// Noop for each Object entry that isn't an Accountable or Statusable. +func NormalizeOutgoingObjectProp(item WithObject, rawJSON map[string]interface{}) error { +	objectProp := item.GetActivityStreamsObject() +	if objectProp == nil { +		// Nothing to do, +		// bail early. +		return nil +	} + +	objectPropLen := objectProp.Len() +	if objectPropLen == 0 { +		// Nothing to do, +		// bail early. +		return nil +	} + +	// The thing we already serialized has objects +	// on it, so we should see if we need to custom +	// serialize any of those objects, and replace +	// them on the data map as necessary. +	objects := make([]interface{}, 0, objectPropLen) +	for iter := objectProp.Begin(); iter != objectProp.End(); iter = iter.Next() { +		if iter.IsIRI() { +			// Plain IRIs don't need custom serialization. +			objects = append(objects, iter.GetIRI().String()) +			continue +		} + +		var ( +			objectType = iter.GetType() +			objectSer  map[string]interface{} +		) + +		if objectType == nil { +			// This is awkward. +			return gtserror.Newf("could not resolve object iter %T to vocab.Type", iter) +		} + +		var err error + +		// In the below accountable and statusable serialization, +		// `@context` will be included in the wrapping type already, +		// so we shouldn't also include it in the object itself. +		switch tn := objectType.GetTypeName(); { +		case IsAccountable(tn): +			objectSer, err = serializeAccountable(objectType, false) + +		case IsStatusable(tn): +			// IsStatusable includes Pollable as well. +			objectSer, err = serializeStatusable(objectType, false) + +		default: +			// No custom serializer for this type; serialize as normal. +			objectSer, err = objectType.Serialize() +		} + +		if err != nil { +			return err +		} + +		objects = append(objects, objectSer) +	} + +	if objectPropLen == 1 { +		// Unnest single object. +		rawJSON["object"] = objects[0] +	} else { +		// Array of objects. +		rawJSON["object"] = objects +	} + +	return nil +} diff --git a/internal/ap/normalize_test.go b/internal/ap/normalize_test.go index cd1affe60..33b1f6ea6 100644 --- a/internal/ap/normalize_test.go +++ b/internal/ap/normalize_test.go @@ -46,6 +46,9 @@ func (suite *NormalizeTestSuite) getStatusable() (vocab.ActivityStreamsNote, map  		  "https://example.org/users/someone/followers"  		],  		"content": "UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class=\"hashtag\" data-tag=\"twittermigration\" href=\"https://example.org/tag/twittermigration\" rel=\"tag ugc\">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.", +		"contentMap": { +			"en": "UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class=\"hashtag\" data-tag=\"twittermigration\" href=\"https://example.org/tag/twittermigration\" rel=\"tag ugc\">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues." +		},  		"context": "https://example.org/contexts/01GX0MSHPER1E0FT022Q209EJZ",  		"conversation": "https://example.org/contexts/01GX0MSHPER1E0FT022Q209EJZ",  		"id": "https://example.org/objects/01GX0MT2PA58JNSMK11MCS65YD", @@ -182,7 +185,15 @@ func (suite *NormalizeTestSuite) getAccountable() (vocab.ActivityStreamsPerson,  func (suite *NormalizeTestSuite) TestNormalizeActivityObject() {  	note, rawNote := suite.getStatusable() -	suite.Equal(`update: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" data-tag="twittermigration" href="https://example.org/tag/twittermigration" rel="tag ugc">#TwitterMigration%3C/a%3E.%3Cbr%3E%3Cbr%3EIn%20fact,%20100,000%20new%20accounts%20have%20been%20created%20since%20last%20night.%3Cbr%3E%3Cbr%3ESince%20last%20night&%2339;s%20spike%208,000-12,000%20new%20accounts%20are%20being%20created%20every%20hour.%3Cbr%3E%3Cbr%3EYesterday,%20I%20estimated%20that%20Mastodon%20would%20have%208%20million%20users%20by%20the%20end%20of%20the%20week.%20That%20might%20happen%20a%20lot%20sooner%20if%20this%20trend%20continues.`, ap.ExtractContent(note)) +	content := ap.ExtractContent(note) +	suite.Equal( +		`update: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" data-tag="twittermigration" href="https://example.org/tag/twittermigration" rel="tag ugc">#TwitterMigration%3C/a%3E.%3Cbr%3E%3Cbr%3EIn%20fact,%20100,000%20new%20accounts%20have%20been%20created%20since%20last%20night.%3Cbr%3E%3Cbr%3ESince%20last%20night&%2339;s%20spike%208,000-12,000%20new%20accounts%20are%20being%20created%20every%20hour.%3Cbr%3E%3Cbr%3EYesterday,%20I%20estimated%20that%20Mastodon%20would%20have%208%20million%20users%20by%20the%20end%20of%20the%20week.%20That%20might%20happen%20a%20lot%20sooner%20if%20this%20trend%20continues.`, +		content.Content, +	) + +	// Malformed contentMap entry +	// will not be extractable yet. +	suite.Empty(content.ContentMap["en"])  	create := testrig.WrapAPNoteInCreate(  		testrig.URLMustParse("https://example.org/create_something"), @@ -192,7 +203,18 @@ func (suite *NormalizeTestSuite) TestNormalizeActivityObject() {  	)  	ap.NormalizeIncomingActivity(create, map[string]interface{}{"object": rawNote}) -	suite.Equal(`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" href="https://example.org/tag/twittermigration" rel="tag ugc nofollow noreferrer noopener" target="_blank">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`, ap.ExtractContent(note)) +	content = ap.ExtractContent(note) + +	suite.Equal( +		`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" href="https://example.org/tag/twittermigration" rel="tag ugc nofollow noreferrer noopener" target="_blank">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`, +		content.Content, +	) + +	// Content map entry should now be extractable. +	suite.Equal( +		`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" href="https://example.org/tag/twittermigration" rel="tag ugc nofollow noreferrer noopener" target="_blank">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`, +		content.ContentMap["en"], +	)  }  func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment() { @@ -202,12 +224,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment  	// the attachment(s) should be all jacked up.  	suite.Equal(`{    "@context": "https://www.w3.org/ns/activitystreams", -  "attachment": { -    "mediaType": "image/jpeg", -    "name": "description: here's \u003c\u003ca\u003e\u003e picture of a #cat,%20it%27s%20cute!%20here%27s%20some%20special%20characters:%20%22%22%20%5C%20weeee%27%27%27%27", -    "type": "Document", -    "url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg" -  }, +  "attachment": [ +    { +      "mediaType": "image/jpeg", +      "name": "description: here's \u003c\u003ca\u003e\u003e picture of a #cat,%20it%27s%20cute!%20here%27s%20some%20special%20characters:%20%22%22%20%5C%20weeee%27%27%27%27", +      "type": "Document", +      "url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg" +    } +  ],    "attributedTo": "https://example.org/users/hourlycatbot",    "id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",    "to": "https://www.w3.org/ns/activitystreams#Public", @@ -222,12 +246,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment  	// attachment should no longer be all jacked up.  	suite.Equal(`{    "@context": "https://www.w3.org/ns/activitystreams", -  "attachment": { -    "mediaType": "image/jpeg", -    "name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''", -    "type": "Document", -    "url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg" -  }, +  "attachment": [ +    { +      "mediaType": "image/jpeg", +      "name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''", +      "type": "Document", +      "url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg" +    } +  ],    "attributedTo": "https://example.org/users/hourlycatbot",    "id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",    "to": "https://www.w3.org/ns/activitystreams#Public", @@ -243,12 +269,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment  	// the attachment(s) should be all jacked up.  	suite.Equal(`{    "@context": "https://www.w3.org/ns/activitystreams", -  "attachment": { -    "mediaType": "image/jpeg", -    "name": "description: here's \u003c\u003ca\u003e\u003e picture of a #cat,%20it%27s%20cute!%20here%27s%20some%20special%20characters:%20%22%22%20%5C%20weeee%27%27%27%27", -    "type": "Document", -    "url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg" -  }, +  "attachment": [ +    { +      "mediaType": "image/jpeg", +      "name": "description: here's \u003c\u003ca\u003e\u003e picture of a #cat,%20it%27s%20cute!%20here%27s%20some%20special%20characters:%20%22%22%20%5C%20weeee%27%27%27%27", +      "type": "Document", +      "url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg" +    } +  ],    "attributedTo": "https://example.org/users/hourlycatbot",    "id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",    "to": "https://www.w3.org/ns/activitystreams#Public", @@ -263,12 +291,14 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment  	// attachment should no longer be all jacked up.  	suite.Equal(`{    "@context": "https://www.w3.org/ns/activitystreams", -  "attachment": { -    "mediaType": "image/jpeg", -    "name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''", -    "type": "Document", -    "url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg" -  }, +  "attachment": [ +    { +      "mediaType": "image/jpeg", +      "name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''", +      "type": "Document", +      "url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg" +    } +  ],    "attributedTo": "https://example.org/users/hourlycatbot",    "id": "https://example.org/users/hourlycatbot/statuses/01GYW48H311PZ78C5G856MGJJJ",    "to": "https://www.w3.org/ns/activitystreams#Public", diff --git a/internal/ap/serialize.go b/internal/ap/serialize.go index 368d7f9a2..944e67407 100644 --- a/internal/ap/serialize.go +++ b/internal/ap/serialize.go @@ -18,10 +18,9 @@  package ap  import ( -	"fmt" -  	"github.com/superseriousbusiness/activity/streams"  	"github.com/superseriousbusiness/activity/streams/vocab" +	"github.com/superseriousbusiness/gotosocial/internal/gtserror"  )  // Serialize is a custom serializer for ActivityStreams types. @@ -35,17 +34,20 @@ import (  //  // Currently, the following things will be custom serialized:  // -//   - OrderedCollection: 'orderedItems' property will always be made into an array. -//   - Any Accountable type: 'attachment' property will always be made into an array. -//   - Update: any Accountable 'object's set on an update will be custom serialized as above. +//   - OrderedCollection:       'orderedItems' property will always be made into an array. +//   - Any Accountable type:    'attachment' property will always be made into an array. +//   - Any Statusable type:     'attachment' property will always be made into an array; 'content' and 'contentMap' will be normalized. +//   - Any Activityable type:   any 'object's set on an activity will be custom serialized as above.  func Serialize(t vocab.Type) (m map[string]interface{}, e error) { -	switch t.GetTypeName() { -	case ObjectOrderedCollection: +	switch tn := t.GetTypeName(); { +	case tn == ObjectOrderedCollection:  		return serializeOrderedCollection(t) -	case ActorApplication, ActorGroup, ActorOrganization, ActorPerson, ActorService: +	case IsAccountable(tn):  		return serializeAccountable(t, true) -	case ActivityUpdate: -		return serializeWithObject(t) +	case IsStatusable(tn): +		return serializeStatusable(t, true) +	case IsActivityable(tn): +		return serializeActivityable(t, true)  	default:  		// No custom serializer necessary.  		return streams.Serialize(t) @@ -61,8 +63,8 @@ func Serialize(t vocab.Type) (m map[string]interface{}, e error) {  // See:  //   - https://github.com/go-fed/activity/issues/139  //   - https://github.com/mastodon/mastodon/issues/24225 -func serializeOrderedCollection(orderedCollection vocab.Type) (map[string]interface{}, error) { -	data, err := streams.Serialize(orderedCollection) +func serializeOrderedCollection(t vocab.Type) (map[string]interface{}, error) { +	data, err := streams.Serialize(t)  	if err != nil {  		return nil, err  	} @@ -99,7 +101,12 @@ func serializeOrderedCollection(orderedCollection vocab.Type) (map[string]interf  // If the accountable is being serialized as part of another object (eg., as the  // object of an activity), then includeContext should be set to false, as the  // @context entry should be included on the top-level/wrapping activity/object. -func serializeAccountable(accountable vocab.Type, includeContext bool) (map[string]interface{}, error) { +func serializeAccountable(t vocab.Type, includeContext bool) (map[string]interface{}, error) { +	accountable, ok := t.(Accountable) +	if !ok { +		return nil, gtserror.Newf("vocab.Type %T not accountable", t) +	} +  	var (  		data map[string]interface{}  		err  error @@ -115,91 +122,61 @@ func serializeAccountable(accountable vocab.Type, includeContext bool) (map[stri  		return nil, err  	} -	attachment, ok := data["attachment"] -	if !ok { -		// No 'attachment', nothing to change. -		return data, nil -	} - -	if _, ok := attachment.([]interface{}); ok { -		// Already slice. -		return data, nil -	} - -	// Coerce single-object to slice. -	data["attachment"] = []interface{}{attachment} +	NormalizeOutgoingAttachmentProp(accountable, data)  	return data, nil  } -func serializeWithObject(t vocab.Type) (map[string]interface{}, error) { -	withObject, ok := t.(WithObject) +func serializeStatusable(t vocab.Type, includeContext bool) (map[string]interface{}, error) { +	statusable, ok := t.(Statusable)  	if !ok { -		return nil, fmt.Errorf("serializeWithObject: could not resolve %T to WithObject", t) +		return nil, gtserror.Newf("vocab.Type %T not statusable", t) +	} + +	var ( +		data map[string]interface{} +		err  error +	) + +	if includeContext { +		data, err = streams.Serialize(statusable) +	} else { +		data, err = statusable.Serialize()  	} -	data, err := streams.Serialize(t)  	if err != nil {  		return nil, err  	} -	object := withObject.GetActivityStreamsObject() -	if object == nil { -		// Nothing to do, bail early. -		return data, nil +	NormalizeOutgoingAttachmentProp(statusable, data) +	NormalizeOutgoingContentProp(statusable, data) + +	return data, nil +} + +func serializeActivityable(t vocab.Type, includeContext bool) (map[string]interface{}, error) { +	activityable, ok := t.(Activityable) +	if !ok { +		return nil, gtserror.Newf("vocab.Type %T not activityable", t)  	} -	objectLen := object.Len() -	if objectLen == 0 { -		// Nothing to do, bail early. -		return data, nil +	var ( +		data map[string]interface{} +		err  error +	) + +	if includeContext { +		data, err = streams.Serialize(activityable) +	} else { +		data, err = activityable.Serialize()  	} -	// The thing we already serialized has objects -	// on it, so we should see if we need to custom -	// serialize any of those objects, and replace -	// them on the data map as necessary. -	objects := make([]interface{}, 0, objectLen) -	for iter := object.Begin(); iter != object.End(); iter = iter.Next() { -		if iter.IsIRI() { -			// Plain IRIs don't need custom serialization. -			objects = append(objects, iter.GetIRI().String()) -			continue -		} - -		var ( -			objectType = iter.GetType() -			objectSer  map[string]interface{} -		) - -		if objectType == nil { -			// This is awkward. -			return nil, fmt.Errorf("serializeWithObject: could not resolve object iter %T to vocab.Type", iter) -		} - -		switch objectType.GetTypeName() { -		case ActorApplication, ActorGroup, ActorOrganization, ActorPerson, ActorService: -			// @context will be included in wrapping type already, -			// we don't need to include it in the object itself. -			objectSer, err = serializeAccountable(objectType, false) -		default: -			// No custom serializer for this type; serialize as normal. -			objectSer, err = objectType.Serialize() -		} - -		if err != nil { -			return nil, err -		} - -		objects = append(objects, objectSer) +	if err != nil { +		return nil, err  	} -	if objectLen == 1 { -		// Unnest single object. -		data["object"] = objects[0] -	} else { -		// Array of objects. -		data["object"] = objects +	if err := NormalizeOutgoingObjectProp(activityable, data); err != nil { +		return nil, err  	}  	return data, nil diff --git a/internal/gtsmodel/status.go b/internal/gtsmodel/status.go index a009a726d..9b93e34a1 100644 --- a/internal/gtsmodel/status.go +++ b/internal/gtsmodel/status.go @@ -237,3 +237,14 @@ const (  	// VisibilityDefault is used when no other setting can be found.  	VisibilityDefault Visibility = VisibilityUnlocked  ) + +// Content models the simple string content +// of a status along with its ContentMap, +// which contains content entries keyed by +// BCP47 language tag. +// +// Content and/or ContentMap may be zero/nil. +type Content struct { +	Content    string +	ContentMap map[string]string +} diff --git a/internal/typeutils/astointernal.go b/internal/typeutils/astointernal.go index 707f51629..c7908ad24 100644 --- a/internal/typeutils/astointernal.go +++ b/internal/typeutils/astointernal.go @@ -244,9 +244,15 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab  	}  	// status.Content +	// status.Language  	// -	// The (html-formatted) content of this status. -	status.Content = ap.ExtractContent(statusable) +	// Many implementations set both content +	// and contentMap; we can use these to +	// infer the language of the status. +	status.Content, status.Language = ContentToContentLanguage( +		ctx, +		ap.ExtractContent(statusable), +	)  	// status.Attachments  	// @@ -396,9 +402,6 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab  		return &s  	}() -	// language -	// TODO: we might be able to extract this from the contentMap field -  	// ActivityStreamsType  	status.ActivityStreamsType = statusable.GetTypeName() @@ -707,7 +710,7 @@ func (c *Converter) ASFlagToReport(ctx context.Context, flaggable ap.Flaggable)  	// For Mastodon, this will just be a string, or nothing.  	// In Misskey's case, it may also contain the URLs of  	// one or more reported statuses, so extract these too. -	content := ap.ExtractContent(flaggable) +	content := ap.ExtractContent(flaggable).Content  	statusURIs := []*url.URL{}  	inlineURLs := misskeyReportInlineURLs(content)  	statusURIs = append(statusURIs, inlineURLs...) diff --git a/internal/typeutils/astointernal_test.go b/internal/typeutils/astointernal_test.go index 10ea422fa..851d57efc 100644 --- a/internal/typeutils/astointernal_test.go +++ b/internal/typeutils/astointernal_test.go @@ -45,6 +45,10 @@ func (suite *ASToInternalTestSuite) jsonToType(in string) vocab.Type {  		suite.FailNow(err.Error())  	} +	if statusable, ok := t.(ap.Statusable); ok { +		ap.NormalizeIncomingContent(statusable, m) +	} +  	return t  } @@ -103,7 +107,8 @@ func (suite *ASToInternalTestSuite) TestParsePublicStatus() {  	suite.NoError(err)  	suite.Equal("reading: Punishment and Reward in the Corporate University", status.ContentWarning) -	suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous.  It has a normative definition, a tacit definition, and an ideal definition.  One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content) +	suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content) +	suite.Equal("en", status.Language)  }  func (suite *ASToInternalTestSuite) TestParsePublicStatusNoURL() { @@ -117,7 +122,7 @@ func (suite *ASToInternalTestSuite) TestParsePublicStatusNoURL() {  	suite.NoError(err)  	suite.Equal("reading: Punishment and Reward in the Corporate University", status.ContentWarning) -	suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous.  It has a normative definition, a tacit definition, and an ideal definition.  One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content) +	suite.Equal(`<p>> So we have to examine critical thinking as a signifier, dynamic and ambiguous. It has a normative definition, a tacit definition, and an ideal definition. One of the hallmarks of graduate training is learning to comprehend those definitions and applying the correct one as needed for professional success.</p>`, status.Content)  	// on statuses with no URL in them (like ones we get from pleroma sometimes) we should use the AP URI of the status as URL  	suite.Equal("http://fossbros-anonymous.io/users/foss_satan/statuses/108138763199405167", status.URL) diff --git a/internal/typeutils/internaltoas.go b/internal/typeutils/internaltoas.go index 16467be40..ff502296b 100644 --- a/internal/typeutils/internaltoas.go +++ b/internal/typeutils/internaltoas.go @@ -607,9 +607,17 @@ func (c *Converter) StatusToAS(ctx context.Context, s *gtsmodel.Status) (ap.Stat  	// conversation  	// TODO -	// content -- the actual post itself +	// content -- the actual post +	// itself, plus the language  	contentProp := streams.NewActivityStreamsContentProperty()  	contentProp.AppendXMLSchemaString(s.Content) + +	if s.Language != "" { +		contentProp.AppendRDFLangString(map[string]string{ +			s.Language: s.Content, +		}) +	} +  	status.SetActivityStreamsContent(contentProp)  	// attachments diff --git a/internal/typeutils/internaltoas_test.go b/internal/typeutils/internaltoas_test.go index 01dde66fb..878040dcc 100644 --- a/internal/typeutils/internaltoas_test.go +++ b/internal/typeutils/internaltoas_test.go @@ -340,6 +340,9 @@ func (suite *InternalToASTestSuite) TestStatusToAS() {    "attributedTo": "http://localhost:8080/users/the_mighty_zork",    "cc": "http://localhost:8080/users/the_mighty_zork/followers",    "content": "hello everyone!", +  "contentMap": { +    "en": "hello everyone!" +  },    "id": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",    "published": "2021-10-20T12:40:37+02:00",    "replies": { @@ -379,16 +382,21 @@ func (suite *InternalToASTestSuite) TestStatusWithTagsToASWithIDs() {  	// http://joinmastodon.org/ns, https://www.w3.org/ns/activitystreams --  	// will appear, so trim them out of the string for consistency  	trimmed := strings.SplitAfter(string(bytes), `"attachment":`)[1] -	suite.Equal(` { -    "blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj", -    "mediaType": "image/jpeg", -    "name": "Black and white image of some 50's style text saying: Welcome On Board", -    "type": "Document", -    "url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg" -  }, +	suite.Equal(` [ +    { +      "blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj", +      "mediaType": "image/jpeg", +      "name": "Black and white image of some 50's style text saying: Welcome On Board", +      "type": "Document", +      "url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg" +    } +  ],    "attributedTo": "http://localhost:8080/users/admin",    "cc": "http://localhost:8080/users/admin/followers",    "content": "hello world! #welcome ! first post on the instance :rainbow: !", +  "contentMap": { +    "en": "hello world! #welcome ! first post on the instance :rainbow: !" +  },    "id": "http://localhost:8080/users/admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R",    "published": "2021-10-20T11:36:45Z",    "replies": { @@ -446,16 +454,21 @@ func (suite *InternalToASTestSuite) TestStatusWithTagsToASFromDB() {  	// http://joinmastodon.org/ns, https://www.w3.org/ns/activitystreams --  	// will appear, so trim them out of the string for consistency  	trimmed := strings.SplitAfter(string(bytes), `"attachment":`)[1] -	suite.Equal(` { -    "blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj", -    "mediaType": "image/jpeg", -    "name": "Black and white image of some 50's style text saying: Welcome On Board", -    "type": "Document", -    "url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg" -  }, +	suite.Equal(` [ +    { +      "blurhash": "LNJRdVM{00Rj%Mayt7j[4nWBofRj", +      "mediaType": "image/jpeg", +      "name": "Black and white image of some 50's style text saying: Welcome On Board", +      "type": "Document", +      "url": "http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg" +    } +  ],    "attributedTo": "http://localhost:8080/users/admin",    "cc": "http://localhost:8080/users/admin/followers",    "content": "hello world! #welcome ! first post on the instance :rainbow: !", +  "contentMap": { +    "en": "hello world! #welcome ! first post on the instance :rainbow: !" +  },    "id": "http://localhost:8080/users/admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R",    "published": "2021-10-20T11:36:45Z",    "replies": { @@ -519,6 +532,9 @@ func (suite *InternalToASTestSuite) TestStatusToASWithMentions() {      "http://localhost:8080/users/the_mighty_zork"    ],    "content": "hi @the_mighty_zork welcome to the instance!", +  "contentMap": { +    "en": "hi @the_mighty_zork welcome to the instance!" +  },    "id": "http://localhost:8080/users/admin/statuses/01FF25D5Q0DH7CHD57CTRS6WK0",    "inReplyTo": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",    "published": "2021-11-20T13:32:16Z", diff --git a/internal/typeutils/util.go b/internal/typeutils/util.go index a19588221..8a8d4123b 100644 --- a/internal/typeutils/util.go +++ b/internal/typeutils/util.go @@ -31,6 +31,8 @@ import (  	apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"  	"github.com/superseriousbusiness/gotosocial/internal/config"  	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +	"github.com/superseriousbusiness/gotosocial/internal/language" +	"github.com/superseriousbusiness/gotosocial/internal/log"  	"github.com/superseriousbusiness/gotosocial/internal/regexes"  	"github.com/superseriousbusiness/gotosocial/internal/text"  ) @@ -184,3 +186,102 @@ func placeholdUnknownAttachments(arr []apimodel.Attachment) (string, []apimodel.  	return text.SanitizeToHTML(aside.String()), arr  } + +// ContentToContentLanguage tries to +// extract a content string and language +// tag string from the given intermediary +// content. +// +// Either/both of the returned strings may +// be empty, depending on how things go. +func ContentToContentLanguage( +	ctx context.Context, +	content gtsmodel.Content, +) ( +	string, // content +	string, // language +) { +	var ( +		contentStr string +		langTagStr string +	) + +	switch contentMap := content.ContentMap; { +	// Simplest case: no `contentMap`. +	// Return `content`, even if empty. +	case contentMap == nil: +		return content.Content, "" + +	// `content` and `contentMap` set. +	// Try to infer "primary" language. +	case content.Content != "": +		// Assume `content` is intended +		// primary content, and look for +		// corresponding language tag. +		contentStr = content.Content + +		for t, c := range contentMap { +			if contentStr == c { +				langTagStr = t +				break +			} +		} + +	// `content` not set; `contentMap` +	// is set with only one value. +	// This must be the "primary" lang. +	case len(contentMap) == 1: +		// Use an empty loop to +		// get the values we want. +		// nolint:revive +		for langTagStr, contentStr = range contentMap { +		} + +	// Only `contentMap` is set, with more +	// than one value. Map order is not +	// guaranteed so we can't know the +	// "primary" language. +	// +	// Try to select content using our +	// instance's configured languages. +	// +	// In case of no hits, just take the +	// first tag and content in the map. +	default: +		instanceLangs := config.GetInstanceLanguages() +		for _, langTagStr = range instanceLangs.TagStrs() { +			if contentStr = contentMap[langTagStr]; contentStr != "" { +				// Hit! +				break +			} +		} + +		// If nothing found, just take +		// the first entry we can get by +		// breaking after the first iter. +		if contentStr == "" { +			for langTagStr, contentStr = range contentMap { +				break +			} +		} +	} + +	if langTagStr != "" { +		// Found a lang tag for this content, +		// make sure it's valid / parseable. +		lang, err := language.Parse(langTagStr) +		if err != nil { +			log.Warnf( +				ctx, +				"could not parse %s as BCP47 language tag in status contentMap: %v", +				langTagStr, err, +			) +		} else { +			// Inferred the language! +			// Use normalized version. +			langTagStr = lang.TagStr +		} +	} + +	return contentStr, langTagStr +} diff --git a/internal/typeutils/util_test.go b/internal/typeutils/util_test.go index e6610574b..0f852d399 100644 --- a/internal/typeutils/util_test.go +++ b/internal/typeutils/util_test.go @@ -18,7 +18,12 @@  package typeutils  import ( +	"context"  	"testing" + +	"github.com/superseriousbusiness/gotosocial/internal/config" +	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +	"github.com/superseriousbusiness/gotosocial/internal/language"  )  func TestMisskeyReportContentURLs1(t *testing.T) { @@ -44,3 +49,112 @@ misskey-formatted`  		t.Fatalf("wanted 0 urls, got %d", l)  	}  } + +func TestContentToContentLanguage(t *testing.T) { +	type testcase struct { +		content           gtsmodel.Content +		instanceLanguages language.Languages +		expectedContent   string +		expectedLang      string +	} + +	ctx, cncl := context.WithCancel(context.Background()) +	defer cncl() + +	for i, testcase := range []testcase{ +		{ +			content: gtsmodel.Content{ +				Content:    "hello world", +				ContentMap: nil, +			}, +			expectedContent: "hello world", +			expectedLang:    "", +		}, +		{ +			content: gtsmodel.Content{ +				Content: "", +				ContentMap: map[string]string{ +					"en": "hello world", +				}, +			}, +			expectedContent: "hello world", +			expectedLang:    "en", +		}, +		{ +			content: gtsmodel.Content{ +				Content: "bonjour le monde", +				ContentMap: map[string]string{ +					"en": "hello world", +					"fr": "bonjour le monde", +				}, +			}, +			expectedContent: "bonjour le monde", +			expectedLang:    "fr", +		}, +		{ +			content: gtsmodel.Content{ +				Content: "bonjour le monde", +				ContentMap: map[string]string{ +					"en": "hello world", +				}, +			}, +			expectedContent: "bonjour le monde", +			expectedLang:    "", +		}, +		{ +			content: gtsmodel.Content{ +				Content: "", +				ContentMap: map[string]string{ +					"en": "hello world", +					"ru": "Привет, мир!", +					"nl": "hallo wereld!", +					"ca": "Hola món!", +				}, +			}, +			instanceLanguages: language.Languages{ +				{TagStr: "en"}, +				{TagStr: "ca"}, +			}, +			expectedContent: "hello world", +			expectedLang:    "en", +		}, +		{ +			content: gtsmodel.Content{ +				Content: "", +				ContentMap: map[string]string{ +					"en": "hello world", +					"ru": "Привет, мир!", +					"nl": "hallo wereld!", +					"ca": "Hola món!", +				}, +			}, +			instanceLanguages: language.Languages{ +				{TagStr: "ca"}, +				{TagStr: "en"}, +			}, +			expectedContent: "Hola món!", +			expectedLang:    "ca", +		}, +	} { +		langs, err := language.InitLangs(testcase.instanceLanguages.TagStrs()) +		if err != nil { +			t.Fatal(err) +		} +		config.SetInstanceLanguages(langs) + +		content, language := ContentToContentLanguage(ctx, testcase.content) +		if content != testcase.expectedContent { +			t.Errorf( +				"test %d expected content '%s' got '%s'", +				i, testcase.expectedContent, content, +			) +		} + +		if language != testcase.expectedLang { +			t.Errorf( +				"test %d expected language '%s' got '%s'", +				i, testcase.expectedLang, language, +			) +		} +	} +} diff --git a/internal/typeutils/wrap_test.go b/internal/typeutils/wrap_test.go index 9d6d95983..453073ed6 100644 --- a/internal/typeutils/wrap_test.go +++ b/internal/typeutils/wrap_test.go @@ -85,6 +85,9 @@ func (suite *WrapTestSuite) TestWrapNoteInCreate() {      "attributedTo": "http://localhost:8080/users/the_mighty_zork",      "cc": "http://localhost:8080/users/the_mighty_zork/followers",      "content": "hello everyone!", +    "contentMap": { +      "en": "hello everyone!" +    },      "id": "http://localhost:8080/users/the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY",      "published": "2021-10-20T12:40:37+02:00",      "replies": {  | 
