summaryrefslogtreecommitdiff
path: root/internal/ap/normalize.go
blob: a27527b843b431f69d7b2be146ce3d5dfd798a2c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.

package ap

import (
	"github.com/superseriousbusiness/activity/pub"
	"github.com/superseriousbusiness/activity/streams"
	"github.com/superseriousbusiness/gotosocial/internal/gtserror"
	"github.com/superseriousbusiness/gotosocial/internal/text"
)

/*
	INCOMING NORMALIZATION
	The below functions should be called to normalize the content
	of messages *COMING INTO* GoToSocial via the federation API,
	either as the result of delivery from a remote instance to this
	instance, or as a result of this instance doing an http call to
	another instance to dereference something.
*/

// NormalizeIncomingActivity normalizes the objects carried by the given
// Activity, using rawJSON as the source for the raw property values.
//
// The rawJSON map should be the freshly deserialized
// json representation of the Activity.
//
// Each entry returned by ExtractActivityData is handled according to its type:
//   - Statusable: content, attachments, summary and name are normalized,
//     and poll options too if the entry is also a Pollable.
//   - Accountable: summary is normalized.
//
// Entries of any other type, entries without a resolved type, and entries
// whose raw data is not a JSON object are skipped. The function is a noop
// if ExtractActivityData reports failure.
func NormalizeIncomingActivity(activity pub.Activity, rawJSON map[string]interface{}) {
	// Pull out each data entry alongside its "raw" JSON counterpart.
	entries, rawEntries, ok := ExtractActivityData(activity, rawJSON)
	if !ok {
		return
	}

	if len(entries) != len(rawEntries) {
		// Differing lengths *shouldn't* happen; without
		// a 1:1 pairing we can't normalize safely.
		return
	}

	for idx := range entries {
		// Entries that don't resolve to a
		// type cannot be normalized; skip.
		t := entries[idx].GetType()
		if t == nil {
			continue
		}

		// Likewise, the matching raw entry must
		// be a JSON object for us to read from it.
		rawObject, isObject := rawEntries[idx].(map[string]any)
		if !isObject {
			continue
		}

		if statusable, isStatusable := ToStatusable(t); isStatusable {
			// Pollables carry extra properties
			// on top of the usual status ones.
			if pollable, isPollable := ToPollable(t); isPollable {
				NormalizeIncomingPollOptions(pollable, rawObject)
			}

			// Normalize all the status properties we know how to.
			NormalizeIncomingContent(statusable, rawObject)
			NormalizeIncomingAttachments(statusable, rawObject)
			NormalizeIncomingSummary(statusable, rawObject)
			NormalizeIncomingName(statusable, rawObject)
		} else if accountable, isAccountable := ToAccountable(t); isAccountable {
			// Accounts only need their summary normalized.
			NormalizeIncomingSummary(accountable, rawObject)
		}
	}
}

// normalizeContent sanitizes the given raw content
// value as HTML and then minifies the result.
//
// An empty string is returned if rawContent is nil,
// is not a string, or is an empty string.
func normalizeContent(rawContent interface{}) string {
	// A failed assertion covers both nil and
	// non-string values (content slices etc).
	str, isString := rawContent.(string)
	if !isString || str == "" {
		// Nothing to fix.
		return ""
	}

	// Content entries should be HTML encoded by default:
	// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-content
	//
	// TODO: sanitize differently based on mediaType.
	// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-mediatype
	return text.MinifyHTML(text.SanitizeToHTML(str))
}

// NormalizeIncomingContent sets a new Content property on the given item,
// built from the normalized 'content' and 'contentMap' values found in
// the raw json object map. Any existing Content property is replaced.
//
// Values that normalize to an empty string are left out of the new property.
//
// noop if the json object map has neither 'content' nor 'contentMap'.
func NormalizeIncomingContent(item WithContent, rawJSON map[string]interface{}) {
	rawContent, rawContentMap := rawJSON["content"], rawJSON["contentMap"]
	if rawContent == nil && rawContentMap == nil {
		// Neither key is usable, so there's
		// nothing to put on the item.
		return
	}

	// Fresh property to hold the normalized values.
	prop := streams.NewActivityStreamsContentProperty()

	// Plain 'content' string, if there is one.
	if normalized := normalizeContent(rawContent); normalized != "" {
		prop.AppendXMLSchemaString(normalized)
	}

	// Language-keyed 'contentMap', if there is one.
	if byLang, isMap := rawContentMap.(map[string]interface{}); isMap {
		langStrings := make(map[string]string, len(byLang))

		for lang, raw := range byLang {
			if normalized := normalizeContent(raw); normalized != "" {
				langStrings[lang] = normalized
			}
		}

		// Only append if at least one
		// language entry survived.
		if len(langStrings) > 0 {
			prop.AppendRDFLangString(langStrings)
		}
	}

	// Swap out whatever content property
	// the item had for the normalized one.
	item.SetActivityStreamsContent(prop)
}

// NormalizeIncomingAttachments normalizes the attachments (if any) of the
// given item: for each attachment, the 'name' field is replaced with the
// sanitized raw 'name' value of the corresponding entry under 'attachment'
// in the raw json object map (see NormalizeIncomingName).
//
// noop if the json object map has no 'attachment' key, if the item has no
// attachments, or if the number of attachments on the item differs from
// the number in the json. Individual attachments that are not in a format
// we can understand are skipped.
func NormalizeIncomingAttachments(item WithAttachment, rawJSON map[string]interface{}) {
	raw, found := rawJSON["attachment"]
	if !found {
		// rawJSON has no attachments.
		return
	}

	// A lone attachment is wrapped in a slice so
	// that both shapes can be iterated the same way.
	rawList, isSlice := raw.([]interface{})
	if !isSlice {
		rawList = []interface{}{raw}
	}

	prop := item.GetActivityStreamsAttachment()
	if prop == nil {
		// Item has no attachment property.
		return
	}

	if count := prop.Len(); count == 0 || count != len(rawList) {
		// Item and JSON disagree (or item is
		// empty); can't pair entries up.
		return
	}

	// Walk the property iterator and the raw slice in step, so
	// that idx always points at the raw entry matching iter.
	idx := 0
	for iter := prop.Begin(); iter != prop.End(); iter, idx = iter.Next(), idx+1 {
		// The assertion also fails for a nil
		// type, so no separate nil check needed.
		attachmentable, ok := iter.GetType().(Attachmentable)
		if !ok {
			continue
		}

		rawAttachment, ok := rawList[idx].(map[string]interface{})
		if !ok {
			continue
		}

		NormalizeIncomingName(attachmentable, rawAttachment)
	}
}

// NormalizeIncomingSummary sets the Summary of the given item to the
// sanitized, minified version of the raw 'summary' value from the raw
// json object map, replacing any existing Summary on the item.
//
// noop if the json object map has no 'summary', or
// if the 'summary' value is not a plain string.
func NormalizeIncomingSummary(item WithSummary, rawJSON map[string]interface{}) {
	// A missing key yields nil, which fails the
	// assertion just like any non-string value.
	raw, isString := rawJSON["summary"].(string)
	if !isString {
		return
	}

	// Summary should be HTML encoded:
	// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-summary
	normalized := text.MinifyHTML(text.SanitizeToHTML(raw))

	// Build a fresh property holding only the normalized
	// string, and put it on the item in place of the old one.
	prop := streams.NewActivityStreamsSummaryProperty()
	prop.AppendXMLSchemaString(normalized)
	item.SetActivityStreamsSummary(prop)
}

// NormalizeIncomingName sets the Name of the given item to the raw
// 'name' value from the raw json object map, sanitized to plaintext,
// replacing any existing Name on the item.
//
// noop if the json object map has no 'name', or
// if the 'name' value is not a plain string.
func NormalizeIncomingName(item WithName, rawJSON map[string]interface{}) {
	// A missing key yields nil, which fails the
	// assertion just like any non-string value.
	raw, isString := rawJSON["name"].(string)
	if !isString {
		return
	}

	// Name *must not* include any HTML markup:
	// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-name
	//
	// todo: We probably want to update this to allow
	// *escaped* HTML markup, but for now just nuke it.
	plain := text.SanitizeToPlaintext(raw)

	// Build a fresh property holding only the normalized
	// string, and put it on the item in place of the old one.
	prop := streams.NewActivityStreamsNameProperty()
	prop.AppendXMLSchemaString(plain)
	item.SetActivityStreamsName(prop)
}

// NormalizeIncomingPollOptions normalizes the oneOf entries (if any) of
// the given item: for each entry, the 'name' field is replaced with the
// sanitized raw 'name' value of the corresponding entry under 'oneOf' in
// the raw json object map (see NormalizeIncomingName).
//
// noop if the json object map has no 'oneOf' key, if the item has no oneOf
// entries, or if the number of entries on the item differs from the number
// in the json. Individual entries not in the expected format are skipped.
func NormalizeIncomingPollOptions(item WithOneOf, rawJSON map[string]interface{}) {
	// Look up the raw one-of JSON data.
	raw, found := rawJSON["oneOf"]
	if !found {
		return
	}

	// A lone option is wrapped in a slice so that
	// both shapes can be iterated the same way.
	rawOptions, isSlice := raw.([]interface{})
	if !isSlice {
		rawOptions = []interface{}{raw}
	}

	// Fetch the unmarshaled one-of property.
	prop := item.GetActivityStreamsOneOf()
	if prop == nil {
		return
	}

	// Unmarshaled and raw data must pair up 1:1.
	if count := prop.Len(); count == 0 || count != len(rawOptions) {
		return
	}

	// Track our position in the iterator so we
	// can pick out the matching raw option.
	idx := -1

	for iter := prop.Begin(); iter != prop.End(); iter = iter.Next() {
		idx++

		// Entry must be usable as a poll option
		// (this also rules out a nil type).
		option, ok := iter.GetType().(PollOptionable)
		if !ok {
			continue
		}

		// Raw counterpart must be a JSON object.
		rawOption, ok := rawOptions[idx].(map[string]interface{})
		if !ok {
			continue
		}

		NormalizeIncomingName(option, rawOption)
	}
}

/*
	OUTGOING NORMALIZATION
	The below functions should be called to normalize the content
	of messages *GOING OUT OF* GoToSocial via the federation API,
	either as the result of delivery to a remote instance from this
	instance, or as a result of a remote instance doing an http call
	to us to dereference something.
*/

// NormalizeOutgoingAttachmentProp makes sure that the 'attachment' entry
// of rawJSON is always an array, wrapping a single attachment object in a
// single-entry array for better compatibility with other AP implementations.
//
// Ie:
//
//	"attachment": {
//	  ...
//	}
//
// becomes:
//
//	"attachment": [
//	  {
//	    ...
//	  }
//	]
//
// Noop if rawJSON has no 'attachment' key, or if its value is already a slice.
func NormalizeOutgoingAttachmentProp(item WithAttachment, rawJSON map[string]interface{}) {
	raw, present := rawJSON["attachment"]
	if !present {
		// Nothing under 'attachment',
		// so nothing to rewrite.
		return
	}

	switch raw.(type) {
	case []interface{}:
		// Slice already, leave as-is.

	default:
		// Wrap the single value in a slice.
		rawJSON["attachment"] = []interface{}{raw}
	}
}

// NormalizeOutgoingContentProp rewrites the 'content' and 'contentMap'
// entries of rawJSON from the Content property of the given item, turning
// go-fed's funky formatting of those properties into a format better
// understood by other AP implementations.
//
// Ie., a serialized "content" property like this:
//
//	"content": [
//	  "hello world!",
//	  {
//	    "en": "hello world!"
//	  }
//	]
//
// Is unpacked to:
//
//	"content": "hello world!",
//	"contentMap": {
//	  "en": "hello world!"
//	}
//
// Only the first non-empty plain string and the first non-nil language map
// found on the property are used. Whichever of 'content' / 'contentMap'
// ends up without a value is deleted from rawJSON.
//
// Noop if the item has no Content property, or the property is empty.
func NormalizeOutgoingContentProp(item WithContent, rawJSON map[string]interface{}) {
	prop := item.GetActivityStreamsContent()
	if prop == nil || prop.Len() == 0 {
		// No content on the item,
		// leave rawJSON alone.
		return
	}

	var (
		plain string
		langs map[string]string
	)

	// Pick out the first usable value of each kind.
	for iter := prop.Begin(); iter != prop.End(); iter = iter.Next() {
		if iter.IsRDFLangString() && langs == nil {
			langs = iter.GetRDFLangString()
		} else if plain == "" && iter.IsXMLSchemaString() {
			plain = iter.GetXMLSchemaString()
		}
	}

	// 'content' is a bare string, or absent.
	if plain == "" {
		delete(rawJSON, "content")
	} else {
		rawJSON["content"] = plain
	}

	// 'contentMap' is a language map, or absent.
	if langs == nil {
		delete(rawJSON, "contentMap")
	} else {
		rawJSON["contentMap"] = langs
	}
}

// NormalizeOutgoingObjectProp re-serializes each Object entry of the given
// item and stores the result under 'object' in rawJSON. Plain IRIs are written
// as strings, Accountables and Statusables go through their custom serializers,
// and every other type is serialized as normal.
//
// This function also unnests single-entry arrays, so that:
//
//	"object": [
//	  {
//	    ...
//	  }
//	]
//
// Becomes:
//
//	"object": {
//	  ...
//	}
//
// Noop if the item has no Object property, or the property is empty.
//
// An error is returned if an entry is neither an IRI nor resolvable to
// a type, or if serializing an entry fails; in both cases rawJSON is
// left unmodified.
func NormalizeOutgoingObjectProp(item WithObject, rawJSON map[string]interface{}) error {
	prop := item.GetActivityStreamsObject()
	if prop == nil {
		// No object property,
		// nothing to normalize.
		return nil
	}

	count := prop.Len()
	if count == 0 {
		// Empty object property,
		// nothing to normalize.
		return nil
	}

	// The item we were handed has objects on it; go
	// through them one by one, custom serializing where
	// needed, and collect the results so we can replace
	// what's currently on the data map.
	serialized := make([]interface{}, 0, count)
	for iter := prop.Begin(); iter != prop.End(); iter = iter.Next() {
		if iter.IsIRI() {
			// A plain IRI is just written out as its string form.
			serialized = append(serialized, iter.GetIRI().String())
			continue
		}

		t := iter.GetType()
		if t == nil {
			// Neither IRI nor type; awkward.
			return gtserror.Newf("could not resolve object iter %T to vocab.Type", iter)
		}

		var (
			ser map[string]interface{}
			err error
		)

		// For the accountable and statusable serializers below,
		// `@context` is already present on the wrapping type,
		// so it shouldn't be repeated inside the object itself.
		typeName := t.GetTypeName()
		if IsAccountable(typeName) {
			ser, err = serializeAccountable(t, false)
		} else if IsStatusable(typeName) {
			// IsStatusable covers Pollable too.
			ser, err = serializeStatusable(t, false)
		} else {
			// Nothing custom for this type; use its own serializer.
			ser, err = t.Serialize()
		}

		if err != nil {
			return err
		}

		serialized = append(serialized, ser)
	}

	if count == 1 {
		// Lone object, store it unnested.
		rawJSON["object"] = serialized[0]
		return nil
	}

	// Multiple objects, store as array.
	rawJSON["object"] = serialized
	return nil
}