author      2023-11-10 19:29:26 +0100
committer   2023-11-10 19:29:26 +0100
commit      ba9d6b467a1f03447789844048d913738c843569 (patch)
tree        5a464ee4a33f26e3284179582ab6d3332d9d5388 /internal/media
parent      [chore/bugfix/horror] Allow `expires_in` and poll choices to be parsed from s... (diff)
download    gotosocial-ba9d6b467a1f03447789844048d913738c843569.tar.xz
[feature] Media attachment placeholders (#2331)
* [feature] Use placeholders for unknown media types
* fix read of underreported small files
* switch to reduce nesting
* simplify cleanup
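The first two bullets describe a behavioural change that shows up throughout manager.go and manager_test.go below: pre-processing no longer returns an error, and LoadAttachment can now hand back a partially-populated placeholder attachment alongside an error. A minimal sketch of a hypothetical caller, based on the test behaviour in this diff (the function name storeWithPlaceholder and its error handling are illustrative, not code from this commit):

```go
package example

import (
	"context"

	"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
	"github.com/superseriousbusiness/gotosocial/internal/log"
	"github.com/superseriousbusiness/gotosocial/internal/media"
)

// storeWithPlaceholder illustrates the new contract: LoadAttachment may
// return BOTH a non-nil attachment and an error. When that happens the
// attachment is a partially-populated placeholder with type "unknown",
// which the caller can keep instead of failing the whole request.
func storeWithPlaceholder(
	ctx context.Context,
	mgr *media.Manager,
	data media.DataFunc,
	accountID string,
) (*gtsmodel.MediaAttachment, error) {
	processing := mgr.PreProcessMedia(data, accountID, nil)

	attachment, err := processing.LoadAttachment(ctx)
	if err != nil && attachment != nil && attachment.Type == gtsmodel.FileTypeUnknown {
		// Keep the placeholder; clients can still click through
		// to the remote original instead of seeing nothing.
		log.Warnf(ctx, "keeping media %s as a placeholder: %v", attachment.ID, err)
		return attachment, nil
	}

	return attachment, err
}
```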
Diffstat (limited to 'internal/media')
-rw-r--r--  internal/media/manager.go                                            | 301
-rw-r--r--  internal/media/manager_test.go                                       | 399
-rw-r--r--  internal/media/processingemoji.go                                    |  91
-rw-r--r--  internal/media/processingmedia.go                                    | 344
-rw-r--r--  internal/media/test/Frantz-Fanon-The-Wretched-of-the-Earth-1965.pdf  | bin 0 -> 1027775 bytes
-rw-r--r--  internal/media/types.go                                              |   3
-rw-r--r--  internal/media/util.go                                               |  42
7 files changed, 731 insertions, 449 deletions
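Before the diff itself, here is a self-contained sketch of the header-sniffing pattern the commit introduces via newHdrBuf() and the io.ReadFull handling in store(): size a buffer from the *reported* file size, read as much of it as possible, and recover when the size turns out to be over-reported rather than failing outright. The helper name sniffHeader is illustrative only.

```go
package example

import (
	"errors"
	"fmt"
	"io"

	"github.com/h2non/filetype"
)

// sniffHeader sizes a header buffer from the reported file size, reads
// as much of it as possible, and tolerates a size that turns out to be
// over-reported. This mirrors newHdrBuf() plus the io.ReadFull handling
// added to store() in this commit.
func sniffHeader(r io.Reader, reportedSize int64) (extension string, header []byte, err error) {
	// File headers are usually at most 261 bytes; for files reported
	// as smaller than that, only allocate what we expect to read.
	bufSize := 261
	if reportedSize > 0 && reportedSize < int64(bufSize) {
		bufSize = int(reportedSize)
	}
	buf := make([]byte, bufSize)

	n, err := io.ReadFull(r, buf)
	if err != nil {
		if !errors.Is(err, io.ErrUnexpectedEOF) {
			// io.EOF (nothing read at all) or a transport error.
			return "", nil, fmt.Errorf("reading header: %w", err)
		}
		// Size was over-reported: keep whatever we did read.
		buf = buf[:n]
	}

	// Match a file type from the magic number / header bytes.
	info, err := filetype.Match(buf)
	if err != nil {
		return "", nil, fmt.Errorf("matching file type: %w", err)
	}

	return info.Extension, buf, nil
}
```

The counterintuitive part, called out in the comments of the real code below, is that processing continues on io.ErrUnexpectedEOF (some bytes were read) but stops on plain io.EOF (nothing was read at all).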
diff --git a/internal/media/manager.go b/internal/media/manager.go index afe686cb9..dfae37d80 100644 --- a/internal/media/manager.go +++ b/internal/media/manager.go @@ -20,7 +20,6 @@ package media import ( "context" "errors" - "fmt" "io" "time" @@ -32,6 +31,7 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/state" "github.com/superseriousbusiness/gotosocial/internal/uris" + "github.com/superseriousbusiness/gotosocial/internal/util" ) var SupportedMIMETypes = []string{ @@ -57,52 +57,67 @@ func NewManager(state *state.State) *Manager { return m } -// PreProcessMedia begins the process of decoding and storing the given data as an attachment. -// It will return a pointer to a ProcessingMedia struct upon which further actions can be performed, such as getting -// the finished media, thumbnail, attachment, etc. +// PreProcessMedia begins the process of decoding +// and storing the given data as an attachment. +// It will return a pointer to a ProcessingMedia +// struct upon which further actions can be performed, +// such as getting the finished media, thumbnail, +// attachment, etc. // -// data should be a function that the media manager can call to return a reader containing the media data. +// - data: a function that the media manager can call +// to return a reader containing the media data. +// - accountID: the account that the media belongs to. +// - ai: optional and can be nil. Any additional information +// about the attachment provided will be put in the database. // -// accountID should be the account that the media belongs to. -// -// ai is optional and can be nil. Any additional information about the attachment provided will be put in the database. -// -// Note: unlike ProcessMedia, this will NOT queue the media to be asynchronously processed. -func (m *Manager) PreProcessMedia(ctx context.Context, data DataFunc, accountID string, ai *AdditionalMediaInfo) (*ProcessingMedia, error) { - id, err := id.NewRandomULID() - if err != nil { - return nil, err - } - - avatar := false - header := false - cached := false +// Note: unlike ProcessMedia, this will NOT +// queue the media to be asynchronously processed. +func (m *Manager) PreProcessMedia( + data DataFunc, + accountID string, + ai *AdditionalMediaInfo, +) *ProcessingMedia { + // Populate initial fields on the new media, + // leaving out fields with values we don't know + // yet. These will be overwritten as we go. 
now := time.Now() - - // populate initial fields on the media attachment -- some of these will be overwritten as we proceed attachment := >smodel.MediaAttachment{ - ID: id, - CreatedAt: now, - UpdatedAt: now, - StatusID: "", - URL: "", // we don't know yet because it depends on the uncalled DataFunc - RemoteURL: "", - Type: gtsmodel.FileTypeUnknown, // we don't know yet because it depends on the uncalled DataFunc - FileMeta: gtsmodel.FileMeta{}, - AccountID: accountID, - Description: "", - ScheduledStatusID: "", - Blurhash: "", - Processing: gtsmodel.ProcessingStatusReceived, - File: gtsmodel.File{UpdatedAt: now}, - Thumbnail: gtsmodel.Thumbnail{UpdatedAt: now}, - Avatar: &avatar, - Header: &header, - Cached: &cached, + ID: id.NewULID(), + CreatedAt: now, + UpdatedAt: now, + Type: gtsmodel.FileTypeUnknown, + FileMeta: gtsmodel.FileMeta{}, + AccountID: accountID, + Processing: gtsmodel.ProcessingStatusReceived, + File: gtsmodel.File{ + UpdatedAt: now, + ContentType: "application/octet-stream", + }, + Thumbnail: gtsmodel.Thumbnail{UpdatedAt: now}, + Avatar: util.Ptr(false), + Header: util.Ptr(false), + Cached: util.Ptr(false), } - // check if we have additional info to add to the attachment, - // and overwrite some of the attachment fields if so + attachment.URL = uris.URIForAttachment( + accountID, + string(TypeAttachment), + string(SizeOriginal), + attachment.ID, + "unknown", + ) + + attachment.File.Path = uris.StoragePathForAttachment( + accountID, + string(TypeAttachment), + string(SizeOriginal), + attachment.ID, + "unknown", + ) + + // Check if we were provided additional info + // to add to the attachment, and overwrite + // some of the attachment fields if so. if ai != nil { if ai.CreatedAt != nil { attachment.CreatedAt = *ai.CreatedAt @@ -151,14 +166,21 @@ func (m *Manager) PreProcessMedia(ctx context.Context, data DataFunc, accountID mgr: m, } - return processingMedia, nil + return processingMedia } -// PreProcessMediaRecache refetches, reprocesses, and recaches an existing attachment that has been uncached via cleaner pruning. +// PreProcessMediaRecache refetches, reprocesses, +// and recaches an existing attachment that has +// been uncached via cleaner pruning. // -// Note: unlike ProcessMedia, this will NOT queue the media to be asychronously processed. -func (m *Manager) PreProcessMediaRecache(ctx context.Context, data DataFunc, attachmentID string) (*ProcessingMedia, error) { - // get the existing attachment from database. +// Note: unlike ProcessMedia, this will NOT queue +// the media to be asychronously processed. +func (m *Manager) PreProcessMediaRecache( + ctx context.Context, + data DataFunc, + attachmentID string, +) (*ProcessingMedia, error) { + // Get the existing attachment from database. attachment, err := m.state.DB.GetAttachmentByID(ctx, attachmentID) if err != nil { return nil, err @@ -167,43 +189,39 @@ func (m *Manager) PreProcessMediaRecache(ctx context.Context, data DataFunc, att processingMedia := &ProcessingMedia{ media: attachment, dataFn: data, - recache: true, // indicate it's a recache + recache: true, // Indicate it's a recache. mgr: m, } return processingMedia, nil } -// ProcessMedia will call PreProcessMedia, followed by queuing the media to be processing in the media worker queue. -func (m *Manager) ProcessMedia(ctx context.Context, data DataFunc, accountID string, ai *AdditionalMediaInfo) (*ProcessingMedia, error) { - // Create a new processing media object for this media request. 
- media, err := m.PreProcessMedia(ctx, data, accountID, ai) - if err != nil { - return nil, err - } - - // Attempt to add this media processing item to the worker queue. - _ = m.state.Workers.Media.MustEnqueueCtx(ctx, media.Process) - - return media, nil -} - -// PreProcessEmoji begins the process of decoding and storing the given data as an emoji. -// It will return a pointer to a ProcessingEmoji struct upon which further actions can be performed, such as getting -// the finished media, thumbnail, attachment, etc. -// -// data should be a function that the media manager can call to return a reader containing the emoji data. -// -// shortcode should be the emoji shortcode without the ':'s around it. +// PreProcessEmoji begins the process of decoding and storing +// the given data as an emoji. It will return a pointer to a +// ProcessingEmoji struct upon which further actions can be +// performed, such as getting the finished media, thumbnail, +// attachment, etc. // -// id is the database ID that should be used to store the emoji. +// - data: function that the media manager can call +// to return a reader containing the emoji data. +// - shortcode: the emoji shortcode without the ':'s around it. +// - emojiID: database ID that should be used to store the emoji. +// - uri: ActivityPub URI/ID of the emoji. +// - ai: optional and can be nil. Any additional information +// about the emoji provided will be put in the database. +// - refresh: refetch/refresh the emoji. // -// uri is the ActivityPub URI/ID of the emoji. -// -// ai is optional and can be nil. Any additional information about the emoji provided will be put in the database. -// -// Note: unlike ProcessEmoji, this will NOT queue the emoji to be asynchronously processed. -func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode string, emojiID string, uri string, ai *AdditionalEmojiInfo, refresh bool) (*ProcessingEmoji, error) { +// Note: unlike ProcessEmoji, this will NOT queue +// the emoji to be asynchronously processed. +func (m *Manager) PreProcessEmoji( + ctx context.Context, + data DataFunc, + shortcode string, + emojiID string, + uri string, + ai *AdditionalEmojiInfo, + refresh bool, +) (*ProcessingEmoji, error) { var ( newPathID string emoji *gtsmodel.Emoji @@ -217,18 +235,22 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode } if refresh { - // Look for existing emoji by given ID. + // Existing emoji! + emoji, err = m.state.DB.GetEmojiByID(ctx, emojiID) if err != nil { - return nil, gtserror.Newf("error fetching emoji to refresh from the db: %s", err) + err = gtserror.Newf("error fetching emoji to refresh from the db: %w", err) + return nil, err } - // if this is a refresh, we will end up with new images - // stored for this emoji, so we can use an io.Closer callback - // to perform clean up of the old images from storage + // Since this is a refresh, we will end up with + // new images stored for this emoji, so we should + // use an io.Closer callback to perform clean up + // of the original images from storage. originalData := data originalImagePath := emoji.ImagePath originalImageStaticPath := emoji.ImageStaticPath + data = func(ctx context.Context) (io.ReadCloser, int64, error) { // Call original data func. rc, sz, err := originalData(ctx) @@ -251,49 +273,81 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode return iotools.ReadCloser(rc, c), sz, nil } + // Reuse existing shortcode and URI - + // these don't change when we refresh. 
+ emoji.Shortcode = shortcode + emoji.URI = uri + + // Use a new ID to create a new path + // for the new images, to get around + // needing to do cache invalidation. newPathID, err = id.NewRandomULID() if err != nil { return nil, gtserror.Newf("error generating alternateID for emoji refresh: %s", err) } - // store + serve static image at new path ID - emoji.ImageStaticURL = uris.GenerateURIForAttachment(instanceAcc.ID, string(TypeEmoji), string(SizeStatic), newPathID, mimePng) - emoji.ImageStaticPath = fmt.Sprintf("%s/%s/%s/%s.%s", instanceAcc.ID, TypeEmoji, SizeStatic, newPathID, mimePng) - - emoji.Shortcode = shortcode - emoji.URI = uri + emoji.ImageStaticURL = uris.URIForAttachment( + instanceAcc.ID, + string(TypeEmoji), + string(SizeStatic), + newPathID, + // All static emojis + // are encoded as png. + mimePng, + ) + + emoji.ImageStaticPath = uris.StoragePathForAttachment( + instanceAcc.ID, + string(TypeEmoji), + string(SizeStatic), + newPathID, + // All static emojis + // are encoded as png. + mimePng, + ) } else { - disabled := false - visibleInPicker := true - - // populate initial fields on the emoji -- some of these will be overwritten as we proceed + // New emoji! + + imageStaticURL := uris.URIForAttachment( + instanceAcc.ID, + string(TypeEmoji), + string(SizeStatic), + emojiID, + // All static emojis + // are encoded as png. + mimePng, + ) + + imageStaticPath := uris.StoragePathForAttachment( + instanceAcc.ID, + string(TypeEmoji), + string(SizeStatic), + emojiID, + // All static emojis + // are encoded as png. + mimePng, + ) + + // Populate initial fields on the new emoji, + // leaving out fields with values we don't know + // yet. These will be overwritten as we go. emoji = >smodel.Emoji{ ID: emojiID, CreatedAt: now, + UpdatedAt: now, Shortcode: shortcode, - Domain: "", // assume our own domain unless told otherwise - ImageRemoteURL: "", - ImageStaticRemoteURL: "", - ImageURL: "", // we don't know yet - ImageStaticURL: uris.GenerateURIForAttachment(instanceAcc.ID, string(TypeEmoji), string(SizeStatic), emojiID, mimePng), // all static emojis are encoded as png - ImagePath: "", // we don't know yet - ImageStaticPath: fmt.Sprintf("%s/%s/%s/%s.%s", instanceAcc.ID, TypeEmoji, SizeStatic, emojiID, mimePng), // all static emojis are encoded as png - ImageContentType: "", // we don't know yet - ImageStaticContentType: mimeImagePng, // all static emojis are encoded as png - ImageFileSize: 0, - ImageStaticFileSize: 0, - Disabled: &disabled, + ImageStaticURL: imageStaticURL, + ImageStaticPath: imageStaticPath, + ImageStaticContentType: mimeImagePng, + ImageUpdatedAt: now, + Disabled: util.Ptr(false), URI: uri, - VisibleInPicker: &visibleInPicker, - CategoryID: "", + VisibleInPicker: util.Ptr(true), } } - emoji.ImageUpdatedAt = now - emoji.UpdatedAt = now - - // check if we have additional info to add to the emoji, - // and overwrite some of the emoji fields if so + // Check if we have additional info to add to the emoji, + // and overwrite some of the emoji fields if so. if ai != nil { if ai.CreatedAt != nil { emoji.CreatedAt = *ai.CreatedAt @@ -335,11 +389,17 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode return processingEmoji, nil } -// PreProcessEmojiRecache refetches, reprocesses, and recaches an existing emoji that has been uncached via cleaner pruning. +// PreProcessEmojiRecache refetches, reprocesses, and recaches +// an existing emoji that has been uncached via cleaner pruning. 
// -// Note: unlike ProcessEmoji, this will NOT queue the emoji to be asychronously processed. -func (m *Manager) PreProcessEmojiRecache(ctx context.Context, data DataFunc, emojiID string) (*ProcessingEmoji, error) { - // get the existing emoji from the database. +// Note: unlike ProcessEmoji, this will NOT queue the emoji to +// be asychronously processed. +func (m *Manager) PreProcessEmojiRecache( + ctx context.Context, + data DataFunc, + emojiID string, +) (*ProcessingEmoji, error) { + // Get the existing emoji from the database. emoji, err := m.state.DB.GetEmojiByID(ctx, emojiID) if err != nil { return nil, err @@ -348,15 +408,24 @@ func (m *Manager) PreProcessEmojiRecache(ctx context.Context, data DataFunc, emo processingEmoji := &ProcessingEmoji{ emoji: emoji, dataFn: data, - existing: true, // inidcate recache + existing: true, // Indicate recache. mgr: m, } return processingEmoji, nil } -// ProcessEmoji will call PreProcessEmoji, followed by queuing the emoji to be processing in the emoji worker queue. -func (m *Manager) ProcessEmoji(ctx context.Context, data DataFunc, shortcode string, id string, uri string, ai *AdditionalEmojiInfo, refresh bool) (*ProcessingEmoji, error) { +// ProcessEmoji will call PreProcessEmoji, followed +// by queuing the emoji in the emoji worker queue. +func (m *Manager) ProcessEmoji( + ctx context.Context, + data DataFunc, + shortcode string, + id string, + uri string, + ai *AdditionalEmojiInfo, + refresh bool, +) (*ProcessingEmoji, error) { // Create a new processing emoji object for this emoji request. emoji, err := m.PreProcessEmoji(ctx, data, shortcode, id, uri, ai, refresh) if err != nil { diff --git a/internal/media/manager_test.go b/internal/media/manager_test.go index cd0f9c24b..4a3d3c886 100644 --- a/internal/media/manager_test.go +++ b/internal/media/manager_test.go @@ -33,7 +33,6 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/media" "github.com/superseriousbusiness/gotosocial/internal/state" gtsstorage "github.com/superseriousbusiness/gotosocial/internal/storage" - "github.com/superseriousbusiness/gotosocial/testrig" ) type ManagerTestSuite struct { @@ -319,8 +318,7 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessBlocking() { accountID := "01FS1X72SK9ZPW0J1QQ68BD264" // process the media with no additional info provided - processingMedia, err := suite.manager.ProcessMedia(ctx, data, accountID, nil) - suite.NoError(err) + processingMedia := suite.manager.PreProcessMedia(data, accountID, nil) // fetch the attachment id from the processing media attachmentID := processingMedia.AttachmentID() @@ -376,6 +374,131 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessBlocking() { suite.Equal(processedThumbnailBytesExpected, processedThumbnailBytes) } +func (suite *ManagerTestSuite) TestSimpleJpegProcessPartial() { + ctx := context.Background() + + data := func(_ context.Context) (io.ReadCloser, int64, error) { + // load bytes from a test image + b, err := os.ReadFile("./test/test-jpeg.jpg") + if err != nil { + panic(err) + } + + // Fuck up the bytes a bit by cutting + // off the second half, tee hee! 
+ b = b[:len(b)/2] + + return io.NopCloser(bytes.NewBuffer(b)), int64(len(b)), nil + } + + accountID := "01FS1X72SK9ZPW0J1QQ68BD264" + + // process the media with no additional info provided + processingMedia := suite.manager.PreProcessMedia(data, accountID, nil) + + // fetch the attachment id from the processing media + attachmentID := processingMedia.AttachmentID() + + // do a blocking call to fetch the attachment + attachment, err := processingMedia.LoadAttachment(ctx) + + // Since we're cutting off the byte stream + // halfway through, we should get an error here. + suite.EqualError(err, "finish: error decoding image: unexpected EOF") + suite.NotNil(attachment) + + // make sure it's got the stuff set on it that we expect + // the attachment ID and accountID we expect + suite.Equal(attachmentID, attachment.ID) + suite.Equal(accountID, attachment.AccountID) + + // file meta should be correctly derived from the image + suite.Zero(attachment.FileMeta) + suite.Equal("image/jpeg", attachment.File.ContentType) + suite.Equal("image/jpeg", attachment.Thumbnail.ContentType) + suite.Empty(attachment.Blurhash) + + // now make sure the attachment is in the database + dbAttachment, err := suite.db.GetAttachmentByID(ctx, attachmentID) + suite.NoError(err) + suite.NotNil(dbAttachment) + + // Attachment should have type unknown + suite.Equal(gtsmodel.FileTypeUnknown, dbAttachment.Type) + + // Nothing should be in storage for this attachment. + stored, err := suite.storage.Has(ctx, attachment.File.Path) + if err != nil { + suite.FailNow(err.Error()) + } + suite.False(stored) + + stored, err = suite.storage.Has(ctx, attachment.Thumbnail.Path) + if err != nil { + suite.FailNow(err.Error()) + } + suite.False(stored) +} + +func (suite *ManagerTestSuite) TestPDFProcess() { + ctx := context.Background() + + data := func(_ context.Context) (io.ReadCloser, int64, error) { + // load bytes from Frantz + b, err := os.ReadFile("./test/Frantz-Fanon-The-Wretched-of-the-Earth-1965.pdf") + if err != nil { + panic(err) + } + + return io.NopCloser(bytes.NewBuffer(b)), int64(len(b)), nil + } + + accountID := "01FS1X72SK9ZPW0J1QQ68BD264" + + // process the media with no additional info provided + processingMedia := suite.manager.PreProcessMedia(data, accountID, nil) + + // fetch the attachment id from the processing media + attachmentID := processingMedia.AttachmentID() + + // do a blocking call to fetch the attachment + attachment, err := processingMedia.LoadAttachment(ctx) + suite.NoError(err) + suite.NotNil(attachment) + + // make sure it's got the stuff set on it that we expect + // the attachment ID and accountID we expect + suite.Equal(attachmentID, attachment.ID) + suite.Equal(accountID, attachment.AccountID) + + // file meta should be correctly derived from the image + suite.Zero(attachment.FileMeta) + suite.Equal("application/pdf", attachment.File.ContentType) + suite.Equal("image/jpeg", attachment.Thumbnail.ContentType) + suite.Empty(attachment.Blurhash) + + // now make sure the attachment is in the database + dbAttachment, err := suite.db.GetAttachmentByID(ctx, attachmentID) + suite.NoError(err) + suite.NotNil(dbAttachment) + + // Attachment should have type unknown + suite.Equal(gtsmodel.FileTypeUnknown, dbAttachment.Type) + + // Nothing should be in storage for this attachment. 
+ stored, err := suite.storage.Has(ctx, attachment.File.Path) + if err != nil { + suite.FailNow(err.Error()) + } + suite.False(stored) + + stored, err = suite.storage.Has(ctx, attachment.Thumbnail.Path) + if err != nil { + suite.FailNow(err.Error()) + } + suite.False(stored) +} + func (suite *ManagerTestSuite) TestSlothVineProcessBlocking() { ctx := context.Background() @@ -391,8 +514,7 @@ func (suite *ManagerTestSuite) TestSlothVineProcessBlocking() { accountID := "01FS1X72SK9ZPW0J1QQ68BD264" // process the media with no additional info provided - processingMedia, err := suite.manager.ProcessMedia(ctx, data, accountID, nil) - suite.NoError(err) + processingMedia := suite.manager.PreProcessMedia(data, accountID, nil) // fetch the attachment id from the processing media attachmentID := processingMedia.AttachmentID() @@ -467,8 +589,7 @@ func (suite *ManagerTestSuite) TestLongerMp4ProcessBlocking() { accountID := "01FS1X72SK9ZPW0J1QQ68BD264" // process the media with no additional info provided - processingMedia, err := suite.manager.ProcessMedia(ctx, data, accountID, nil) - suite.NoError(err) + processingMedia := suite.manager.PreProcessMedia(data, accountID, nil) // fetch the attachment id from the processing media attachmentID := processingMedia.AttachmentID() @@ -543,8 +664,7 @@ func (suite *ManagerTestSuite) TestBirdnestMp4ProcessBlocking() { accountID := "01FS1X72SK9ZPW0J1QQ68BD264" // process the media with no additional info provided - processingMedia, err := suite.manager.ProcessMedia(ctx, data, accountID, nil) - suite.NoError(err) + processingMedia := suite.manager.PreProcessMedia(data, accountID, nil) // fetch the attachment id from the processing media attachmentID := processingMedia.AttachmentID() @@ -621,13 +741,16 @@ func (suite *ManagerTestSuite) TestNotAnMp4ProcessBlocking() { accountID := "01FS1X72SK9ZPW0J1QQ68BD264" // pre processing should go fine but... - processingMedia, err := suite.manager.ProcessMedia(ctx, data, accountID, nil) - suite.NoError(err) + processingMedia := suite.manager.PreProcessMedia(data, accountID, nil) // we should get an error while loading attachment, err := processingMedia.LoadAttachment(ctx) suite.EqualError(err, "finish: error decoding video: error determining video metadata: [width height framerate]") - suite.Nil(attachment) + + // partial attachment should be + // returned, with 'unknown' type. 
+ suite.NotNil(attachment) + suite.Equal(gtsmodel.FileTypeUnknown, attachment.Type) } func (suite *ManagerTestSuite) TestSimpleJpegProcessBlockingNoContentLengthGiven() { @@ -646,8 +769,7 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessBlockingNoContentLengthGiven accountID := "01FS1X72SK9ZPW0J1QQ68BD264" // process the media with no additional info provided - processingMedia, err := suite.manager.ProcessMedia(ctx, data, accountID, nil) - suite.NoError(err) + processingMedia := suite.manager.PreProcessMedia(data, accountID, nil) // fetch the attachment id from the processing media attachmentID := processingMedia.AttachmentID() @@ -719,8 +841,7 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessBlockingReadCloser() { accountID := "01FS1X72SK9ZPW0J1QQ68BD264" // process the media with no additional info provided - processingMedia, err := suite.manager.ProcessMedia(ctx, data, accountID, nil) - suite.NoError(err) + processingMedia := suite.manager.PreProcessMedia(data, accountID, nil) // fetch the attachment id from the processing media attachmentID := processingMedia.AttachmentID() @@ -791,8 +912,7 @@ func (suite *ManagerTestSuite) TestPngNoAlphaChannelProcessBlocking() { accountID := "01FS1X72SK9ZPW0J1QQ68BD264" // process the media with no additional info provided - processingMedia, err := suite.manager.ProcessMedia(ctx, data, accountID, nil) - suite.NoError(err) + processingMedia := suite.manager.PreProcessMedia(data, accountID, nil) // fetch the attachment id from the processing media attachmentID := processingMedia.AttachmentID() @@ -863,8 +983,7 @@ func (suite *ManagerTestSuite) TestPngAlphaChannelProcessBlocking() { accountID := "01FS1X72SK9ZPW0J1QQ68BD264" // process the media with no additional info provided - processingMedia, err := suite.manager.ProcessMedia(ctx, data, accountID, nil) - suite.NoError(err) + processingMedia := suite.manager.PreProcessMedia(data, accountID, nil) // fetch the attachment id from the processing media attachmentID := processingMedia.AttachmentID() @@ -935,8 +1054,7 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessBlockingWithCallback() { accountID := "01FS1X72SK9ZPW0J1QQ68BD264" // process the media with no additional info provided - processingMedia, err := suite.manager.ProcessMedia(ctx, data, accountID, nil) - suite.NoError(err) + processingMedia := suite.manager.PreProcessMedia(data, accountID, nil) // fetch the attachment id from the processing media attachmentID := processingMedia.AttachmentID() @@ -992,166 +1110,6 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessBlockingWithCallback() { suite.Equal(processedThumbnailBytesExpected, processedThumbnailBytes) } -func (suite *ManagerTestSuite) TestSimpleJpegProcessAsync() { - ctx, cncl := context.WithTimeout(context.Background(), time.Second*30) - defer cncl() - - data := func(_ context.Context) (io.ReadCloser, int64, error) { - // load bytes from a test image - b, err := os.ReadFile("./test/test-jpeg.jpg") - if err != nil { - panic(err) - } - return io.NopCloser(bytes.NewBuffer(b)), int64(len(b)), nil - } - - accountID := "01FS1X72SK9ZPW0J1QQ68BD264" - - // process the media with no additional info provided - processingMedia, err := suite.manager.ProcessMedia(ctx, data, accountID, nil) - suite.NoError(err) - - // fetch the attachment id from the processing media - attachmentID := processingMedia.AttachmentID() - - // wait for processing to complete - var attachment *gtsmodel.MediaAttachment - if !testrig.WaitFor(func() bool { - attachment, err = suite.db.GetAttachmentByID(ctx, 
attachmentID) - return err == nil && attachment != nil - }) { - suite.FailNow("timed out waiting for attachment to process") - } - - // make sure it's got the stuff set on it that we expect - // the attachment ID and accountID we expect - suite.Equal(attachmentID, attachment.ID) - suite.Equal(accountID, attachment.AccountID) - - // file meta should be correctly derived from the image - suite.EqualValues(gtsmodel.Original{ - Width: 1920, Height: 1080, Size: 2073600, Aspect: 1.7777777777777777, - }, attachment.FileMeta.Original) - suite.EqualValues(gtsmodel.Small{ - Width: 512, Height: 288, Size: 147456, Aspect: 1.7777777777777777, - }, attachment.FileMeta.Small) - suite.Equal("image/jpeg", attachment.File.ContentType) - suite.Equal("image/jpeg", attachment.Thumbnail.ContentType) - suite.Equal(269739, attachment.File.FileSize) - suite.Equal("LiBzRk#6V[WF_NvzV@WY_3rqV@a$", attachment.Blurhash) - - // now make sure the attachment is in the database - dbAttachment, err := suite.db.GetAttachmentByID(ctx, attachmentID) - suite.NoError(err) - suite.NotNil(dbAttachment) - - // make sure the processed file is in storage - processedFullBytes, err := suite.storage.Get(ctx, attachment.File.Path) - suite.NoError(err) - suite.NotEmpty(processedFullBytes) - - // load the processed bytes from our test folder, to compare - processedFullBytesExpected, err := os.ReadFile("./test/test-jpeg-processed.jpg") - suite.NoError(err) - suite.NotEmpty(processedFullBytesExpected) - - // the bytes in storage should be what we expected - suite.Equal(processedFullBytesExpected, processedFullBytes) - - // now do the same for the thumbnail and make sure it's what we expected - processedThumbnailBytes, err := suite.storage.Get(ctx, attachment.Thumbnail.Path) - suite.NoError(err) - suite.NotEmpty(processedThumbnailBytes) - - processedThumbnailBytesExpected, err := os.ReadFile("./test/test-jpeg-thumbnail.jpg") - suite.NoError(err) - suite.NotEmpty(processedThumbnailBytesExpected) - - suite.Equal(processedThumbnailBytesExpected, processedThumbnailBytes) -} - -func (suite *ManagerTestSuite) TestSimpleJpegQueueSpamming() { - // in this test, we spam the manager queue with 50 new media requests, just to see how it holds up - ctx := context.Background() - - b, err := os.ReadFile("./test/test-jpeg.jpg") - if err != nil { - panic(err) - } - - data := func(_ context.Context) (io.ReadCloser, int64, error) { - // load bytes from a test image - return io.NopCloser(bytes.NewReader(b)), int64(len(b)), nil - } - - accountID := "01FS1X72SK9ZPW0J1QQ68BD264" - - spam := 50 - inProcess := []*media.ProcessingMedia{} - for i := 0; i < spam; i++ { - // process the media with no additional info provided - processingMedia, err := suite.manager.ProcessMedia(ctx, data, accountID, nil) - suite.NoError(err) - inProcess = append(inProcess, processingMedia) - } - - for _, processingMedia := range inProcess { - // fetch the attachment id from the processing media - attachmentID := processingMedia.AttachmentID() - - // do a blocking call to fetch the attachment - attachment, err := processingMedia.LoadAttachment(ctx) - suite.NoError(err) - suite.NotNil(attachment) - - // make sure it's got the stuff set on it that we expect - // the attachment ID and accountID we expect - suite.Equal(attachmentID, attachment.ID) - suite.Equal(accountID, attachment.AccountID) - - // file meta should be correctly derived from the image - suite.EqualValues(gtsmodel.Original{ - Width: 1920, Height: 1080, Size: 2073600, Aspect: 1.7777777777777777, - }, 
attachment.FileMeta.Original) - suite.EqualValues(gtsmodel.Small{ - Width: 512, Height: 288, Size: 147456, Aspect: 1.7777777777777777, - }, attachment.FileMeta.Small) - suite.Equal("image/jpeg", attachment.File.ContentType) - suite.Equal("image/jpeg", attachment.Thumbnail.ContentType) - suite.Equal(269739, attachment.File.FileSize) - suite.Equal("LiBzRk#6V[WF_NvzV@WY_3rqV@a$", attachment.Blurhash) - - // now make sure the attachment is in the database - dbAttachment, err := suite.db.GetAttachmentByID(ctx, attachmentID) - suite.NoError(err) - suite.NotNil(dbAttachment) - - // make sure the processed file is in storage - processedFullBytes, err := suite.storage.Get(ctx, attachment.File.Path) - suite.NoError(err) - suite.NotEmpty(processedFullBytes) - - // load the processed bytes from our test folder, to compare - processedFullBytesExpected, err := os.ReadFile("./test/test-jpeg-processed.jpg") - suite.NoError(err) - suite.NotEmpty(processedFullBytesExpected) - - // the bytes in storage should be what we expected - suite.Equal(processedFullBytesExpected, processedFullBytes) - - // now do the same for the thumbnail and make sure it's what we expected - processedThumbnailBytes, err := suite.storage.Get(ctx, attachment.Thumbnail.Path) - suite.NoError(err) - suite.NotEmpty(processedThumbnailBytes) - - processedThumbnailBytesExpected, err := os.ReadFile("./test/test-jpeg-thumbnail.jpg") - suite.NoError(err) - suite.NotEmpty(processedThumbnailBytesExpected) - - suite.Equal(processedThumbnailBytesExpected, processedThumbnailBytes) - } -} - func (suite *ManagerTestSuite) TestSimpleJpegProcessBlockingWithDiskStorage() { ctx := context.Background() @@ -1191,8 +1149,7 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessBlockingWithDiskStorage() { suite.manager = diskManager // process the media with no additional info provided - processingMedia, err := diskManager.ProcessMedia(ctx, data, accountID, nil) - suite.NoError(err) + processingMedia := diskManager.PreProcessMedia(data, accountID, nil) // fetch the attachment id from the processing media attachmentID := processingMedia.AttachmentID() @@ -1290,19 +1247,17 @@ func (suite *ManagerTestSuite) TestSmallSizedMediaTypeDetection_issue2263() { accountID := "01FS1X72SK9ZPW0J1QQ68BD264" // process the media with no additional info provided - processingMedia, err := suite.manager.ProcessMedia(ctx, data, accountID, nil) - suite.NoError(err) + processingMedia := suite.manager.PreProcessMedia(data, accountID, nil) + if _, err := processingMedia.LoadAttachment(ctx); err != nil { + suite.FailNow(err.Error()) + } - // fetch the attachment id from the processing media attachmentID := processingMedia.AttachmentID() - // wait for processing to complete - var attachment *gtsmodel.MediaAttachment - if !testrig.WaitFor(func() bool { - attachment, err = suite.db.GetAttachmentByID(ctx, attachmentID) - return err == nil && attachment != nil - }) { - suite.FailNow("timed out waiting for attachment to process") + // fetch the attachment id from the processing media + attachment, err := suite.db.GetAttachmentByID(ctx, attachmentID) + if err != nil { + suite.FailNow(err.Error()) } // make sure it's got the stuff set on it that we expect @@ -1318,6 +1273,62 @@ func (suite *ManagerTestSuite) TestSmallSizedMediaTypeDetection_issue2263() { } } +func (suite *ManagerTestSuite) TestMisreportedSmallMedia() { + const accountID = "01FS1X72SK9ZPW0J1QQ68BD264" + var actualSize int + + data := func(_ context.Context) (io.ReadCloser, int64, error) { + // Load bytes from small png. 
+ b, err := os.ReadFile("./test/test-png-alphachannel-1x1px.png") + if err != nil { + suite.FailNow(err.Error()) + } + + actualSize = len(b) + + // Report media as twice its actual size. This should be corrected. + return io.NopCloser(bytes.NewBuffer(b)), int64(2 * actualSize), nil + } + + // Process the media with no additional info provided. + attachment, err := suite.manager. + PreProcessMedia(data, accountID, nil). + LoadAttachment(context.Background()) + if err != nil { + suite.FailNow(err.Error()) + } + + suite.Equal(actualSize, attachment.File.FileSize) +} + +func (suite *ManagerTestSuite) TestNoReportedSizeSmallMedia() { + const accountID = "01FS1X72SK9ZPW0J1QQ68BD264" + var actualSize int + + data := func(_ context.Context) (io.ReadCloser, int64, error) { + // Load bytes from small png. + b, err := os.ReadFile("./test/test-png-alphachannel-1x1px.png") + if err != nil { + suite.FailNow(err.Error()) + } + + actualSize = len(b) + + // Return zero for media size. This should be detected. + return io.NopCloser(bytes.NewBuffer(b)), 0, nil + } + + // Process the media with no additional info provided. + attachment, err := suite.manager. + PreProcessMedia(data, accountID, nil). + LoadAttachment(context.Background()) + if err != nil { + suite.FailNow(err.Error()) + } + + suite.Equal(actualSize, attachment.File.FileSize) +} + func TestManagerTestSuite(t *testing.T) { suite.Run(t, &ManagerTestSuite{}) } diff --git a/internal/media/processingemoji.go b/internal/media/processingemoji.go index 1c7e60144..4c18d4aad 100644 --- a/internal/media/processingemoji.go +++ b/internal/media/processingemoji.go @@ -20,7 +20,6 @@ package media import ( "bytes" "context" - "fmt" "io" "codeberg.org/gruf/go-bytesize" @@ -33,6 +32,7 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/regexes" "github.com/superseriousbusiness/gotosocial/internal/uris" + "github.com/superseriousbusiness/gotosocial/internal/util" ) // ProcessingEmoji represents an emoji currently processing. It exposes @@ -156,14 +156,51 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { } }() - // Byte buffer to read file header into. - // See: https://en.wikipedia.org/wiki/File_format#File_header - // and https://github.com/h2non/filetype - hdrBuf := make([]byte, 261) + var maxSize bytesize.Size + + if p.emoji.Domain == "" { + // this is a local emoji upload + maxSize = config.GetMediaEmojiLocalMaxSize() + } else { + // this is a remote incoming emoji + maxSize = config.GetMediaEmojiRemoteMaxSize() + } + + // Check that provided size isn't beyond max. We check beforehand + // so that we don't attempt to stream the emoji into storage if not needed. + if size := bytesize.Size(sz); sz > 0 && size > maxSize { + return gtserror.Newf("given emoji size %s greater than max allowed %s", size, maxSize) + } - // Read the first 261 header bytes into buffer. - if _, err := io.ReadFull(rc, hdrBuf); err != nil { - return gtserror.Newf("error reading incoming media: %w", err) + // Prepare to read bytes from + // file header or magic number. + fileSize := int(sz) + hdrBuf := newHdrBuf(fileSize) + + // Read into buffer as much as possible. + // + // UnexpectedEOF means we couldn't read up to the + // given size, but we may still have read something. + // + // EOF means we couldn't read anything at all. + // + // Any other error likely means the connection messed up. + // + // In other words, rather counterintuitively, we + // can only proceed on no error or unexpected error! 
+ n, err := io.ReadFull(rc, hdrBuf) + if err != nil { + if err != io.ErrUnexpectedEOF { + return gtserror.Newf("error reading first bytes of incoming media: %w", err) + } + + // Initial file size was misreported, so we didn't read + // fully into hdrBuf. Reslice it to the size we did read. + log.Warnf(ctx, + "recovered from misreported file size; reported %d; read %d", + fileSize, n, + ) + hdrBuf = hdrBuf[:n] } // Parse file type info from header buffer. @@ -184,24 +221,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { // Recombine header bytes with remaining stream r := io.MultiReader(bytes.NewReader(hdrBuf), rc) - var maxSize bytesize.Size - - if p.emoji.Domain == "" { - // this is a local emoji upload - maxSize = config.GetMediaEmojiLocalMaxSize() - } else { - // this is a remote incoming emoji - maxSize = config.GetMediaEmojiRemoteMaxSize() - } - - // Check that provided size isn't beyond max. We check beforehand - // so that we don't attempt to stream the emoji into storage if not needed. - if size := bytesize.Size(sz); sz > 0 && size > maxSize { - return gtserror.Newf("given emoji size %s greater than max allowed %s", size, maxSize) - } - var pathID string - if p.newPathID != "" { // This is a refreshed emoji with a new // path ID that this will be stored under. @@ -215,11 +235,10 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { instanceAccID := regexes.FilePath.FindStringSubmatch(p.emoji.ImageStaticPath)[1] // Calculate emoji file path. - p.emoji.ImagePath = fmt.Sprintf( - "%s/%s/%s/%s.%s", + p.emoji.ImagePath = uris.StoragePathForAttachment( instanceAccID, - TypeEmoji, - SizeOriginal, + string(TypeEmoji), + string(SizeOriginal), pathID, info.Extension, ) @@ -235,14 +254,13 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { } // Write the final image reader stream to our storage. - sz, err = p.mgr.state.Storage.PutStream(ctx, p.emoji.ImagePath, r) + wroteSize, err := p.mgr.state.Storage.PutStream(ctx, p.emoji.ImagePath, r) if err != nil { return gtserror.Newf("error writing emoji to storage: %w", err) } // Once again check size in case none was provided previously. - if size := bytesize.Size(sz); size > maxSize { - + if size := bytesize.Size(wroteSize); size > maxSize { if err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImagePath); err != nil { log.Errorf(ctx, "error removing too-large-emoji from storage: %v", err) } @@ -251,7 +269,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { } // Fill in remaining attachment data now it's stored. 
- p.emoji.ImageURL = uris.GenerateURIForAttachment( + p.emoji.ImageURL = uris.URIForAttachment( instanceAccID, string(TypeEmoji), string(SizeOriginal), @@ -259,11 +277,8 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { info.Extension, ) p.emoji.ImageContentType = info.MIME.Value - p.emoji.ImageFileSize = int(sz) - p.emoji.Cached = func() *bool { - ok := true - return &ok - }() + p.emoji.ImageFileSize = int(wroteSize) + p.emoji.Cached = util.Ptr(true) return nil } diff --git a/internal/media/processingmedia.go b/internal/media/processingmedia.go index 591cbac74..3627d8cd3 100644 --- a/internal/media/processingmedia.go +++ b/internal/media/processingmedia.go @@ -20,12 +20,12 @@ package media import ( "bytes" "context" - "fmt" + "errors" "image/jpeg" "io" "time" - "codeberg.org/gruf/go-errors/v2" + errorsv2 "codeberg.org/gruf/go-errors/v2" "codeberg.org/gruf/go-runners" "github.com/disintegration/imaging" "github.com/h2non/filetype" @@ -33,11 +33,14 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/superseriousbusiness/gotosocial/internal/storage" "github.com/superseriousbusiness/gotosocial/internal/uris" + "github.com/superseriousbusiness/gotosocial/internal/util" ) -// ProcessingMedia represents a piece of media that is currently being processed. It exposes -// various functions for retrieving data from the process. +// ProcessingMedia represents a piece of media +// currently being processed. It exposes functions +// for retrieving data from the process. type ProcessingMedia struct { media *gtsmodel.MediaAttachment // processing media attachment details dataFn DataFunc // load-data function, returns media stream @@ -48,39 +51,56 @@ type ProcessingMedia struct { mgr *Manager // mgr instance (access to db / storage) } -// AttachmentID returns the ID of the underlying media attachment without blocking processing. +// AttachmentID returns the ID of the underlying +// media attachment without blocking processing. func (p *ProcessingMedia) AttachmentID() string { return p.media.ID // immutable, safe outside mutex. } -// LoadAttachment blocks until the thumbnail and fullsize content has been processed, and then returns the completed attachment. +// LoadAttachment blocks until the thumbnail and +// fullsize content has been processed, and then +// returns the attachment. +// +// If processing could not be completed fully +// then an error will be returned. The attachment +// will still be returned in that case, but it will +// only be partially complete and should be treated +// as a placeholder. func (p *ProcessingMedia) LoadAttachment(ctx context.Context) (*gtsmodel.MediaAttachment, error) { // Attempt to load synchronously. media, done, err := p.load(ctx) - if err == nil { // No issue, return media. return media, nil } if !done { - // Provided context was cancelled, e.g. request cancelled - // early. Queue this item for asynchronous processing. + // Provided context was cancelled, + // e.g. request aborted early before + // its context could be used to finish + // loading the attachment. Enqueue for + // asynchronous processing, which will + // use a background context. log.Warnf(ctx, "reprocessing media %s after canceled ctx", p.media.ID) go p.mgr.state.Workers.Media.Enqueue(p.Process) } - return nil, err + // Media could not be retrieved FULLY, + // but partial attachment should be present. 
+ return media, err } -// Process allows the receiving object to fit the runners.WorkerFunc signature. It performs a (blocking) load and logs on error. +// Process allows the receiving object to fit the +// runners.WorkerFunc signature. It performs a +// (blocking) load and logs on error. func (p *ProcessingMedia) Process(ctx context.Context) { if _, _, err := p.load(ctx); err != nil { - log.Errorf(ctx, "error processing media: %v", err) + log.Errorf(ctx, "error(s) processing media: %v", err) } } -// load performs a concurrency-safe load of ProcessingMedia, only marking itself as complete when returned error is NOT a context cancel. +// load performs a concurrency-safe load of ProcessingMedia, only +// marking itself as complete when returned error is NOT a context cancel. func (p *ProcessingMedia) load(ctx context.Context) (*gtsmodel.MediaAttachment, bool, error) { var ( done bool @@ -95,7 +115,7 @@ func (p *ProcessingMedia) load(ctx context.Context) (*gtsmodel.MediaAttachment, defer func() { // This is only done when ctx NOT cancelled. - done = err == nil || !errors.Comparable(err, + done = err == nil || !errorsv2.Comparable(err, context.Canceled, context.DeadlineExceeded, ) @@ -109,34 +129,61 @@ func (p *ProcessingMedia) load(ctx context.Context) (*gtsmodel.MediaAttachment, p.err = err }() + // Gather errors as we proceed. + var errs = gtserror.NewMultiError(4) + // Attempt to store media and calculate // full-size media attachment details. - if err = p.store(ctx); err != nil { - return err + // + // This will update p.media as it goes. + storeErr := p.store(ctx) + if storeErr != nil { + errs.Append(storeErr) } // Finish processing by reloading media into // memory to get dimension and generate a thumb. - if err = p.finish(ctx); err != nil { - return err + // + // This will update p.media as it goes. + if finishErr := p.finish(ctx); finishErr != nil { + errs.Append(finishErr) + } + + // If this isn't a file we were able to process, + // we may have partially stored it (eg., it's a + // jpeg, which is fine, but streaming it to storage + // was interrupted halfway through and so it was + // never decoded). Try to clean up in this case. + if p.media.Type == gtsmodel.FileTypeUnknown { + deleteErr := p.mgr.state.Storage.Delete(ctx, p.media.File.Path) + if deleteErr != nil && !errors.Is(deleteErr, storage.ErrNotFound) { + errs.Append(deleteErr) + } + } + + var dbErr error + switch { + case !p.recache: + // First time caching this attachment, insert it. + dbErr = p.mgr.state.DB.PutAttachment(ctx, p.media) + + case p.recache && len(errs) == 0: + // Existing attachment we're recaching, update it. + // + // (We only want to update if everything went OK so far, + // otherwise we'd better leave previous version alone.) + dbErr = p.mgr.state.DB.UpdateAttachment(ctx, p.media) } - if p.recache { - // Existing attachment we're recaching, so only update. - err = p.mgr.state.DB.UpdateAttachment(ctx, p.media) - return err + if dbErr != nil { + errs.Append(dbErr) } - // First time caching this attachment, insert it. - err = p.mgr.state.DB.PutAttachment(ctx, p.media) + err = errs.Combine() return err }) - if err != nil { - return nil, done, err - } - - return p.media, done, nil + return p.media, done, err } // store calls the data function attached to p if it hasn't been called yet, @@ -156,17 +203,47 @@ func (p *ProcessingMedia) store(ctx context.Context) error { } }() - // Byte buffer to read file header into. 
- // See: https://en.wikipedia.org/wiki/File_format#File_header - // and https://github.com/h2non/filetype - hdrBuf := make([]byte, 261) + // Assume we're given correct file + // size, we can overwrite this later + // once we know THE TRUTH. + fileSize := int(sz) + p.media.File.FileSize = fileSize + + // Prepare to read bytes from + // file header or magic number. + hdrBuf := newHdrBuf(fileSize) + + // Read into buffer as much as possible. + // + // UnexpectedEOF means we couldn't read up to the + // given size, but we may still have read something. + // + // EOF means we couldn't read anything at all. + // + // Any other error likely means the connection messed up. + // + // In other words, rather counterintuitively, we + // can only proceed on no error or unexpected error! + n, err := io.ReadFull(rc, hdrBuf) + if err != nil { + if err != io.ErrUnexpectedEOF { + return gtserror.Newf("error reading first bytes of incoming media: %w", err) + } - // Read the first 261 header bytes into buffer as much as possible. - if _, err := rc.Read(hdrBuf); err != nil { - return gtserror.Newf("error reading incoming media: %w", err) + // Initial file size was misreported, so we didn't read + // fully into hdrBuf. Reslice it to the size we did read. + log.Warnf(ctx, + "recovered from misreported file size; reported %d; read %d", + fileSize, n, + ) + hdrBuf = hdrBuf[:n] + fileSize = n + p.media.File.FileSize = fileSize } // Parse file type info from header buffer. + // This should only ever error if the buffer + // is empty (ie., the attachment is 0 bytes). info, err := filetype.Match(hdrBuf) if err != nil { return gtserror.Newf("error parsing file type: %w", err) @@ -175,38 +252,77 @@ func (p *ProcessingMedia) store(ctx context.Context) error { // Recombine header bytes with remaining stream r := io.MultiReader(bytes.NewReader(hdrBuf), rc) + // Assume we'll put + // this file in storage. + store := true + switch info.Extension { case "mp4": - p.media.Type = gtsmodel.FileTypeVideo + // No problem. case "gif": - p.media.Type = gtsmodel.FileTypeImage + // No problem case "jpg", "jpeg", "png", "webp": - p.media.Type = gtsmodel.FileTypeImage - if sz > 0 { - // A file size was provided so we can clean exif data from image. - r, err = terminator.Terminate(r, int(sz), info.Extension) + if fileSize > 0 { + // A file size was provided so we can clean + // exif data from image as we're streaming it. + r, err = terminator.Terminate(r, fileSize, info.Extension) if err != nil { return gtserror.Newf("error cleaning exif data: %w", err) } } default: - return gtserror.Newf("unsupported file type: %s", info.Extension) + // The file is not a supported format that + // we can process, so we can't do much with it. + log.Warnf(ctx, + "media extension '%s' not officially supported, will be processed as "+ + "type '%s' with minimal metadata, and will not be cached locally", + info.Extension, gtsmodel.FileTypeUnknown, + ) + + // Don't bother storing this. + store = false } + // Fill in correct attachment + // data now we're parsed it. + p.media.URL = uris.URIForAttachment( + p.media.AccountID, + string(TypeAttachment), + string(SizeOriginal), + p.media.ID, + info.Extension, + ) + + // Prefer discovered mime type, fall back to + // generic "this contains some bytes" type. + mime := info.MIME.Value + if mime == "" { + mime = "application/octet-stream" + } + p.media.File.ContentType = mime + // Calculate attachment file path. 
- p.media.File.Path = fmt.Sprintf( - "%s/%s/%s/%s.%s", + p.media.File.Path = uris.StoragePathForAttachment( p.media.AccountID, - TypeAttachment, - SizeOriginal, + string(TypeAttachment), + string(SizeOriginal), p.media.ID, info.Extension, ) - // This shouldn't already exist, but we do a check as it's worth logging. + // We should only try to store the file if it's + // a format we can keep processing, otherwise be + // a bit cheeky: don't store it and let users + // click through to the remote server instead. + if !store { + return nil + } + + // File shouldn't already exist in storage at this point, + // but we do a check as it's worth logging / cleaning up. if have, _ := p.mgr.state.Storage.Has(ctx, p.media.File.Path); have { log.Warnf(ctx, "media already exists at storage path: %s", p.media.File.Path) @@ -216,59 +332,99 @@ func (p *ProcessingMedia) store(ctx context.Context) error { } } - // Write the final image reader stream to our storage. - sz, err = p.mgr.state.Storage.PutStream(ctx, p.media.File.Path, r) + // Write the final reader stream to our storage. + wroteSize, err := p.mgr.state.Storage.PutStream(ctx, p.media.File.Path, r) if err != nil { return gtserror.Newf("error writing media to storage: %w", err) } - // Set written image size. - p.media.File.FileSize = int(sz) + // Set actual written size + // as authoritative file size. + p.media.File.FileSize = int(wroteSize) + + // We can now consider this cached. + p.media.Cached = util.Ptr(true) + + return nil +} - // Fill in remaining attachment data now it's stored. - p.media.URL = uris.GenerateURIForAttachment( +func (p *ProcessingMedia) finish(ctx context.Context) error { + // Make a jolly assumption about thumbnail type. + p.media.Thumbnail.ContentType = mimeImageJpeg + + // Calculate attachment thumbnail file path + p.media.Thumbnail.Path = uris.StoragePathForAttachment( p.media.AccountID, string(TypeAttachment), - string(SizeOriginal), + string(SizeSmall), p.media.ID, - info.Extension, + // Always encode attachment + // thumbnails as jpg. + "jpg", ) - p.media.File.ContentType = info.MIME.Value - p.media.Cached = func() *bool { - ok := true - return &ok - }() - return nil -} + // Calculate attachment thumbnail serve path. + p.media.Thumbnail.URL = uris.URIForAttachment( + p.media.AccountID, + string(TypeAttachment), + string(SizeSmall), + p.media.ID, + // Always encode attachment + // thumbnails as jpg. + "jpg", + ) -func (p *ProcessingMedia) finish(ctx context.Context) error { - // Fetch a stream to the original file in storage. + // If original file hasn't been stored, there's + // likely something wrong with the data, or we + // don't want to store it. Skip everything else. + if !*p.media.Cached { + p.media.Processing = gtsmodel.ProcessingStatusProcessed + return nil + } + + // Get a stream to the original file for further processing. rc, err := p.mgr.state.Storage.GetStream(ctx, p.media.File.Path) if err != nil { return gtserror.Newf("error loading file from storage: %w", err) } defer rc.Close() + // fullImg is the processed version of + // the original (stripped + reoriented). var fullImg *gtsImage + // Depending on the content type, we + // can do various types of decoding. 
switch p.media.File.ContentType { + // .jpeg, .gif, .webp image type case mimeImageJpeg, mimeImageGif, mimeImageWebp: - fullImg, err = decodeImage(rc, imaging.AutoOrientation(true)) + fullImg, err = decodeImage( + rc, + imaging.AutoOrientation(true), + ) if err != nil { return gtserror.Newf("error decoding image: %w", err) } + // Mark as no longer unknown type now + // we know for sure we can decode it. + p.media.Type = gtsmodel.FileTypeImage + // .png image (requires ancillary chunk stripping) case mimeImagePng: - fullImg, err = decodeImage(&pngAncillaryChunkStripper{ - Reader: rc, - }, imaging.AutoOrientation(true)) + fullImg, err = decodeImage( + &pngAncillaryChunkStripper{Reader: rc}, + imaging.AutoOrientation(true), + ) if err != nil { return gtserror.Newf("error decoding image: %w", err) } + // Mark as no longer unknown type now + // we know for sure we can decode it. + p.media.Type = gtsmodel.FileTypeImage + // .mp4 video type case mimeVideoMp4: video, err := decodeVideoFrame(rc) @@ -283,9 +439,14 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { p.media.FileMeta.Original.Duration = &video.duration p.media.FileMeta.Original.Framerate = &video.framerate p.media.FileMeta.Original.Bitrate = &video.bitrate + + // Mark as no longer unknown type now + // we know for sure we can decode it. + p.media.Type = gtsmodel.FileTypeVideo } - // The image should be in-memory by now. + // fullImg should be in-memory by + // now so we're done with storage. if err := rc.Close(); err != nil { return gtserror.Newf("error closing file: %w", err) } @@ -296,15 +457,6 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { p.media.FileMeta.Original.Size = int(fullImg.Size()) p.media.FileMeta.Original.Aspect = fullImg.AspectRatio() - // Calculate attachment thumbnail file path - p.media.Thumbnail.Path = fmt.Sprintf( - "%s/%s/%s/%s.jpg", - p.media.AccountID, - TypeAttachment, - SizeSmall, - p.media.ID, - ) - // Get smaller thumbnail image thumbImg := fullImg.Thumbnail() @@ -312,16 +464,20 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { // now take our large son. fullImg = nil - // Blurhash needs generating from thumb. - hash, err := thumbImg.Blurhash() - if err != nil { - return gtserror.Newf("error generating blurhash: %w", err) - } + // Only generate blurhash + // from thumb if necessary. + if p.media.Blurhash == "" { + hash, err := thumbImg.Blurhash() + if err != nil { + return gtserror.Newf("error generating blurhash: %w", err) + } - // Set the attachment blurhash. - p.media.Blurhash = hash + // Set the attachment blurhash. + p.media.Blurhash = hash + } - // This shouldn't already exist, but we do a check as it's worth logging. + // Thumbnail shouldn't already exist in storage at this point, + // but we do a check as it's worth logging / cleaning up. if have, _ := p.mgr.state.Storage.Has(ctx, p.media.Thumbnail.Path); have { log.Warnf(ctx, "thumbnail already exists at storage path: %s", p.media.Thumbnail.Path) @@ -333,7 +489,9 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { // Create a thumbnail JPEG encoder stream. enc := thumbImg.ToJPEG(&jpeg.Options{ - Quality: 70, // enough for a thumbnail. + // Good enough for + // a thumbnail. + Quality: 70, }) // Stream-encode the JPEG thumbnail image into storage. 
@@ -342,16 +500,6 @@ func (p *ProcessingMedia) finish(ctx context.Context) error { return gtserror.Newf("error stream-encoding thumbnail to storage: %w", err) } - // Fill in remaining thumbnail now it's stored - p.media.Thumbnail.ContentType = mimeImageJpeg - p.media.Thumbnail.URL = uris.GenerateURIForAttachment( - p.media.AccountID, - string(TypeAttachment), - string(SizeSmall), - p.media.ID, - "jpg", // always jpeg - ) - // Set thumbnail dimensions in attachment info. p.media.FileMeta.Small = gtsmodel.Small{ Width: int(thumbImg.Width()), diff --git a/internal/media/test/Frantz-Fanon-The-Wretched-of-the-Earth-1965.pdf b/internal/media/test/Frantz-Fanon-The-Wretched-of-the-Earth-1965.pdf Binary files differnew file mode 100644 index 000000000..b2e4de4a5 --- /dev/null +++ b/internal/media/test/Frantz-Fanon-The-Wretched-of-the-Earth-1965.pdf diff --git a/internal/media/types.go b/internal/media/types.go index 35c62a947..6e7727cd5 100644 --- a/internal/media/types.go +++ b/internal/media/types.go @@ -44,9 +44,6 @@ const ( mimeVideoMp4 = mimeVideo + "/" + mimeMp4 ) -// EmojiMaxBytes is the maximum permitted bytes of an emoji upload (50kb) -// const EmojiMaxBytes = 51200 - type Size string const ( diff --git a/internal/media/util.go b/internal/media/util.go new file mode 100644 index 000000000..1595da6d7 --- /dev/null +++ b/internal/media/util.go @@ -0,0 +1,42 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +package media + +// newHdrBuf returns a buffer of suitable size to +// read bytes from a file header or magic number. +// +// File header is *USUALLY* 261 bytes at the start +// of a file; magic number can be much less than +// that (just a few bytes). +// +// To cover both cases, this function returns a buffer +// suitable for whichever is smallest: the first 261 +// bytes of the file, or the whole file. +// +// See: +// +// - https://en.wikipedia.org/wiki/File_format#File_header +// - https://github.com/h2non/filetype. +func newHdrBuf(fileSize int) []byte { + bufSize := 261 + if fileSize > 0 && fileSize < bufSize { + bufSize = fileSize + } + + return make([]byte, bufSize) +} |
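The new newHdrBuf() helper above is only half of the pattern: after sniffing, the consumed header bytes have to be stitched back in front of the remaining stream before decoding or storage sees the file. A hypothetical in-package usage, condensed from store() in processingmedia.go (the function name recombinedReader is illustrative, not part of this commit):

```go
package media

import (
	"bytes"
	"io"
)

// recombinedReader reads the file header from rc using a buffer sized
// by newHdrBuf, then recombines those consumed bytes with the rest of
// the stream so downstream decoding/storage sees the whole file.
func recombinedReader(rc io.ReadCloser, reportedSize int64) (io.Reader, []byte, error) {
	hdrBuf := newHdrBuf(int(reportedSize))

	// Tolerate an over-reported size, as store() does.
	n, err := io.ReadFull(rc, hdrBuf)
	if err != nil && err != io.ErrUnexpectedEOF {
		return nil, nil, err
	}
	hdrBuf = hdrBuf[:n]

	// Stitch the header back in front of the remaining stream.
	return io.MultiReader(bytes.NewReader(hdrBuf), rc), hdrBuf, nil
}
```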