author     2023-01-11 11:13:13 +0000
committer  2023-01-11 12:13:13 +0100
commit     53180548083c0a100db2f703d5f5da047a9e0031
tree       a8eb1df9d03b37f907a747ae42cc8992d2ff9f52 /internal/media
parent     [feature] Add local user and post count to nodeinfo responses (#1325)
download   gotosocial-53180548083c0a100db2f703d5f5da047a9e0031.tar.xz
[performance] media processing improvements (#1288)
* media processor consolidation and reformatting, reduce amount of required syscalls
Signed-off-by: kim <grufwub@gmail.com>
* update go-store library, stream jpeg/png encoding + use buffer pools, improved media processing AlreadyExists error handling
Signed-off-by: kim <grufwub@gmail.com>
* fix duration not being set, fix mp4 test expecting error
Signed-off-by: kim <grufwub@gmail.com>
* fix test expecting media files with different extension
Signed-off-by: kim <grufwub@gmail.com>
* remove unused code
Signed-off-by: kim <grufwub@gmail.com>
* fix expected storage paths in tests, update expected test thumbnails
Signed-off-by: kim <grufwub@gmail.com>
* remove dead code
Signed-off-by: kim <grufwub@gmail.com>
* fix cached presigned s3 url fetching
Signed-off-by: kim <grufwub@gmail.com>
* fix tests
Signed-off-by: kim <grufwub@gmail.com>
* fix test models
Signed-off-by: kim <grufwub@gmail.com>
* update media processing to use sync.Once{} for concurrency protection
Signed-off-by: kim <grufwub@gmail.com>
* shutup linter
Signed-off-by: kim <grufwub@gmail.com>
* fix passing in KVStore GetStream() as stream to PutStream()
Signed-off-by: kim <grufwub@gmail.com>
* fix unlocks of storage keys
Signed-off-by: kim <grufwub@gmail.com>
* whoops, return the error...
Signed-off-by: kim <grufwub@gmail.com>
* pour one out for tobi's code <3
Signed-off-by: kim <grufwub@gmail.com>
* add back the byte slurping code
Signed-off-by: kim <grufwub@gmail.com>
* check for both ErrUnexpectedEOF and EOF
Signed-off-by: kim <grufwub@gmail.com>
* add back links to file format header information
Signed-off-by: kim <grufwub@gmail.com>
Signed-off-by: kim <grufwub@gmail.com>
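The bullet above about moving media processing onto sync.Once{} for concurrency protection replaces the mutex-plus-atomic state machine visible in the diffs further down. The sketch below is a minimal, hypothetical illustration of that pattern — the processor type and do callback are invented for the example, not the commit's actual API: every concurrent caller blocks on the same one-shot run and then shares its stored result, with a panic inside the run converted into a stored error (as the new LoadEmoji/LoadAttachment code does).

```go
package main

import (
	"context"
	"fmt"
	"sync"
)

// processor runs an expensive job at most once, no matter how many
// goroutines ask for the result; later callers reuse the stored outcome.
type processor struct {
	once sync.Once
	out  string
	err  error
}

// Load triggers processing on the first call; every caller (including
// concurrent ones) blocks until that single run finishes, then returns
// the same value/error pair.
func (p *processor) Load(ctx context.Context, do func(context.Context) (string, error)) (string, error) {
	p.once.Do(func() {
		defer func() {
			// Convert a panic inside the one-shot run into a stored error,
			// so later callers see a failure instead of nothing at all.
			if r := recover(); r != nil {
				p.err = fmt.Errorf("caught panic: %v", r)
			}
		}()
		p.out, p.err = do(ctx)
	})
	return p.out, p.err
}

func main() {
	p := &processor{}
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			out, err := p.Load(context.Background(), func(ctx context.Context) (string, error) {
				fmt.Println("processing (runs exactly once)")
				return "done", nil
			})
			fmt.Println(out, err)
		}()
	}
	wg.Wait()
}
```

Compared with the removed thumbState/fullSizeState atomics, the once-guarded approach needs no explicit received/complete/errored states: later callers simply re-read the stored result.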
Diffstat (limited to 'internal/media')
-rw-r--r--  internal/media/image.go                                    |  273
-rw-r--r--  internal/media/manager.go                                  |    6
-rw-r--r--  internal/media/manager_test.go                             |   31
-rw-r--r--  internal/media/png-stripper.go                             |   12
-rw-r--r--  internal/media/processingemoji.go                          |  373
-rw-r--r--  internal/media/processingmedia.go                          |  593
-rw-r--r--  internal/media/pruneorphaned_test.go                       |    4
-rw-r--r--  internal/media/pruneremote_test.go                         |    2
-rw-r--r--  internal/media/test/longer-mp4-thumbnail.jpg               |  bin 3784 -> 2897 bytes
-rw-r--r--  internal/media/test/test-jpeg-thumbnail.jpg                |  bin 22858 -> 20973 bytes
-rw-r--r--  internal/media/test/test-mp4-thumbnail.jpg                 |  bin 1912 -> 1913 bytes
-rw-r--r--  internal/media/test/test-png-alphachannel-thumbnail.jpg    |  bin 6446 -> 5984 bytes
-rw-r--r--  internal/media/test/test-png-noalphachannel-thumbnail.jpg  |  bin 6446 -> 5984 bytes
-rw-r--r--  internal/media/types.go                                    |   29
-rw-r--r--  internal/media/util.go                                     |   96
-rw-r--r--  internal/media/video.go                                    |  130
16 files changed, 651 insertions(+), 898 deletions(-)
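The image.go changes that follow replace buffer-everything-then-write encoding (bytes.Buffer plus png.Encode/jpeg.Encode) with pooled, streaming encoders. The sketch below illustrates that pattern for JPEG only; it uses a plain io.Pipe where the commit uses the internal iotools.StreamWriteFunc helper (whose body is not part of this diff), so treat it as an approximation of the idea rather than the project's code.

```go
package main

import (
	"bufio"
	"fmt"
	"image"
	"image/color"
	"image/jpeg"
	"io"
	"sync"
)

// jpegWriterPool recycles bufio.Writers between encodes so each JPEG
// encode doesn't allocate a fresh write buffer.
var jpegWriterPool = sync.Pool{
	New: func() any { return bufio.NewWriter(nil) },
}

// streamJPEG returns a reader that yields JPEG bytes as they are encoded,
// instead of buffering the whole encoded image in memory first. An io.Pipe
// stands in here for the internal iotools.StreamWriteFunc helper.
func streamJPEG(img image.Image, opts *jpeg.Options) io.Reader {
	pr, pw := io.Pipe()
	go func() {
		bw := jpegWriterPool.Get().(*bufio.Writer)
		bw.Reset(pw)

		// jpeg.Encode flushes a bufio.Writer itself, so no extra Flush here.
		err := jpeg.Encode(bw, img, opts)

		// Detach the buffer from the pipe and return it to the pool.
		bw.Reset(nil)
		jpegWriterPool.Put(bw)

		// Propagate any encode error to the reading side, or signal EOF.
		pw.CloseWithError(err)
	}()
	return pr
}

func main() {
	// Build a small gradient image to encode.
	img := image.NewRGBA(image.Rect(0, 0, 64, 64))
	for y := 0; y < 64; y++ {
		for x := 0; x < 64; x++ {
			img.Set(x, y, color.RGBA{uint8(x * 4), uint8(y * 4), 128, 255})
		}
	}

	// Consume the stream as if writing it to object storage.
	n, err := io.Copy(io.Discard, streamJPEG(img, &jpeg.Options{Quality: 70}))
	fmt.Println("encoded", n, "bytes, err:", err)
}
```

The PNG side of the diff uses the standard library's png.Encoder with a custom png.EncoderBufferPool for the same purpose: encode buffers are reused across calls instead of reallocated.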
diff --git a/internal/media/image.go b/internal/media/image.go index b168c619e..b3eff6bec 100644 --- a/internal/media/image.go +++ b/internal/media/image.go @@ -19,182 +19,167 @@ package media import ( - "bytes" - "errors" - "fmt" + "bufio" "image" - "image/gif" + "image/color" + "image/draw" "image/jpeg" "image/png" "io" + "sync" "github.com/buckket/go-blurhash" "github.com/disintegration/imaging" - _ "golang.org/x/image/webp" // blank import to support WebP decoding + "github.com/superseriousbusiness/gotosocial/internal/iotools" + + // import to init webp encode/decoding. + _ "golang.org/x/image/webp" ) -const ( - thumbnailMaxWidth = 512 - thumbnailMaxHeight = 512 +var ( + // pngEncoder provides our global PNG encoding with + // specified compression level, and memory pooled buffers. + pngEncoder = png.Encoder{ + CompressionLevel: png.DefaultCompression, + BufferPool: &pngEncoderBufferPool{}, + } + + // jpegBufferPool is a memory pool of byte buffers for JPEG encoding. + jpegBufferPool = sync.Pool{ + New: func() any { + return bufio.NewWriter(nil) + }, + } ) -func decodeGif(r io.Reader) (*mediaMeta, error) { - gif, err := gif.DecodeAll(r) +// gtsImage is a thin wrapper around the standard library image +// interface to provide our own useful helper functions for image +// size and aspect ratio calculations, streamed encoding to various +// types, and creating reduced size thumbnail images. +type gtsImage struct{ image image.Image } + +// blankImage generates a blank image of given dimensions. +func blankImage(width int, height int) *gtsImage { + // create a rectangle with the same dimensions as the video + img := image.NewRGBA(image.Rect(0, 0, width, height)) + + // fill the rectangle with our desired fill color. + draw.Draw(img, img.Bounds(), &image.Uniform{ + color.RGBA{42, 43, 47, 0}, + }, image.Point{}, draw.Src) + + return >sImage{image: img} +} + +// decodeImage will decode image from reader stream and return image wrapped in our own gtsImage{} type. +func decodeImage(r io.Reader, opts ...imaging.DecodeOption) (*gtsImage, error) { + img, err := imaging.Decode(r, opts...) if err != nil { return nil, err } + return >sImage{image: img}, nil +} - // use the first frame to get the static characteristics - width := gif.Config.Width - height := gif.Config.Height - size := width * height - aspect := float32(width) / float32(height) - - return &mediaMeta{ - width: width, - height: height, - size: size, - aspect: aspect, - }, nil +// Width returns the image width in pixels. +func (m *gtsImage) Width() uint32 { + return uint32(m.image.Bounds().Size().X) } -func decodeImage(r io.Reader, contentType string) (*mediaMeta, error) { - var i image.Image - var err error - - switch contentType { - case mimeImageJpeg, mimeImageWebp: - i, err = imaging.Decode(r, imaging.AutoOrientation(true)) - case mimeImagePng: - strippedPngReader := io.Reader(&PNGAncillaryChunkStripper{ - Reader: r, - }) - i, err = imaging.Decode(strippedPngReader, imaging.AutoOrientation(true)) - default: - err = fmt.Errorf("content type %s not recognised", contentType) - } +// Height returns the image height in pixels. +func (m *gtsImage) Height() uint32 { + return uint32(m.image.Bounds().Size().Y) +} - if err != nil { - return nil, err - } +// Size returns the total number of image pixels. +func (m *gtsImage) Size() uint64 { + return uint64(m.image.Bounds().Size().X) * + uint64(m.image.Bounds().Size().Y) +} + +// AspectRatio returns the image ratio of width:height. 
+func (m *gtsImage) AspectRatio() float32 { + return float32(m.image.Bounds().Size().X) / + float32(m.image.Bounds().Size().Y) +} - if i == nil { - return nil, errors.New("processed image was nil") +// Thumbnail returns a small sized copy of gtsImage{}, limited to 512x512 if not small enough. +func (m *gtsImage) Thumbnail() *gtsImage { + const ( + // max thumb + // dimensions. + maxWidth = 512 + maxHeight = 512 + ) + + // Check the receiving image is within max thumnail bounds. + if m.Width() <= maxWidth && m.Height() <= maxHeight { + return >sImage{image: imaging.Clone(m.image)} } - width := i.Bounds().Size().X - height := i.Bounds().Size().Y - size := width * height - aspect := float32(width) / float32(height) - - return &mediaMeta{ - width: width, - height: height, - size: size, - aspect: aspect, - }, nil + // Image is too large, needs to be resized to thumbnail max. + img := imaging.Fit(m.image, maxWidth, maxHeight, imaging.Linear) + return >sImage{image: img} } -// deriveStaticEmojji takes a given gif or png of an emoji, decodes it, and re-encodes it as a static png. -func deriveStaticEmoji(r io.Reader, contentType string) (*mediaMeta, error) { - var i image.Image - var err error - - switch contentType { - case mimeImagePng: - i, err = StrippedPngDecode(r) - if err != nil { - return nil, err - } - case mimeImageGif: - i, err = gif.Decode(r) - if err != nil { - return nil, err - } - default: - return nil, fmt.Errorf("content type %s not allowed for emoji", contentType) - } +// Blurhash calculates the blurhash for the receiving image data. +func (m *gtsImage) Blurhash() (string, error) { + // for generating blurhashes, it's more cost effective to + // lose detail since it's blurry, so make a tiny version. + tiny := imaging.Resize(m.image, 32, 0, imaging.NearestNeighbor) - out := &bytes.Buffer{} - if err := png.Encode(out, i); err != nil { - return nil, err - } - return &mediaMeta{ - small: out.Bytes(), - }, nil + // Encode blurhash from resized version + return blurhash.Encode(4, 3, tiny) } -// deriveThumbnailFromImage returns a byte slice and metadata for a thumbnail -// of a given piece of media, or an error if something goes wrong. -// -// If createBlurhash is true, then a blurhash will also be generated from a tiny -// version of the image. This costs precious CPU cycles, so only use it if you -// really need a blurhash and don't have one already. -// -// If createBlurhash is false, then the blurhash field on the returned ImageAndMeta -// will be an empty string. -func deriveThumbnailFromImage(r io.Reader, contentType string, createBlurhash bool) (*mediaMeta, error) { - var i image.Image - var err error - - switch contentType { - case mimeImageJpeg, mimeImageGif, mimeImageWebp: - i, err = imaging.Decode(r, imaging.AutoOrientation(true)) - case mimeImagePng: - strippedPngReader := io.Reader(&PNGAncillaryChunkStripper{ - Reader: r, - }) - i, err = imaging.Decode(strippedPngReader, imaging.AutoOrientation(true)) - default: - err = fmt.Errorf("content type %s can't be thumbnailed as an image", contentType) - } +// ToJPEG creates a new streaming JPEG encoder from receiving image, and a size ptr +// which stores the number of bytes written during the image encoding process. +func (m *gtsImage) ToJPEG(opts *jpeg.Options) io.Reader { + return iotools.StreamWriteFunc(func(w io.Writer) error { + // Get encoding buffer + bw := getJPEGBuffer(w) - if err != nil { - return nil, fmt.Errorf("error decoding %s: %s", contentType, err) - } + // Encode JPEG to buffered writer. 
+ err := jpeg.Encode(bw, m.image, opts) - originalX := i.Bounds().Size().X - originalY := i.Bounds().Size().Y + // Replace buffer. + // + // NOTE: jpeg.Encode() already + // performs a bufio.Writer.Flush(). + putJPEGBuffer(bw) - var thumb image.Image - if originalX <= thumbnailMaxWidth && originalY <= thumbnailMaxHeight { - // it's already small, no need to resize - thumb = i - } else { - thumb = imaging.Fit(i, thumbnailMaxWidth, thumbnailMaxHeight, imaging.Linear) - } + return err + }) +} - thumbX := thumb.Bounds().Size().X - thumbY := thumb.Bounds().Size().Y - size := thumbX * thumbY - aspect := float32(thumbX) / float32(thumbY) +// ToPNG creates a new streaming PNG encoder from receiving image, and a size ptr +// which stores the number of bytes written during the image encoding process. +func (m *gtsImage) ToPNG() io.Reader { + return iotools.StreamWriteFunc(func(w io.Writer) error { + return pngEncoder.Encode(w, m.image) + }) +} - im := &mediaMeta{ - width: thumbX, - height: thumbY, - size: size, - aspect: aspect, - } +// getJPEGBuffer fetches a reset JPEG encoding buffer from global JPEG buffer pool. +func getJPEGBuffer(w io.Writer) *bufio.Writer { + buf, _ := jpegBufferPool.Get().(*bufio.Writer) + buf.Reset(w) + return buf +} - if createBlurhash { - // for generating blurhashes, it's more cost effective to lose detail rather than - // pass a big image into the blurhash algorithm, so make a teeny tiny version - tiny := imaging.Resize(thumb, 32, 0, imaging.NearestNeighbor) - bh, err := blurhash.Encode(4, 3, tiny) - if err != nil { - return nil, fmt.Errorf("error creating blurhash: %s", err) - } - im.blurhash = bh - } +// putJPEGBuffer resets the given bufio writer and places in global JPEG buffer pool. +func putJPEGBuffer(buf *bufio.Writer) { + buf.Reset(nil) + jpegBufferPool.Put(buf) +} - out := &bytes.Buffer{} - if err := jpeg.Encode(out, thumb, &jpeg.Options{ - // Quality isn't extremely important for thumbnails, so 75 is "good enough" - Quality: 75, - }); err != nil { - return nil, fmt.Errorf("error encoding thumbnail: %s", err) - } - im.small = out.Bytes() +// pngEncoderBufferPool implements png.EncoderBufferPool. 
+type pngEncoderBufferPool sync.Pool + +func (p *pngEncoderBufferPool) Get() *png.EncoderBuffer { + buf, _ := (*sync.Pool)(p).Get().(*png.EncoderBuffer) + return buf +} - return im, nil +func (p *pngEncoderBufferPool) Put(buf *png.EncoderBuffer) { + (*sync.Pool)(p).Put(buf) } diff --git a/internal/media/manager.go b/internal/media/manager.go index 9b1d87673..44483787a 100644 --- a/internal/media/manager.go +++ b/internal/media/manager.go @@ -148,9 +148,6 @@ func NewManager(database db.DB, storage *storage.Driver) (Manager, error) { // Prepare the media worker pool m.mediaWorker = concurrency.NewWorkerPool[*ProcessingMedia](-1, 10) m.mediaWorker.SetProcessor(func(ctx context.Context, media *ProcessingMedia) error { - if err := ctx.Err(); err != nil { - return err - } if _, err := media.LoadAttachment(ctx); err != nil { return fmt.Errorf("error loading media %s: %v", media.AttachmentID(), err) } @@ -160,9 +157,6 @@ func NewManager(database db.DB, storage *storage.Driver) (Manager, error) { // Prepare the emoji worker pool m.emojiWorker = concurrency.NewWorkerPool[*ProcessingEmoji](-1, 10) m.emojiWorker.SetProcessor(func(ctx context.Context, emoji *ProcessingEmoji) error { - if err := ctx.Err(); err != nil { - return err - } if _, err := emoji.LoadEmoji(ctx); err != nil { return fmt.Errorf("error loading emoji %s: %v", emoji.EmojiID(), err) } diff --git a/internal/media/manager_test.go b/internal/media/manager_test.go index 1abf8c3ce..8febaddae 100644 --- a/internal/media/manager_test.go +++ b/internal/media/manager_test.go @@ -26,6 +26,7 @@ import ( "os" "path" "testing" + "time" "codeberg.org/gruf/go-store/v2/kv" "codeberg.org/gruf/go-store/v2/storage" @@ -33,7 +34,6 @@ import ( gtsmodel "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/media" gtsstorage "github.com/superseriousbusiness/gotosocial/internal/storage" - "github.com/superseriousbusiness/gotosocial/testrig" ) type ManagerTestSuite struct { @@ -214,7 +214,7 @@ func (suite *ManagerTestSuite) TestEmojiProcessBlockingTooLarge() { // do a blocking call to fetch the emoji emoji, err := processingEmoji.LoadEmoji(ctx) - suite.EqualError(err, "store: given emoji fileSize (645688b) is larger than allowed size (51200b)") + suite.EqualError(err, "given emoji size 630kiB greater than max allowed 50.0kiB") suite.Nil(emoji) } @@ -227,7 +227,7 @@ func (suite *ManagerTestSuite) TestEmojiProcessBlockingTooLargeNoSizeGiven() { if err != nil { panic(err) } - return io.NopCloser(bytes.NewBuffer(b)), int64(len(b)), nil + return io.NopCloser(bytes.NewBuffer(b)), -1, nil } emojiID := "01GDQ9G782X42BAMFASKP64343" @@ -238,7 +238,7 @@ func (suite *ManagerTestSuite) TestEmojiProcessBlockingTooLargeNoSizeGiven() { // do a blocking call to fetch the emoji emoji, err := processingEmoji.LoadEmoji(ctx) - suite.EqualError(err, "store: given emoji fileSize (645688b) is larger than allowed size (51200b)") + suite.EqualError(err, "calculated emoji size 630kiB greater than max allowed 50.0kiB") suite.Nil(emoji) } @@ -396,6 +396,9 @@ func (suite *ManagerTestSuite) TestSlothVineProcessBlocking() { // fetch the attachment id from the processing media attachmentID := processingMedia.AttachmentID() + // Give time for processing + time.Sleep(time.Second * 3) + // do a blocking call to fetch the attachment attachment, err := processingMedia.LoadAttachment(ctx) suite.NoError(err) @@ -420,7 +423,7 @@ func (suite *ManagerTestSuite) TestSlothVineProcessBlocking() { suite.Equal("video/mp4", 
attachment.File.ContentType) suite.Equal("image/jpeg", attachment.Thumbnail.ContentType) suite.Equal(312413, attachment.File.FileSize) - suite.Equal("", attachment.Blurhash) + suite.Equal("L00000fQfQfQfQfQfQfQfQfQfQfQ", attachment.Blurhash) // now make sure the attachment is in the database dbAttachment, err := suite.db.GetAttachmentByID(ctx, attachmentID) @@ -491,12 +494,12 @@ func (suite *ManagerTestSuite) TestLongerMp4ProcessBlocking() { suite.EqualValues(10, *attachment.FileMeta.Original.Framerate) suite.EqualValues(0xc8fb, *attachment.FileMeta.Original.Bitrate) suite.EqualValues(gtsmodel.Small{ - Width: 600, Height: 330, Size: 198000, Aspect: 1.8181819, + Width: 512, Height: 281, Size: 143872, Aspect: 1.822064, }, attachment.FileMeta.Small) suite.Equal("video/mp4", attachment.File.ContentType) suite.Equal("image/jpeg", attachment.Thumbnail.ContentType) suite.Equal(109549, attachment.File.FileSize) - suite.Equal("", attachment.Blurhash) + suite.Equal("L00000fQfQfQfQfQfQfQfQfQfQfQ", attachment.Blurhash) // now make sure the attachment is in the database dbAttachment, err := suite.db.GetAttachmentByID(ctx, attachmentID) @@ -550,7 +553,7 @@ func (suite *ManagerTestSuite) TestNotAnMp4ProcessBlocking() { // we should get an error while loading attachment, err := processingMedia.LoadAttachment(ctx) - suite.EqualError(err, "\"video width could not be discovered\",\"video height could not be discovered\",\"video duration could not be discovered\",\"video framerate could not be discovered\",\"video bitrate could not be discovered\"") + suite.EqualError(err, "error decoding video: error determining video metadata: [width height duration framerate bitrate]") suite.Nil(attachment) } @@ -928,7 +931,8 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessBlockingWithCallback() { } func (suite *ManagerTestSuite) TestSimpleJpegProcessAsync() { - ctx := context.Background() + ctx, cncl := context.WithTimeout(context.Background(), time.Second*30) + defer cncl() data := func(_ context.Context) (io.ReadCloser, int64, error) { // load bytes from a test image @@ -944,15 +948,12 @@ func (suite *ManagerTestSuite) TestSimpleJpegProcessAsync() { // process the media with no additional info provided processingMedia, err := suite.manager.ProcessMedia(ctx, data, nil, accountID, nil) suite.NoError(err) + // fetch the attachment id from the processing media attachmentID := processingMedia.AttachmentID() - // wait for the media to finish processing - if !testrig.WaitFor(func() bool { - return processingMedia.Finished() - }) { - suite.FailNow("timed out waiting for media to be processed") - } + // Give time for processing to happen. 
+ time.Sleep(time.Second * 3) // fetch the attachment from the database attachment, err := suite.db.GetAttachmentByID(ctx, attachmentID) diff --git a/internal/media/png-stripper.go b/internal/media/png-stripper.go index be5e80387..79b0bac05 100644 --- a/internal/media/png-stripper.go +++ b/internal/media/png-stripper.go @@ -75,8 +75,6 @@ package media import ( "encoding/binary" - "image" - "image/png" "io" ) @@ -192,13 +190,3 @@ func (r *PNGAncillaryChunkStripper) Read(p []byte) (int, error) { } } } - -// StrippedPngDecode strips ancillary data from png to allow more lenient decoding of pngs -// see: https://github.com/golang/go/issues/43382 -// and: https://github.com/google/wuffs/blob/414a011491ff513b86d8694c5d71800f3cb5a715/script/strip-png-ancillary-chunks.go -func StrippedPngDecode(r io.Reader) (image.Image, error) { - strippedPngReader := io.Reader(&PNGAncillaryChunkStripper{ - Reader: r, - }) - return png.Decode(strippedPngReader) -} diff --git a/internal/media/processingemoji.go b/internal/media/processingemoji.go index de47d23a8..b68c9dfe1 100644 --- a/internal/media/processingemoji.go +++ b/internal/media/processingemoji.go @@ -24,84 +24,74 @@ import ( "errors" "fmt" "io" - "strings" "sync" - "sync/atomic" "time" + "codeberg.org/gruf/go-bytesize" gostore "codeberg.org/gruf/go-store/v2/storage" + "github.com/h2non/filetype" "github.com/superseriousbusiness/gotosocial/internal/config" - "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/id" "github.com/superseriousbusiness/gotosocial/internal/log" - "github.com/superseriousbusiness/gotosocial/internal/storage" "github.com/superseriousbusiness/gotosocial/internal/uris" ) // ProcessingEmoji represents an emoji currently processing. It exposes // various functions for retrieving data from the process. type ProcessingEmoji struct { - mu sync.Mutex - - // id of this instance's account -- pinned for convenience here so we only need to fetch it once - instanceAccountID string - - /* - below fields should be set on newly created media; - emoji will be updated incrementally as media goes through processing - */ - - emoji *gtsmodel.Emoji - data DataFunc - postData PostDataCallbackFunc - read bool // bool indicating that data function has been triggered already - - /* - below fields represent the processing state of the static of the emoji - */ - staticState int32 - - /* - below pointers to database and storage are maintained so that - the media can store and update itself during processing steps - */ - - database db.DB - storage *storage.Driver - - err error // error created during processing, if any - - // track whether this emoji has already been put in the databse - insertedInDB bool - - // is this a refresh of an existing emoji? - refresh bool - // if it is a refresh, which alternate ID should we use in the storage and URL paths? 
- newPathID string + instAccID string // instance account ID + emoji *gtsmodel.Emoji // processing emoji details + refresh bool // whether this is an existing emoji being refreshed + newPathID string // new emoji path ID to use if refreshed + dataFn DataFunc // load-data function, returns media stream + postFn PostDataCallbackFunc // post data callback function + err error // error encountered during processing + manager *manager // manager instance (access to db / storage) + once sync.Once // once ensures processing only occurs once } // EmojiID returns the ID of the underlying emoji without blocking processing. func (p *ProcessingEmoji) EmojiID() string { - return p.emoji.ID + return p.emoji.ID // immutable, safe outside mutex. } // LoadEmoji blocks until the static and fullsize image // has been processed, and then returns the completed emoji. func (p *ProcessingEmoji) LoadEmoji(ctx context.Context) (*gtsmodel.Emoji, error) { - p.mu.Lock() - defer p.mu.Unlock() + // only process once. + p.once.Do(func() { + var err error + + defer func() { + if r := recover(); r != nil { + if err != nil { + rOld := r // wrap the panic so we don't lose existing returned error + r = fmt.Errorf("panic occured after error %q: %v", err.Error(), rOld) + } - if err := p.store(ctx); err != nil { - return nil, err - } + // Catch any panics and wrap as error. + err = fmt.Errorf("caught panic: %v", r) + } - if err := p.loadStatic(ctx); err != nil { - return nil, err - } + if err != nil { + // Store error. + p.err = err + } + }() + + // Attempt to store media and calculate + // full-size media attachment details. + if err = p.store(ctx); err != nil { + return + } + + // Finish processing by reloading media into + // memory to get dimension and generate a thumb. + if err = p.finish(ctx); err != nil { + return + } - // store the result in the database before returning it - if !p.insertedInDB { if p.refresh { columns := []string{ "updated_at", @@ -118,176 +108,195 @@ func (p *ProcessingEmoji) LoadEmoji(ctx context.Context) (*gtsmodel.Emoji, error "shortcode", "uri", } - if _, err := p.database.UpdateEmoji(ctx, p.emoji, columns...); err != nil { - return nil, err - } - } else { - if err := p.database.PutEmoji(ctx, p.emoji); err != nil { - return nil, err - } - } - p.insertedInDB = true - } - - return p.emoji, nil -} - -// Finished returns true if processing has finished for both the thumbnail -// and full fized version of this piece of media. -func (p *ProcessingEmoji) Finished() bool { - return atomic.LoadInt32(&p.staticState) == int32(complete) -} -func (p *ProcessingEmoji) loadStatic(ctx context.Context) error { - staticState := atomic.LoadInt32(&p.staticState) - switch processState(staticState) { - case received: - // stream the original file out of storage... - stored, err := p.storage.GetStream(ctx, p.emoji.ImagePath) - if err != nil { - p.err = fmt.Errorf("loadStatic: error fetching file from storage: %s", err) - atomic.StoreInt32(&p.staticState, int32(errored)) - return p.err + // Existing emoji we're refreshing, so only need to update. + _, err = p.manager.db.UpdateEmoji(ctx, p.emoji, columns...) 
+ return } - defer stored.Close() - // we haven't processed a static version of this emoji yet so do it now - static, err := deriveStaticEmoji(stored, p.emoji.ImageContentType) - if err != nil { - p.err = fmt.Errorf("loadStatic: error deriving static: %s", err) - atomic.StoreInt32(&p.staticState, int32(errored)) - return p.err - } - - // Close stored emoji now we're done - if err := stored.Close(); err != nil { - log.Errorf("loadStatic: error closing stored full size: %s", err) - } - - // put the static image in storage - if err := p.storage.Put(ctx, p.emoji.ImageStaticPath, static.small); err != nil && err != storage.ErrAlreadyExists { - p.err = fmt.Errorf("loadStatic: error storing static: %s", err) - atomic.StoreInt32(&p.staticState, int32(errored)) - return p.err - } + // New emoji media, first time caching. + err = p.manager.db.PutEmoji(ctx, p.emoji) + return //nolint shutup linter i like this here + }) - p.emoji.ImageStaticFileSize = len(static.small) - - // we're done processing the static version of the emoji! - atomic.StoreInt32(&p.staticState, int32(complete)) - fallthrough - case complete: - return nil - case errored: - return p.err + if p.err != nil { + return nil, p.err } - return fmt.Errorf("static processing status %d unknown", p.staticState) + return p.emoji, nil } // store calls the data function attached to p if it hasn't been called yet, // and updates the underlying attachment fields as necessary. It will then stream // bytes from p's reader directly into storage so that it can be retrieved later. func (p *ProcessingEmoji) store(ctx context.Context) error { - // check if we've already done this and bail early if we have - if p.read { - return nil - } + defer func() { + if p.postFn == nil { + return + } - // execute the data function to get the readcloser out of it - rc, fileSize, err := p.data(ctx) + // Ensure post callback gets called. + if err := p.postFn(ctx); err != nil { + log.Errorf("error executing postdata function: %v", err) + } + }() + + // Load media from provided data fn. + rc, sz, err := p.dataFn(ctx) if err != nil { - return fmt.Errorf("store: error executing data function: %s", err) + return fmt.Errorf("error executing data function: %w", err) } - // defer closing the reader when we're done with it defer func() { + // Ensure data reader gets closed on return. if err := rc.Close(); err != nil { - log.Errorf("store: error closing readcloser: %s", err) + log.Errorf("error closing data reader: %v", err) } }() - // execute the postData function no matter what happens - defer func() { - if p.postData != nil { - if err := p.postData(ctx); err != nil { - log.Errorf("store: error executing postData: %s", err) - } - } - }() + // Byte buffer to read file header into. + // See: https://en.wikipedia.org/wiki/File_format#File_header + // and https://github.com/h2non/filetype + hdrBuf := make([]byte, 261) - // extract no more than 261 bytes from the beginning of the file -- this is the header - firstBytes := make([]byte, maxFileHeaderBytes) - if _, err := rc.Read(firstBytes); err != nil { - return fmt.Errorf("store: error reading initial %d bytes: %s", maxFileHeaderBytes, err) + // Read the first 261 header bytes into buffer. + if _, err := io.ReadFull(rc, hdrBuf); err != nil { + return fmt.Errorf("error reading incoming media: %w", err) } - // now we have the file header we can work out the content type from it - contentType, err := parseContentType(firstBytes) + // Parse file type info from header buffer. 
+ info, err := filetype.Match(hdrBuf) if err != nil { - return fmt.Errorf("store: error parsing content type: %s", err) + return fmt.Errorf("error parsing file type: %w", err) } - // bail if this is a type we can't process - if !supportedEmoji(contentType) { - return fmt.Errorf("store: content type %s was not valid for an emoji", contentType) + switch info.Extension { + // only supported emoji types + case "gif", "png": + + // unhandled + default: + return fmt.Errorf("unsupported emoji filetype: %s", info.Extension) } - // extract the file extension - split := strings.Split(contentType, "/") - extension := split[1] // something like 'gif' + // Recombine header bytes with remaining stream + r := io.MultiReader(bytes.NewReader(hdrBuf), rc) + + var maxSize bytesize.Size + + if p.emoji.Domain == "" { + // this is a local emoji upload + maxSize = config.GetMediaEmojiLocalMaxSize() + } else { + // this is a remote incoming emoji + maxSize = config.GetMediaEmojiRemoteMaxSize() + } + + // Check that provided size isn't beyond max. We check beforehand + // so that we don't attempt to stream the emoji into storage if not needed. + if size := bytesize.Size(sz); sz > 0 && size > maxSize { + return fmt.Errorf("given emoji size %s greater than max allowed %s", size, maxSize) + } - // set some additional fields on the emoji now that - // we know more about what the underlying image actually is var pathID string + if p.refresh { + // This is a refreshed emoji with a new + // path ID that this will be stored under. pathID = p.newPathID } else { + // This is a new emoji, simply use provided ID. pathID = p.emoji.ID } - p.emoji.ImageURL = uris.GenerateURIForAttachment(p.instanceAccountID, string(TypeEmoji), string(SizeOriginal), pathID, extension) - p.emoji.ImagePath = fmt.Sprintf("%s/%s/%s/%s.%s", p.instanceAccountID, TypeEmoji, SizeOriginal, pathID, extension) - p.emoji.ImageContentType = contentType - // concatenate the first bytes with the existing bytes still in the reader (thanks Mara) - readerToStore := io.MultiReader(bytes.NewBuffer(firstBytes), rc) + // Calculate emoji file path. + p.emoji.ImagePath = fmt.Sprintf( + "%s/%s/%s/%s.%s", + p.instAccID, + TypeEmoji, + SizeOriginal, + pathID, + info.Extension, + ) + + // This shouldn't already exist, but we do a check as it's worth logging. + if have, _ := p.manager.storage.Has(ctx, p.emoji.ImagePath); have { + log.Warnf("emoji already exists at storage path: %s", p.emoji.ImagePath) + + // Attempt to remove existing emoji at storage path (might be broken / out-of-date) + if err := p.manager.storage.Delete(ctx, p.emoji.ImagePath); err != nil { + return fmt.Errorf("error removing emoji from storage: %v", err) + } + } - var maxEmojiSize int64 - if p.emoji.Domain == "" { - maxEmojiSize = int64(config.GetMediaEmojiLocalMaxSize()) - } else { - maxEmojiSize = int64(config.GetMediaEmojiRemoteMaxSize()) + // Write the final image reader stream to our storage. + sz, err = p.manager.storage.PutStream(ctx, p.emoji.ImagePath, r) + if err != nil { + return fmt.Errorf("error writing emoji to storage: %w", err) } - // if we know the fileSize already, make sure it's not bigger than our limit - var checkedSize bool - if fileSize > 0 { - checkedSize = true - if fileSize > maxEmojiSize { - return fmt.Errorf("store: given emoji fileSize (%db) is larger than allowed size (%db)", fileSize, maxEmojiSize) + // Once again check size in case none was provided previously. 
+ if size := bytesize.Size(sz); size > maxSize { + if err := p.manager.storage.Delete(ctx, p.emoji.ImagePath); err != nil { + log.Errorf("error removing too-large-emoji from storage: %v", err) } + return fmt.Errorf("calculated emoji size %s greater than max allowed %s", size, maxSize) } - // store this for now -- other processes can pull it out of storage as they please - if fileSize, err = putStream(ctx, p.storage, p.emoji.ImagePath, readerToStore, fileSize); err != nil { - if !errors.Is(err, storage.ErrAlreadyExists) { - return fmt.Errorf("store: error storing stream: %s", err) - } - log.Warnf("emoji %s already exists at storage path: %s", p.emoji.ID, p.emoji.ImagePath) + // Fill in remaining attachment data now it's stored. + p.emoji.ImageURL = uris.GenerateURIForAttachment( + p.instAccID, + string(TypeEmoji), + string(SizeOriginal), + pathID, + info.Extension, + ) + p.emoji.ImageContentType = info.MIME.Value + p.emoji.ImageFileSize = int(sz) + + return nil +} + +func (p *ProcessingEmoji) finish(ctx context.Context) error { + // Fetch a stream to the original file in storage. + rc, err := p.manager.storage.GetStream(ctx, p.emoji.ImagePath) + if err != nil { + return fmt.Errorf("error loading file from storage: %w", err) } + defer rc.Close() - // if we didn't know the fileSize yet, we do now, so check if we need to - if !checkedSize && fileSize > maxEmojiSize { - err = fmt.Errorf("store: discovered emoji fileSize (%db) is larger than allowed emojiRemoteMaxSize (%db), will delete from the store now", fileSize, maxEmojiSize) - log.Warn(err) - if deleteErr := p.storage.Delete(ctx, p.emoji.ImagePath); deleteErr != nil { - log.Errorf("store: error removing too-large emoji from the store: %s", deleteErr) + // Decode the image from storage. + staticImg, err := decodeImage(rc) + if err != nil { + return fmt.Errorf("error decoding image: %w", err) + } + + // The image should be in-memory by now. + if err := rc.Close(); err != nil { + return fmt.Errorf("error closing file: %w", err) + } + + // This shouldn't already exist, but we do a check as it's worth logging. + if have, _ := p.manager.storage.Has(ctx, p.emoji.ImageStaticPath); have { + log.Warnf("static emoji already exists at storage path: %s", p.emoji.ImagePath) + + // Attempt to remove static existing emoji at storage path (might be broken / out-of-date) + if err := p.manager.storage.Delete(ctx, p.emoji.ImageStaticPath); err != nil { + return fmt.Errorf("error removing static emoji from storage: %v", err) } - return err } - p.emoji.ImageFileSize = int(fileSize) - p.read = true + // Create an emoji PNG encoder stream. + enc := staticImg.ToPNG() + + // Stream-encode the PNG static image into storage. + sz, err := p.manager.storage.PutStream(ctx, p.emoji.ImageStaticPath, enc) + if err != nil { + return fmt.Errorf("error stream-encoding static emoji to storage: %w", err) + } + + // Set written image size. 
+ p.emoji.ImageStaticFileSize = int(sz) return nil } @@ -406,15 +415,13 @@ func (m *manager) preProcessEmoji(ctx context.Context, data DataFunc, postData P } processingEmoji := &ProcessingEmoji{ - instanceAccountID: instanceAccount.ID, - emoji: emoji, - data: data, - postData: postData, - staticState: int32(received), - database: m.db, - storage: m.storage, - refresh: refresh, - newPathID: newPathID, + instAccID: instanceAccount.ID, + emoji: emoji, + refresh: refresh, + newPathID: newPathID, + dataFn: data, + postFn: postData, + manager: m, } return processingEmoji, nil diff --git a/internal/media/processingmedia.go b/internal/media/processingmedia.go index 6e02ce147..4b2ef322d 100644 --- a/internal/media/processingmedia.go +++ b/internal/media/processingmedia.go @@ -21,387 +21,329 @@ package media import ( "bytes" "context" - "errors" "fmt" + "image/jpeg" "io" - "strings" "sync" - "sync/atomic" "time" + "github.com/disintegration/imaging" + "github.com/h2non/filetype" terminator "github.com/superseriousbusiness/exif-terminator" - "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/id" "github.com/superseriousbusiness/gotosocial/internal/log" - "github.com/superseriousbusiness/gotosocial/internal/storage" "github.com/superseriousbusiness/gotosocial/internal/uris" ) // ProcessingMedia represents a piece of media that is currently being processed. It exposes // various functions for retrieving data from the process. type ProcessingMedia struct { - mu sync.Mutex - - /* - below fields should be set on newly created media; - attachment will be updated incrementally as media goes through processing - */ - - attachment *gtsmodel.MediaAttachment - data DataFunc - postData PostDataCallbackFunc - read bool // bool indicating that data function has been triggered already - - thumbState int32 // the processing state of the media thumbnail - fullSizeState int32 // the processing state of the full-sized media - - /* - below pointers to database and storage are maintained so that - the media can store and update itself during processing steps - */ - - database db.DB - storage *storage.Driver - - err error // error created during processing, if any - - // track whether this media has already been put in the databse - insertedInDB bool - - // true if this is a recache, false if it's brand new media - recache bool + media *gtsmodel.MediaAttachment // processing media attachment details + recache bool // recaching existing (uncached) media + dataFn DataFunc // load-data function, returns media stream + postFn PostDataCallbackFunc // post data callback function + err error // error encountered during processing + manager *manager // manager instance (access to db / storage) + once sync.Once // once ensures processing only occurs once } // AttachmentID returns the ID of the underlying media attachment without blocking processing. func (p *ProcessingMedia) AttachmentID() string { - return p.attachment.ID + return p.media.ID // immutable, safe outside mutex. } // LoadAttachment blocks until the thumbnail and fullsize content // has been processed, and then returns the completed attachment. func (p *ProcessingMedia) LoadAttachment(ctx context.Context) (*gtsmodel.MediaAttachment, error) { - p.mu.Lock() - defer p.mu.Unlock() + // only process once. 
+ p.once.Do(func() { + var err error - if err := p.store(ctx); err != nil { - return nil, err - } + defer func() { + if r := recover(); r != nil { + if err != nil { + rOld := r // wrap the panic so we don't lose existing returned error + r = fmt.Errorf("panic occured after error %q: %v", err.Error(), rOld) + } - if err := p.loadFullSize(ctx); err != nil { - return nil, err - } + // Catch any panics and wrap as error. + err = fmt.Errorf("caught panic: %v", r) + } - if err := p.loadThumb(ctx); err != nil { - return nil, err - } + if err != nil { + // Store error. + p.err = err + } + }() + + // Attempt to store media and calculate + // full-size media attachment details. + if err = p.store(ctx); err != nil { + return + } + + // Finish processing by reloading media into + // memory to get dimension and generate a thumb. + if err = p.finish(ctx); err != nil { + return + } - if !p.insertedInDB { if p.recache { - // This is an existing media attachment we're recaching, so only need to update it - if err := p.database.UpdateByID(ctx, p.attachment, p.attachment.ID); err != nil { - return nil, err - } - } else { - // This is a new media attachment we're caching for first time - if err := p.database.Put(ctx, p.attachment); err != nil { - return nil, err - } + // Existing attachment we're recaching, so only need to update. + err = p.manager.db.UpdateByID(ctx, p.media, p.media.ID) + return } - // Mark this as stored in DB - p.insertedInDB = true + // New attachment, first time caching. + err = p.manager.db.Put(ctx, p.media) + return //nolint shutup linter i like this here + }) + + if p.err != nil { + return nil, p.err } - log.Tracef("finished loading attachment %s", p.attachment.URL) - return p.attachment, nil + return p.media, nil } -// Finished returns true if processing has finished for both the thumbnail -// and full fized version of this piece of media. -func (p *ProcessingMedia) Finished() bool { - return atomic.LoadInt32(&p.thumbState) == int32(complete) && atomic.LoadInt32(&p.fullSizeState) == int32(complete) -} +// store calls the data function attached to p if it hasn't been called yet, +// and updates the underlying attachment fields as necessary. It will then stream +// bytes from p's reader directly into storage so that it can be retrieved later. +func (p *ProcessingMedia) store(ctx context.Context) error { + defer func() { + if p.postFn == nil { + return + } -func (p *ProcessingMedia) loadThumb(ctx context.Context) error { - thumbState := atomic.LoadInt32(&p.thumbState) - switch processState(thumbState) { - case received: - // we haven't processed a thumbnail for this media yet so do it now - // check if we need to create a blurhash or if there's already one set - var createBlurhash bool - if p.attachment.Blurhash == "" { - // no blurhash created yet - createBlurhash = true + // ensure post callback gets called. 
+ if err := p.postFn(ctx); err != nil { + log.Errorf("error executing postdata function: %v", err) } + }() - var ( - thumb *mediaMeta - err error - ) - switch ct := p.attachment.File.ContentType; ct { - case mimeImageJpeg, mimeImagePng, mimeImageWebp, mimeImageGif: - // thumbnail the image from the original stored full size version - stored, err := p.storage.GetStream(ctx, p.attachment.File.Path) - if err != nil { - p.err = fmt.Errorf("loadThumb: error fetching file from storage: %s", err) - atomic.StoreInt32(&p.thumbState, int32(errored)) - return p.err - } + // Load media from provided data fun + rc, sz, err := p.dataFn(ctx) + if err != nil { + return fmt.Errorf("error executing data function: %w", err) + } + + defer func() { + // Ensure data reader gets closed on return. + if err := rc.Close(); err != nil { + log.Errorf("error closing data reader: %v", err) + } + }() - thumb, err = deriveThumbnailFromImage(stored, ct, createBlurhash) + // Byte buffer to read file header into. + // See: https://en.wikipedia.org/wiki/File_format#File_header + // and https://github.com/h2non/filetype + hdrBuf := make([]byte, 261) - // try to close the stored stream we had open, no matter what - if closeErr := stored.Close(); closeErr != nil { - log.Errorf("error closing stream: %s", closeErr) - } + // Read the first 261 header bytes into buffer. + if _, err := io.ReadFull(rc, hdrBuf); err != nil { + return fmt.Errorf("error reading incoming media: %w", err) + } - // now check if we managed to get a thumbnail - if err != nil { - p.err = fmt.Errorf("loadThumb: error deriving thumbnail: %s", err) - atomic.StoreInt32(&p.thumbState, int32(errored)) - return p.err - } - case mimeVideoMp4: - // create a generic thumbnail based on video height + width - thumb, err = deriveThumbnailFromVideo(p.attachment.FileMeta.Original.Height, p.attachment.FileMeta.Original.Width) + // Parse file type info from header buffer. + info, err := filetype.Match(hdrBuf) + if err != nil { + return fmt.Errorf("error parsing file type: %w", err) + } + + // Recombine header bytes with remaining stream + r := io.MultiReader(bytes.NewReader(hdrBuf), rc) + + switch info.Extension { + case "mp4": + p.media.Type = gtsmodel.FileTypeVideo + + case "gif": + p.media.Type = gtsmodel.FileTypeImage + + case "jpg", "jpeg", "png", "webp": + p.media.Type = gtsmodel.FileTypeImage + if sz > 0 { + // A file size was provided so we can clean exif data from image. + r, err = terminator.Terminate(r, int(sz), info.Extension) if err != nil { - p.err = fmt.Errorf("loadThumb: error deriving thumbnail: %s", err) - atomic.StoreInt32(&p.thumbState, int32(errored)) - return p.err + return fmt.Errorf("error cleaning exif data: %w", err) } - default: - p.err = fmt.Errorf("loadThumb: content type %s not a processible image type", ct) - atomic.StoreInt32(&p.thumbState, int32(errored)) - return p.err } - // put the thumbnail in storage - if err := p.storage.Put(ctx, p.attachment.Thumbnail.Path, thumb.small); err != nil && err != storage.ErrAlreadyExists { - p.err = fmt.Errorf("loadThumb: error storing thumbnail: %s", err) - atomic.StoreInt32(&p.thumbState, int32(errored)) - return p.err - } + default: + return fmt.Errorf("unsupported file type: %s", info.Extension) + } - // set appropriate fields on the attachment based on the thumbnail we derived - if createBlurhash { - p.attachment.Blurhash = thumb.blurhash + // Calculate attachment file path. 
+ p.media.File.Path = fmt.Sprintf( + "%s/%s/%s/%s.%s", + p.media.AccountID, + TypeAttachment, + SizeOriginal, + p.media.ID, + info.Extension, + ) + + // This shouldn't already exist, but we do a check as it's worth logging. + if have, _ := p.manager.storage.Has(ctx, p.media.File.Path); have { + log.Warnf("media already exists at storage path: %s", p.media.File.Path) + + // Attempt to remove existing media at storage path (might be broken / out-of-date) + if err := p.manager.storage.Delete(ctx, p.media.File.Path); err != nil { + return fmt.Errorf("error removing media from storage: %v", err) } - p.attachment.FileMeta.Small = gtsmodel.Small{ - Width: thumb.width, - Height: thumb.height, - Size: thumb.size, - Aspect: thumb.aspect, - } - p.attachment.Thumbnail.FileSize = len(thumb.small) - - // we're done processing the thumbnail! - atomic.StoreInt32(&p.thumbState, int32(complete)) - log.Tracef("finished processing thumbnail for attachment %s", p.attachment.URL) - fallthrough - case complete: - return nil - case errored: - return p.err } - return fmt.Errorf("loadThumb: thumbnail processing status %d unknown", p.thumbState) -} + // Write the final image reader stream to our storage. + sz, err = p.manager.storage.PutStream(ctx, p.media.File.Path, r) + if err != nil { + return fmt.Errorf("error writing media to storage: %w", err) + } -func (p *ProcessingMedia) loadFullSize(ctx context.Context) error { - fullSizeState := atomic.LoadInt32(&p.fullSizeState) - switch processState(fullSizeState) { - case received: - var err error - var decoded *mediaMeta + // Set written image size. + p.media.File.FileSize = int(sz) + + // Fill in remaining attachment data now it's stored. + p.media.URL = uris.GenerateURIForAttachment( + p.media.AccountID, + string(TypeAttachment), + string(SizeOriginal), + p.media.ID, + info.Extension, + ) + p.media.File.ContentType = info.MIME.Value + cached := true + p.media.Cached = &cached - // stream the original file out of storage... - stored, err := p.storage.GetStream(ctx, p.attachment.File.Path) - if err != nil { - p.err = fmt.Errorf("loadFullSize: error fetching file from storage: %s", err) - atomic.StoreInt32(&p.fullSizeState, int32(errored)) - return p.err - } + return nil +} - defer func() { - if err := stored.Close(); err != nil { - log.Errorf("loadFullSize: error closing stored full size: %s", err) - } - }() +func (p *ProcessingMedia) finish(ctx context.Context) error { + // Fetch a stream to the original file in storage. 
+ rc, err := p.manager.storage.GetStream(ctx, p.media.File.Path) + if err != nil { + return fmt.Errorf("error loading file from storage: %w", err) + } + defer rc.Close() - // decode the image - ct := p.attachment.File.ContentType - switch ct { - case mimeImageJpeg, mimeImagePng, mimeImageWebp: - decoded, err = decodeImage(stored, ct) - case mimeImageGif: - decoded, err = decodeGif(stored) - case mimeVideoMp4: - decoded, err = decodeVideo(stored, ct) - default: - err = fmt.Errorf("loadFullSize: content type %s not a processible image type", ct) - } + var fullImg *gtsImage + switch p.media.File.ContentType { + // .jpeg, .gif, .webp image type + case mimeImageJpeg, mimeImageGif, mimeImageWebp: + fullImg, err = decodeImage(rc, imaging.AutoOrientation(true)) if err != nil { - p.err = err - atomic.StoreInt32(&p.fullSizeState, int32(errored)) - return p.err + return fmt.Errorf("error decoding image: %w", err) } - // set appropriate fields on the attachment based on the image we derived - - // generic fields - p.attachment.File.UpdatedAt = time.Now() - p.attachment.FileMeta.Original = gtsmodel.Original{ - Width: decoded.width, - Height: decoded.height, - Size: decoded.size, - Aspect: decoded.aspect, + // .png image (requires ancillary chunk stripping) + case mimeImagePng: + fullImg, err = decodeImage(&PNGAncillaryChunkStripper{ + Reader: rc, + }, imaging.AutoOrientation(true)) + if err != nil { + return fmt.Errorf("error decoding image: %w", err) } - // nullable fields - if decoded.duration != 0 { - i := decoded.duration - p.attachment.FileMeta.Original.Duration = &i - } - if decoded.framerate != 0 { - i := decoded.framerate - p.attachment.FileMeta.Original.Framerate = &i - } - if decoded.bitrate != 0 { - i := decoded.bitrate - p.attachment.FileMeta.Original.Bitrate = &i + // .mp4 video type + case mimeVideoMp4: + video, err := decodeVideoFrame(rc) + if err != nil { + return fmt.Errorf("error decoding video: %w", err) } - // we're done processing the full-size image - p.attachment.Processing = gtsmodel.ProcessingStatusProcessed - atomic.StoreInt32(&p.fullSizeState, int32(complete)) - log.Tracef("finished processing full size image for attachment %s", p.attachment.URL) - fallthrough - case complete: - return nil - case errored: - return p.err - } + // Set video frame as image. + fullImg = video.frame - return fmt.Errorf("loadFullSize: full size processing status %d unknown", p.fullSizeState) -} + // Set video metadata in attachment info. + p.media.FileMeta.Original.Duration = &video.duration + p.media.FileMeta.Original.Framerate = &video.framerate + p.media.FileMeta.Original.Bitrate = &video.bitrate + } -// store calls the data function attached to p if it hasn't been called yet, -// and updates the underlying attachment fields as necessary. It will then stream -// bytes from p's reader directly into storage so that it can be retrieved later. -func (p *ProcessingMedia) store(ctx context.Context) error { - // check if we've already done this and bail early if we have - if p.read { - return nil + // The image should be in-memory by now. + if err := rc.Close(); err != nil { + return fmt.Errorf("error closing file: %w", err) } - // execute the data function to get the readcloser out of it - rc, fileSize, err := p.data(ctx) + // Set full-size dimensions in attachment info. 
+ p.media.FileMeta.Original.Width = int(fullImg.Width()) + p.media.FileMeta.Original.Height = int(fullImg.Height()) + p.media.FileMeta.Original.Size = int(fullImg.Size()) + p.media.FileMeta.Original.Aspect = fullImg.AspectRatio() + + // Calculate attachment thumbnail file path + p.media.Thumbnail.Path = fmt.Sprintf( + "%s/%s/%s/%s.jpg", + p.media.AccountID, + TypeAttachment, + SizeSmall, + p.media.ID, + ) + + // Get smaller thumbnail image + thumbImg := fullImg.Thumbnail() + + // Garbage collector, you may + // now take our large son. + fullImg = nil + + // Blurhash needs generating from thumb. + hash, err := thumbImg.Blurhash() if err != nil { - return fmt.Errorf("store: error executing data function: %s", err) + return fmt.Errorf("error generating blurhash: %w", err) } - // defer closing the reader when we're done with it - defer func() { - if err := rc.Close(); err != nil { - log.Errorf("store: error closing readcloser: %s", err) - } - }() + // Set the attachment blurhash. + p.media.Blurhash = hash - // execute the postData function no matter what happens - defer func() { - if p.postData != nil { - if err := p.postData(ctx); err != nil { - log.Errorf("store: error executing postData: %s", err) - } - } - }() + // This shouldn't already exist, but we do a check as it's worth logging. + if have, _ := p.manager.storage.Has(ctx, p.media.Thumbnail.Path); have { + log.Warnf("thumbnail already exists at storage path: %s", p.media.Thumbnail.Path) - // extract no more than 261 bytes from the beginning of the file -- this is the header - firstBytes := make([]byte, maxFileHeaderBytes) - if _, err := rc.Read(firstBytes); err != nil { - return fmt.Errorf("store: error reading initial %d bytes: %s", maxFileHeaderBytes, err) + // Attempt to remove existing thumbnail at storage path (might be broken / out-of-date) + if err := p.manager.storage.Delete(ctx, p.media.Thumbnail.Path); err != nil { + return fmt.Errorf("error removing thumbnail from storage: %v", err) + } } - // now we have the file header we can work out the content type from it - contentType, err := parseContentType(firstBytes) - if err != nil { - return fmt.Errorf("store: error parsing content type: %s", err) - } + // Create a thumbnail JPEG encoder stream. + enc := thumbImg.ToJPEG(&jpeg.Options{ + Quality: 70, // enough for a thumbnail. + }) - // bail if this is a type we can't process - if !supportedAttachment(contentType) { - return fmt.Errorf("store: media type %s not (yet) supported", contentType) + // Stream-encode the JPEG thumbnail image into storage. + sz, err := p.manager.storage.PutStream(ctx, p.media.Thumbnail.Path, enc) + if err != nil { + return fmt.Errorf("error stream-encoding thumbnail to storage: %w", err) } - // extract the file extension - split := strings.Split(contentType, "/") - if len(split) != 2 { - return fmt.Errorf("store: content type %s was not valid", contentType) + // Fill in remaining thumbnail now it's stored + p.media.Thumbnail.ContentType = mimeImageJpeg + p.media.Thumbnail.URL = uris.GenerateURIForAttachment( + p.media.AccountID, + string(TypeAttachment), + string(SizeSmall), + p.media.ID, + "jpg", // always jpeg + ) + + // Set thumbnail dimensions in attachment info. 
+ p.media.FileMeta.Small = gtsmodel.Small{ + Width: int(thumbImg.Width()), + Height: int(thumbImg.Height()), + Size: int(thumbImg.Size()), + Aspect: thumbImg.AspectRatio(), } - extension := split[1] // something like 'jpeg' - - // concatenate the cleaned up first bytes with the existing bytes still in the reader (thanks Mara) - multiReader := io.MultiReader(bytes.NewBuffer(firstBytes), rc) - - // use the extension to derive the attachment type - // and, while we're in here, clean up exif data from - // the image if we already know the fileSize - var readerToStore io.Reader - switch extension { - case mimeGif: - p.attachment.Type = gtsmodel.FileTypeImage - // nothing to terminate, we can just store the multireader - readerToStore = multiReader - case mimeJpeg, mimePng, mimeWebp: - p.attachment.Type = gtsmodel.FileTypeImage - if fileSize > 0 { - terminated, err := terminator.Terminate(multiReader, int(fileSize), extension) - if err != nil { - return fmt.Errorf("store: exif error: %s", err) - } - defer func() { - if closer, ok := terminated.(io.Closer); ok { - if err := closer.Close(); err != nil { - log.Errorf("store: error closing terminator reader: %s", err) - } - } - }() - // store the exif-terminated version of what was in the multireader - readerToStore = terminated - } else { - // can't terminate if we don't know the file size, so just store the multiReader - readerToStore = multiReader - } - case mimeMp4: - p.attachment.Type = gtsmodel.FileTypeVideo - // nothing to terminate, we can just store the multireader - readerToStore = multiReader - default: - return fmt.Errorf("store: couldn't process %s", extension) - } - - // now set some additional fields on the attachment since - // we know more about what the underlying media actually is - p.attachment.URL = uris.GenerateURIForAttachment(p.attachment.AccountID, string(TypeAttachment), string(SizeOriginal), p.attachment.ID, extension) - p.attachment.File.ContentType = contentType - p.attachment.File.Path = fmt.Sprintf("%s/%s/%s/%s.%s", p.attachment.AccountID, TypeAttachment, SizeOriginal, p.attachment.ID, extension) - // store this for now -- other processes can pull it out of storage as they please - if fileSize, err = putStream(ctx, p.storage, p.attachment.File.Path, readerToStore, fileSize); err != nil { - if !errors.Is(err, storage.ErrAlreadyExists) { - return fmt.Errorf("store: error storing stream: %s", err) - } - log.Warnf("attachment %s already exists at storage path: %s", p.attachment.ID, p.attachment.File.Path) - } + // Set written image size. + p.media.Thumbnail.FileSize = int(sz) - cached := true - p.attachment.Cached = &cached - p.attachment.File.FileSize = int(fileSize) - p.read = true + // Finally set the attachment as processed and update time. 
+ p.media.Processing = gtsmodel.ProcessingStatusProcessed + p.media.File.UpdatedAt = time.Now() - log.Tracef("finished storing initial data for attachment %s", p.attachment.URL) return nil } @@ -411,19 +353,6 @@ func (m *manager) preProcessMedia(ctx context.Context, data DataFunc, postData P return nil, err } - file := gtsmodel.File{ - Path: "", // we don't know yet because it depends on the uncalled DataFunc - ContentType: "", // we don't know yet because it depends on the uncalled DataFunc - UpdatedAt: time.Now(), - } - - thumbnail := gtsmodel.Thumbnail{ - URL: uris.GenerateURIForAttachment(accountID, string(TypeAttachment), string(SizeSmall), id, mimeJpeg), // all thumbnails are encoded as jpeg, - Path: fmt.Sprintf("%s/%s/%s/%s.%s", accountID, TypeAttachment, SizeSmall, id, mimeJpeg), // all thumbnails are encoded as jpeg, - ContentType: mimeImageJpeg, - UpdatedAt: time.Now(), - } - avatar := false header := false cached := false @@ -443,8 +372,8 @@ func (m *manager) preProcessMedia(ctx context.Context, data DataFunc, postData P ScheduledStatusID: "", Blurhash: "", Processing: gtsmodel.ProcessingStatusReceived, - File: file, - Thumbnail: thumbnail, + File: gtsmodel.File{UpdatedAt: time.Now()}, + Thumbnail: gtsmodel.Thumbnail{UpdatedAt: time.Now()}, Avatar: &avatar, Header: &header, Cached: &cached, @@ -495,34 +424,28 @@ func (m *manager) preProcessMedia(ctx context.Context, data DataFunc, postData P } processingMedia := &ProcessingMedia{ - attachment: attachment, - data: data, - postData: postData, - thumbState: int32(received), - fullSizeState: int32(received), - database: m.db, - storage: m.storage, + media: attachment, + dataFn: data, + postFn: postData, + manager: m, } return processingMedia, nil } -func (m *manager) preProcessRecache(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, attachmentID string) (*ProcessingMedia, error) { - // get the existing attachment - attachment, err := m.db.GetAttachmentByID(ctx, attachmentID) +func (m *manager) preProcessRecache(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, id string) (*ProcessingMedia, error) { + // get the existing attachment from database. 
+ attachment, err := m.db.GetAttachmentByID(ctx, id) if err != nil { return nil, err } processingMedia := &ProcessingMedia{ - attachment: attachment, - data: data, - postData: postData, - thumbState: int32(received), - fullSizeState: int32(received), - database: m.db, - storage: m.storage, - recache: true, // indicate it's a recache + media: attachment, + dataFn: data, + postFn: postData, + manager: m, + recache: true, // indicate it's a recache } return processingMedia, nil diff --git a/internal/media/pruneorphaned_test.go b/internal/media/pruneorphaned_test.go index 2d3ed5a31..52976b51b 100644 --- a/internal/media/pruneorphaned_test.go +++ b/internal/media/pruneorphaned_test.go @@ -39,7 +39,7 @@ func (suite *PruneOrphanedTestSuite) TestPruneOrphanedDry() { } pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachments/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif" - if err := suite.storage.PutStream(context.Background(), pandaPath, bytes.NewBuffer(b)); err != nil { + if _, err := suite.storage.PutStream(context.Background(), pandaPath, bytes.NewBuffer(b)); err != nil { panic(err) } @@ -62,7 +62,7 @@ func (suite *PruneOrphanedTestSuite) TestPruneOrphanedMoist() { } pandaPath := "01GJQJ1YD9QCHCE12GG0EYHVNW/attachments/original/01GJQJ2AYM1VKSRW96YVAJ3NK3.gif" - if err := suite.storage.PutStream(context.Background(), pandaPath, bytes.NewBuffer(b)); err != nil { + if _, err := suite.storage.PutStream(context.Background(), pandaPath, bytes.NewBuffer(b)); err != nil { panic(err) } diff --git a/internal/media/pruneremote_test.go b/internal/media/pruneremote_test.go index 258aa20ca..51521422c 100644 --- a/internal/media/pruneremote_test.go +++ b/internal/media/pruneremote_test.go @@ -87,7 +87,7 @@ func (suite *PruneRemoteTestSuite) TestPruneAndRecache() { // now recache the image.... 
data := func(_ context.Context) (io.ReadCloser, int64, error) { // load bytes from a test image - b, err := os.ReadFile("../../testrig/media/thoughtsofdog-original.jpeg") + b, err := os.ReadFile("../../testrig/media/thoughtsofdog-original.jpg") if err != nil { panic(err) } diff --git a/internal/media/test/longer-mp4-thumbnail.jpg b/internal/media/test/longer-mp4-thumbnail.jpg Binary files differindex e77534950..076db8251 100644 --- a/internal/media/test/longer-mp4-thumbnail.jpg +++ b/internal/media/test/longer-mp4-thumbnail.jpg diff --git a/internal/media/test/test-jpeg-thumbnail.jpg b/internal/media/test/test-jpeg-thumbnail.jpg Binary files differindex 80170e7c8..c11569fe6 100644 --- a/internal/media/test/test-jpeg-thumbnail.jpg +++ b/internal/media/test/test-jpeg-thumbnail.jpg diff --git a/internal/media/test/test-mp4-thumbnail.jpg b/internal/media/test/test-mp4-thumbnail.jpg Binary files differindex 8bfdf1540..6d33c1b78 100644 --- a/internal/media/test/test-mp4-thumbnail.jpg +++ b/internal/media/test/test-mp4-thumbnail.jpg diff --git a/internal/media/test/test-png-alphachannel-thumbnail.jpg b/internal/media/test/test-png-alphachannel-thumbnail.jpg Binary files differindex ca62f4ea6..8342157be 100644 --- a/internal/media/test/test-png-alphachannel-thumbnail.jpg +++ b/internal/media/test/test-png-alphachannel-thumbnail.jpg diff --git a/internal/media/test/test-png-noalphachannel-thumbnail.jpg b/internal/media/test/test-png-noalphachannel-thumbnail.jpg Binary files differindex ca62f4ea6..8342157be 100644 --- a/internal/media/test/test-png-noalphachannel-thumbnail.jpg +++ b/internal/media/test/test-png-noalphachannel-thumbnail.jpg diff --git a/internal/media/types.go b/internal/media/types.go index 86fb1741d..d1f234c38 100644 --- a/internal/media/types.go +++ b/internal/media/types.go @@ -24,13 +24,6 @@ import ( "time" ) -// maxFileHeaderBytes represents the maximum amount of bytes we want -// to examine from the beginning of a file to determine its type. -// -// See: https://en.wikipedia.org/wiki/File_format#File_header -// and https://github.com/h2non/filetype -const maxFileHeaderBytes = 261 - // mime consts const ( mimeImage = "image" @@ -52,14 +45,6 @@ const ( mimeVideoMp4 = mimeVideo + "/" + mimeMp4 ) -type processState int32 - -const ( - received processState = iota // processing order has been received but not done yet - complete // processing order has been completed successfully - errored // processing order has been completed with an error -) - // EmojiMaxBytes is the maximum permitted bytes of an emoji upload (50kb) // const EmojiMaxBytes = 51200 @@ -132,17 +117,3 @@ type DataFunc func(ctx context.Context) (reader io.ReadCloser, fileSize int64, e // // This can be set to nil, and will then not be executed. type PostDataCallbackFunc func(ctx context.Context) error - -type mediaMeta struct { - width int - height int - size int - aspect float32 - blurhash string - small []byte - - // video-specific properties - duration float32 - framerate float32 - bitrate uint64 -} diff --git a/internal/media/util.go b/internal/media/util.go index 8393d832e..b15583026 100644 --- a/internal/media/util.go +++ b/internal/media/util.go @@ -19,72 +19,22 @@ package media import ( - "context" - "errors" "fmt" - "io" - "github.com/h2non/filetype" "github.com/superseriousbusiness/gotosocial/internal/log" - "github.com/superseriousbusiness/gotosocial/internal/storage" ) -// AllSupportedMIMETypes just returns all media -// MIME types supported by this instance. 
-func AllSupportedMIMETypes() []string { - return []string{ - mimeImageJpeg, - mimeImageGif, - mimeImagePng, - mimeImageWebp, - mimeVideoMp4, - } -} - -// parseContentType parses the MIME content type from a file, returning it as a string in the form (eg., "image/jpeg"). -// Returns an error if the content type is not something we can process. -// -// Fileheader should be no longer than 262 bytes; anything more than this is inefficient. -func parseContentType(fileHeader []byte) (string, error) { - if fhLength := len(fileHeader); fhLength > maxFileHeaderBytes { - return "", fmt.Errorf("parseContentType requires %d bytes max, we got %d", maxFileHeaderBytes, fhLength) - } - - kind, err := filetype.Match(fileHeader) - if err != nil { - return "", err - } - - if kind == filetype.Unknown { - return "", errors.New("filetype unknown") - } - - return kind.MIME.Value, nil -} - -// supportedAttachment checks mime type of an attachment against a -// slice of accepted types, and returns True if the mime type is accepted. -func supportedAttachment(mimeType string) bool { - for _, accepted := range AllSupportedMIMETypes() { - if mimeType == accepted { - return true - } - } - return false +var SupportedMIMETypes = []string{ + mimeImageJpeg, + mimeImageGif, + mimeImagePng, + mimeImageWebp, + mimeVideoMp4, } -// supportedEmoji checks that the content type is image/png or image/gif -- the only types supported for emoji. -func supportedEmoji(mimeType string) bool { - acceptedEmojiTypes := []string{ - mimeImageGif, - mimeImagePng, - } - for _, accepted := range acceptedEmojiTypes { - if mimeType == accepted { - return true - } - } - return false +var SupportedEmojiMIMETypes = []string{ + mimeImageGif, + mimeImagePng, } // ParseMediaType converts s to a recognized MediaType, or returns an error if unrecognized @@ -127,31 +77,3 @@ func (l *logrusWrapper) Info(msg string, keysAndValues ...interface{}) { func (l *logrusWrapper) Error(err error, msg string, keysAndValues ...interface{}) { log.Error("media manager cron logger: ", err, msg, keysAndValues) } - -// lengthReader wraps a reader and reads the length of total bytes written as it goes. -type lengthReader struct { - source io.Reader - length int64 -} - -func (r *lengthReader) Read(b []byte) (int, error) { - n, err := r.source.Read(b) - r.length += int64(n) - return n, err -} - -// putStream either puts a file with a known fileSize into storage directly, and returns the -// fileSize unchanged, or it wraps the reader with a lengthReader and returns the discovered -// fileSize. 
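// With supportedAttachment() and supportedEmoji() removed, callers now check
// an incoming content type against the exported slices themselves. A minimal
// sketch of such a check, assuming a plain linear scan at the call site (the
// real call sites are outside this diff):
func mimeTypeSupported(mimeType string, supported []string) bool {
	for _, accepted := range supported {
		if mimeType == accepted {
			return true
		}
	}
	return false
}

// e.g. mimeTypeSupported(contentType, SupportedMIMETypes)
//      mimeTypeSupported(contentType, SupportedEmojiMIMETypes)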
-func putStream(ctx context.Context, storage *storage.Driver, key string, r io.Reader, fileSize int64) (int64, error) { - if fileSize > 0 { - return fileSize, storage.PutStream(ctx, key, r) - } - - lr := &lengthReader{ - source: r, - } - - err := storage.PutStream(ctx, key, lr) - return lr.length, err -} diff --git a/internal/media/video.go b/internal/media/video.go index bd624559b..bffdfbbba 100644 --- a/internal/media/video.go +++ b/internal/media/video.go @@ -19,63 +19,55 @@ package media import ( - "bytes" "fmt" - "image" - "image/color" - "image/draw" - "image/jpeg" "io" "os" "github.com/abema/go-mp4" - "github.com/superseriousbusiness/gotosocial/internal/gtserror" - "github.com/superseriousbusiness/gotosocial/internal/log" ) -var thumbFill = color.RGBA{42, 43, 47, 0} // the color to fill video thumbnails with +type gtsVideo struct { + frame *gtsImage + duration float32 // in seconds + bitrate uint64 + framerate float32 +} -func decodeVideo(r io.Reader, contentType string) (*mediaMeta, error) { +// decodeVideoFrame decodes and returns an image from a single frame in the given video stream. +// (note: currently this only returns a blank image resized to fit video dimensions). +func decodeVideoFrame(r io.Reader) (*gtsVideo, error) { // We'll need a readseeker to decode the video. We can get a readseeker // without burning too much mem by first copying the reader into a temp file. // First create the file in the temporary directory... - tempFile, err := os.CreateTemp(os.TempDir(), "gotosocial-") + tmp, err := os.CreateTemp(os.TempDir(), "gotosocial-") if err != nil { - return nil, fmt.Errorf("could not create temporary file while decoding video: %w", err) + return nil, err } - tempFileName := tempFile.Name() - // Make sure to clean up the temporary file when we're done with it defer func() { - if err := tempFile.Close(); err != nil { - log.Errorf("could not close file %s: %s", tempFileName, err) - } - if err := os.Remove(tempFileName); err != nil { - log.Errorf("could not remove file %s: %s", tempFileName, err) - } + tmp.Close() + os.Remove(tmp.Name()) }() // Now copy the entire reader we've been provided into the // temporary file; we won't use the reader again after this. 
- if _, err := io.Copy(tempFile, r); err != nil { - return nil, fmt.Errorf("could not copy video reader into temporary file %s: %w", tempFileName, err) + if _, err := io.Copy(tmp, r); err != nil { + return nil, err } - var ( - width int - height int - duration float32 - framerate float32 - bitrate uint64 - ) - // probe the video file to extract useful metadata from it; for methodology, see: // https://github.com/abema/go-mp4/blob/7d8e5a7c5e644e0394261b0cf72fef79ce246d31/mp4tool/probe/probe.go#L85-L154 - info, err := mp4.Probe(tempFile) + info, err := mp4.Probe(tmp) if err != nil { - return nil, fmt.Errorf("could not probe temporary video file %s: %w", tempFileName, err) + return nil, fmt.Errorf("error probing tmp file %s: %w", tmp.Name(), err) } + var ( + width int + height int + video gtsVideo + ) + for _, tr := range info.Tracks { if tr.AVC == nil { continue @@ -89,72 +81,42 @@ func decodeVideo(r io.Reader, contentType string) (*mediaMeta, error) { height = h } - if br := tr.Samples.GetBitrate(tr.Timescale); br > bitrate { - bitrate = br - } else if br := info.Segments.GetBitrate(tr.TrackID, tr.Timescale); br > bitrate { - bitrate = br + if br := tr.Samples.GetBitrate(tr.Timescale); br > video.bitrate { + video.bitrate = br + } else if br := info.Segments.GetBitrate(tr.TrackID, tr.Timescale); br > video.bitrate { + video.bitrate = br } - if d := float32(tr.Duration) / float32(tr.Timescale); d > duration { - duration = d - framerate = float32(len(tr.Samples)) / duration + if d := float64(tr.Duration) / float64(tr.Timescale); d > float64(video.duration) { + video.framerate = float32(len(tr.Samples)) / float32(d) + video.duration = float32(d) } } - var errs gtserror.MultiError + // Check for empty video metadata. + var empty []string if width == 0 { - errs = append(errs, "video width could not be discovered") + empty = append(empty, "width") } - if height == 0 { - errs = append(errs, "video height could not be discovered") + empty = append(empty, "height") } - - if duration == 0 { - errs = append(errs, "video duration could not be discovered") + if video.duration == 0 { + empty = append(empty, "duration") } - - if framerate == 0 { - errs = append(errs, "video framerate could not be discovered") + if video.framerate == 0 { + empty = append(empty, "framerate") } - - if bitrate == 0 { - errs = append(errs, "video bitrate could not be discovered") + if video.bitrate == 0 { + empty = append(empty, "bitrate") } - - if errs != nil { - return nil, errs.Combine() + if len(empty) > 0 { + return nil, fmt.Errorf("error determining video metadata: %v", empty) } - return &mediaMeta{ - width: width, - height: height, - duration: duration, - framerate: framerate, - bitrate: bitrate, - size: height * width, - aspect: float32(width) / float32(height), - }, nil -} - -func deriveThumbnailFromVideo(height int, width int) (*mediaMeta, error) { - // create a rectangle with the same dimensions as the video - img := image.NewRGBA(image.Rect(0, 0, width, height)) - - // fill the rectangle with our desired fill color - draw.Draw(img, img.Bounds(), &image.Uniform{thumbFill}, image.Point{}, draw.Src) - - // we can get away with using extremely poor quality for this monocolor thumbnail - out := &bytes.Buffer{} - if err := jpeg.Encode(out, img, &jpeg.Options{Quality: 1}); err != nil { - return nil, fmt.Errorf("error encoding video thumbnail: %w", err) - } + // Create new empty "frame" image. + // TODO: decode frame from video file. 
+ video.frame = blankImage(width, height) - return &mediaMeta{ - width: width, - height: height, - size: width * height, - aspect: float32(width) / float32(height), - small: out.Bytes(), - }, nil + return &video, nil } |