summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--go.mod1
-rw-r--r--go.sum2
-rw-r--r--internal/media/metadata.go2
-rw-r--r--internal/media/probe.go5
-rw-r--r--internal/media/util.go25
-rw-r--r--vendor/codeberg.org/gruf/go-mmap/LICENSE9
-rw-r--r--vendor/codeberg.org/gruf/go-mmap/README.md3
-rw-r--r--vendor/codeberg.org/gruf/go-mmap/fs.go104
-rw-r--r--vendor/codeberg.org/gruf/go-mmap/mmap.go142
-rw-r--r--vendor/codeberg.org/gruf/go-mmap/open.go62
-rw-r--r--vendor/modules.txt3
11 files changed, 351 insertions, 7 deletions
diff --git a/go.mod b/go.mod
index e2450dcd4..6d013df4a 100644
--- a/go.mod
+++ b/go.mod
@@ -23,6 +23,7 @@ require (
codeberg.org/gruf/go-kv/v2 v2.0.7
codeberg.org/gruf/go-list v0.0.0-20240425093752-494db03d641f
codeberg.org/gruf/go-mempool v0.0.0-20251003110531-b54adae66253
+ codeberg.org/gruf/go-mmap v0.0.0-20251105140602-5f31e9314dbf
codeberg.org/gruf/go-mutexes v1.5.8
codeberg.org/gruf/go-runners v1.7.0
codeberg.org/gruf/go-sched v1.3.0
diff --git a/go.sum b/go.sum
index 4c259a75c..9eaa889e3 100644
--- a/go.sum
+++ b/go.sum
@@ -44,6 +44,8 @@ codeberg.org/gruf/go-maps v1.0.4 h1:K+Ww4vvR3TZqm5jqrKVirmguZwa3v1VUvmig2SE8uxY=
codeberg.org/gruf/go-maps v1.0.4/go.mod h1:ASX7osM7kFwt5O8GfGflcFjrwYGD8eIuRLl/oMjhEi8=
codeberg.org/gruf/go-mempool v0.0.0-20251003110531-b54adae66253 h1:qPAY72xCWlySVROSNZecfLGAyeV/SiXmPmfhUU+o3Xw=
codeberg.org/gruf/go-mempool v0.0.0-20251003110531-b54adae66253/go.mod h1:761koiXmqfgzvu5mez2Rk7YlwWilpqJ/zv5hIA6NoNI=
+codeberg.org/gruf/go-mmap v0.0.0-20251105140602-5f31e9314dbf h1:WaQskbb8Rnp2CfpZ2pBhbmnnJ+Cxe0xvTSDE8fgdcfM=
+codeberg.org/gruf/go-mmap v0.0.0-20251105140602-5f31e9314dbf/go.mod h1:kaHx5xHuLNf13iALbHiFpXmLeO33Ll/aW3mGkW8sI40=
codeberg.org/gruf/go-mutexes v1.5.8 h1:HRGnvT4COb3jX9xdeoSUUbjPgmk5kXPuDfld9ksUJKA=
codeberg.org/gruf/go-mutexes v1.5.8/go.mod h1:21sy/hWH8dDQBk7ocsxqo2GNpWiIir+e82RG3hjnN20=
codeberg.org/gruf/go-runners v1.7.0 h1:Z+8Qne4H9nAdZZbA4cij0PWhhJxtigUGA4Mp7griYes=
diff --git a/internal/media/metadata.go b/internal/media/metadata.go
index c1fa58645..6524784fd 100644
--- a/internal/media/metadata.go
+++ b/internal/media/metadata.go
@@ -75,7 +75,7 @@ func clearMetadata(ctx context.Context, filepath string) error {
// terminateExif cleans exif data from file at input path, into file
// at output path, using given file extension to determine cleaning type.
func terminateExif(outpath, inpath string, ext string) (err error) {
- var inFile *os.File
+ var inFile fileReader
var outFile *os.File
// Ensure handles
diff --git a/internal/media/probe.go b/internal/media/probe.go
index 5c07b04fb..47990eea3 100644
--- a/internal/media/probe.go
+++ b/internal/media/probe.go
@@ -22,7 +22,6 @@ import (
"encoding/binary"
"image/jpeg"
"io"
- "os"
"strings"
"code.superseriousbusiness.org/gotosocial/internal/gtserror"
@@ -80,7 +79,7 @@ func probe(ctx context.Context, filepath string) (*result, error) {
// probeJPEG decodes the given file as JPEG and determines
// image details from the decoded JPEG using native Go code.
-func probeJPEG(file *os.File) (*result, error) {
+func probeJPEG(file fileReader) (*result, error) {
// Attempt to decode JPEG, adding back hdr magic.
cfg, err := jpeg.DecodeConfig(io.MultiReader(
@@ -129,7 +128,7 @@ func probeJPEG(file *os.File) (*result, error) {
//
// copied from github.com/disintegration/imaging
// but modified to optimize discard operations.
-func readOrientation(r *os.File) int {
+func readOrientation(r fileReader) int {
const (
markerAPP1 = 0xffe1
exifHeader = 0x45786966
diff --git a/internal/media/util.go b/internal/media/util.go
index d73206434..fbd232daa 100644
--- a/internal/media/util.go
+++ b/internal/media/util.go
@@ -24,10 +24,13 @@ import (
"io/fs"
"os"
"path"
+ "runtime"
+ "syscall"
"code.superseriousbusiness.org/gotosocial/internal/gtserror"
"codeberg.org/gruf/go-bytesize"
"codeberg.org/gruf/go-iotools"
+ "codeberg.org/gruf/go-mmap"
)
// media processing tmpdir.
@@ -82,15 +85,31 @@ func (af allowFiles) Open(name string) (fs.File, error) {
// Ffmpeg likes to read containing
// dir as '.'. Allow RO access here.
case ".":
- return openRead(file.dir)
+ return os.OpenFile(file.dir, os.O_RDONLY, 0)
}
}
return nil, os.ErrPermission
}
+// mmapThreshold defines the threshold file size (in bytes) at or above
+// which a call to openRead() deems a file big enough to be worth opening
+// using an `mmap` syscall. This is a runtime initialized number based on
+// the number of available CPUs, as in concurrent conditions Go can make
+// optimizations for blocking `read` syscalls which scale with the
+// number of goroutines it can have running at once.
+var mmapThreshold = mmap.Threshold{At: int64(runtime.NumCPU() * syscall.Getpagesize())}
+
+// fileReader is a type alias to the interface that
+// codeberg.org/gruf/go-mmap exposes, to make things a
+// little less visually confusing. This interface
+// abstracts away whether a (regular!) file has been
+// opened via os.OpenFile(..., RDONLY) or has been
+// mmapped into memory for access via byte slice.
+type fileReader = mmap.FileReader
+
// openRead opens the existing file at path for reads only.
-func openRead(path string) (*os.File, error) {
- return os.OpenFile(path, os.O_RDONLY, 0)
+func openRead(path string) (fileReader, error) {
+ return mmapThreshold.OpenRead(path)
}
// openWrite opens the (new!) file at path for read / writes.
diff --git a/vendor/codeberg.org/gruf/go-mmap/LICENSE b/vendor/codeberg.org/gruf/go-mmap/LICENSE
new file mode 100644
index 000000000..d6f08d0ab
--- /dev/null
+++ b/vendor/codeberg.org/gruf/go-mmap/LICENSE
@@ -0,0 +1,9 @@
+MIT License
+
+Copyright (c) gruf
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/vendor/codeberg.org/gruf/go-mmap/README.md b/vendor/codeberg.org/gruf/go-mmap/README.md
new file mode 100644
index 000000000..76fc5fccd
--- /dev/null
+++ b/vendor/codeberg.org/gruf/go-mmap/README.md
@@ -0,0 +1,3 @@
+# go-mmap
+
+Optimized large file reads in Go
\ No newline at end of file
diff --git a/vendor/codeberg.org/gruf/go-mmap/fs.go b/vendor/codeberg.org/gruf/go-mmap/fs.go
new file mode 100644
index 000000000..8c47d84b0
--- /dev/null
+++ b/vendor/codeberg.org/gruf/go-mmap/fs.go
@@ -0,0 +1,104 @@
+package mmap
+
+import (
+ "io/fs"
+ "path"
+ "syscall"
+ "time"
+)
+
+// fileStat bundles the raw result of a stat syscall with the file's
+// base name and its mode translated into an fs.FileMode. It is the
+// value handed out as fs.FileInfo by MmappedFile.Stat().
+type fileStat struct {
+ syscall.Stat_t
+ name string
+ mode fs.FileMode
+}
+
+// Name returns the stored file name.
+func (s *fileStat) Name() string { return s.name }
+// IsDir reports whether the translated mode describes a directory.
+func (s *fileStat) IsDir() bool { return s.mode.IsDir() }
+// Mode returns the translated fs.FileMode.
+func (s *fileStat) Mode() fs.FileMode { return s.mode }
+// Size returns the size field of the raw stat result.
+func (s *fileStat) Size() int64 { return s.Stat_t.Size }
+// ModTime converts the raw Mtim timespec into a time.Time.
+func (s *fileStat) ModTime() time.Time { return time.Unix(s.Stat_t.Mtim.Unix()) }
+// Sys returns a pointer to the embedded raw syscall.Stat_t.
+func (s *fileStat) Sys() any { return &s.Stat_t }
+
+// open wraps syscall.Open(), repeating the call for as long
+// as it is interrupted (EINTR). It returns the descriptor
+// produced by the final attempt together with its error.
+func open(filepath string, mode int, perm uint32) (fd int, err error) {
+	attempt := func() error {
+		var e error
+		fd, e = syscall.Open(filepath, mode, perm)
+		return e
+	}
+	err = retryOnEINTR(attempt)
+	return fd, err
+}
+
+// stat is a simple wrapper around syscall.Stat(), retried on EINTR.
+// On success it returns a fileStat holding the raw Stat_t, the base
+// name of filepath, and the raw mode bits translated to an fs.FileMode.
+func stat(filepath string) (*fileStat, error) {
+ var stat fileStat
+ err := retryOnEINTR(func() error {
+ return syscall.Stat(filepath, &stat.Stat_t)
+ })
+ if err != nil {
+ return nil, err
+ }
+ stat.name = path.Base(filepath)
+ // Start from the permission bits only, then add the
+ // fs.FileMode type flag matching the raw file type.
+ stat.mode = fs.FileMode(stat.Stat_t.Mode & 0777)
+ switch stat.Stat_t.Mode & syscall.S_IFMT {
+ case syscall.S_IFBLK:
+ stat.mode |= fs.ModeDevice
+ case syscall.S_IFCHR:
+ stat.mode |= fs.ModeDevice | fs.ModeCharDevice
+ case syscall.S_IFDIR:
+ stat.mode |= fs.ModeDir
+ case syscall.S_IFIFO:
+ stat.mode |= fs.ModeNamedPipe
+ case syscall.S_IFLNK:
+ stat.mode |= fs.ModeSymlink
+ case syscall.S_IFREG:
+ // nothing to do
+ case syscall.S_IFSOCK:
+ stat.mode |= fs.ModeSocket
+ }
+ // Carry over the setgid, setuid and sticky bits, which
+ // the 0777 permission mask above does not include.
+ if stat.Stat_t.Mode&syscall.S_ISGID != 0 {
+ stat.mode |= fs.ModeSetgid
+ }
+ if stat.Stat_t.Mode&syscall.S_ISUID != 0 {
+ stat.mode |= fs.ModeSetuid
+ }
+ if stat.Stat_t.Mode&syscall.S_ISVTX != 0 {
+ stat.mode |= fs.ModeSticky
+ }
+ return &stat, nil
+}
+
+// mmap wraps syscall.Mmap(), repeating the call for as
+// long as it is interrupted (EINTR). It returns the slice
+// produced by the final attempt together with its error.
+func mmap(fd int, offset int64, length int, prot int, flags int) (b []byte, err error) {
+	mapOnce := func() error {
+		var e error
+		b, e = syscall.Mmap(fd, offset, length, prot, flags)
+		return e
+	}
+	err = retryOnEINTR(mapOnce)
+	return b, err
+}
+
+// munmap wraps syscall.Munmap() on the given mapped slice,
+// repeating the call for as long as it is interrupted (EINTR).
+func munmap(b []byte) error {
+	unmapOnce := func() error { return syscall.Munmap(b) }
+	return retryOnEINTR(unmapOnce)
+}
+
+// close_ is a simple wrapper around syscall.Close().
+//
+// Unlike the other wrappers in this file it makes exactly one attempt
+// and never retries on EINTR: POSIX leaves the state of the descriptor
+// unspecified when close() is interrupted, and on Linux it has already
+// been released by then. A retry could therefore close a descriptor
+// that another goroutine has since been handed with the same number.
+// Any error (including EINTR) is returned to the caller unchanged.
+func close_(fd int) error {
+	return syscall.Close(fd)
+}
+
+// retryOnEINTR is a low-level helper that keeps invoking
+// do() for as long as it returns syscall.EINTR, and hands
+// back the first result that is anything else (nil included).
+func retryOnEINTR(do func() error) error {
+	err := do()
+	for err == syscall.EINTR {
+		err = do()
+	}
+	return err
+}
diff --git a/vendor/codeberg.org/gruf/go-mmap/mmap.go b/vendor/codeberg.org/gruf/go-mmap/mmap.go
new file mode 100644
index 000000000..797ed3e73
--- /dev/null
+++ b/vendor/codeberg.org/gruf/go-mmap/mmap.go
@@ -0,0 +1,142 @@
+package mmap
+
+import (
+ "errors"
+ "io"
+ "io/fs"
+ "runtime"
+ "syscall"
+)
+
+// MmapFile stats the file at path and maps it into memory using
+// an mmap syscall, returning an *MmappedFile for accessing the
+// mapped data. An error from either the stat or the mapping step
+// is returned as-is.
+//
+// Note that the returned file is not concurrency safe, other than
+// for concurrent ReadAt() calls, which do not touch the read index.
+// Calls to Read(), WriteTo(), Seek() or Close() made concurrently
+// with any other call (including ReadAt()) require protection.
+func MmapFile(path string) (*MmappedFile, error) {
+
+ // Stat file information.
+ stat, err := stat(path)
+ if err != nil {
+ return nil, err
+ }
+
+ // Mmap file into memory.
+ return openMmap(path, stat)
+}
+
+// openMmap maps the file at path into memory read-only, using the
+// size recorded in the previously gathered stat, and wraps the
+// mapping in an *MmappedFile. A file of size zero is not mapped at
+// all: the returned value has no data and every read reports EOF.
+// It errors if the size does not fit in an int, or if opening or
+// mapping the file fails.
+func openMmap(path string, stat *fileStat) (*MmappedFile, error) {
+	size := stat.Size()
+
+	if size <= 0 {
+		// Empty file, no-op read. The stat info is still
+		// attached so that Name() and Stat() keep working
+		// instead of dereferencing / returning a nil stat.
+		return &MmappedFile{s: stat}, nil
+	}
+
+	// Check file data size is addressable as a slice length.
+	if size != int64(int(size)) {
+		return nil, errors.New("file is too large")
+	}
+
+	// Open file at path for read-only access.
+	fd, err := open(path, syscall.O_RDONLY, 0)
+	if err != nil {
+		return nil, err
+	}
+
+	// Map this file into memory as slice.
+	mem, err := mmap(fd, 0, int(size),
+		syscall.PROT_READ, syscall.MAP_PRIVATE)
+
+	// Done with the descriptor whether or not mapping worked;
+	// an established mapping does not need it to stay open.
+	// The close error is deliberately ignored: the descriptor
+	// was read-only, so there is nothing to flush or recover.
+	_ = close_(fd)
+
+	if err != nil {
+		return nil, err
+	}
+
+	// Return as wrapped reader type.
+	return newMmapReader(mem, stat), nil
+}
+
+// newMmapReader builds an MmappedFile{} around the given mapped
+// memory and stat info, and registers Close() as its GC finalizer
+// so the mapping is released even if the caller never closes it.
+func newMmapReader(mem []byte, stat *fileStat) *MmappedFile {
+	file := new(MmappedFile)
+	file.b, file.s = mem, stat
+	runtime.SetFinalizer(file, (*MmappedFile).Close)
+	return file
+}
+
+// MmappedFile is a read-only view over a file's contents that
+// have been mapped into memory. It keeps its own read index,
+// which Read(), WriteTo() and Seek() update.
+type MmappedFile struct {
+ b []byte // mapped memory; set to nil by Close()
+ n int // read index into b
+ s *fileStat // file info captured when the file was opened
+}
+
+// Name returns the file name recorded in the stored file info.
+func (r *MmappedFile) Name() string {
+ return r.s.name
+}
+
+// Stat returns the file info captured when the file was opened.
+// No new syscall is made, and the returned error is always nil.
+func (r *MmappedFile) Stat() (fs.FileInfo, error) {
+ return r.s, nil
+}
+
+// Read copies data from the current read index into b and
+// advances the index by the number of bytes copied. Once the
+// index has reached the end of the data it returns io.EOF.
+func (r *MmappedFile) Read(b []byte) (n int, err error) {
+	if r.n < len(r.b) {
+		n = copy(b, r.b[r.n:])
+		r.n += n
+		return n, nil
+	}
+	return 0, io.EOF
+}
+
+// ReadAt copies data starting at absolute offset off into b,
+// without touching the read index. Following the io.ReaderAt
+// contract it returns a non-nil error whenever fewer than len(b)
+// bytes were copied: io.EOF when the data ends first, or an
+// "invalid argument" error for a negative offset.
+func (r *MmappedFile) ReadAt(b []byte, off int64) (n int, err error) {
+	if off < 0 {
+		// Slicing with a negative index would panic.
+		return 0, errors.New("invalid argument")
+	}
+	if off >= int64(len(r.b)) {
+		return 0, io.EOF
+	}
+	n = copy(b, r.b[off:])
+	if n < len(b) {
+		// Short read: data ended before b was filled.
+		err = io.EOF
+	}
+	return n, err
+}
+
+// WriteTo writes all data from the current read index onwards to
+// w in a single Write call, advancing the index by the number of
+// bytes written. When nothing is left to write it returns (0, nil):
+// io.Copy passes a WriterTo's error straight through, so reporting
+// io.EOF here would turn a finished copy into a failure. A writer
+// that accepts fewer bytes without an error yields io.ErrShortWrite.
+func (r *MmappedFile) WriteTo(w io.Writer) (int64, error) {
+	if r.n >= len(r.b) {
+		return 0, nil
+	}
+	rest := r.b[r.n:]
+	n, err := w.Write(rest)
+	if n < 0 || n > len(rest) {
+		// A count outside the slice would corrupt the read
+		// index; leave it untouched and report the problem.
+		return 0, errors.New("invalid write count")
+	}
+	r.n += n
+	if n < len(rest) && err == nil {
+		err = io.ErrShortWrite
+	}
+	return int64(n), err
+}
+
+// Seek moves the read index to off, interpreted relative to the
+// start, the current index, or the end of the data according to
+// whence, and returns the new absolute index. An unknown whence,
+// or a target outside [0, len(data)], gives an "invalid argument"
+// error and leaves the index unchanged.
+func (r *MmappedFile) Seek(off int64, whence int) (int64, error) {
+	var base int64
+	switch whence {
+	case io.SeekStart:
+		base = 0
+	case io.SeekCurrent:
+		base = int64(r.n)
+	case io.SeekEnd:
+		base = int64(len(r.b))
+	default:
+		return 0, errors.New("invalid argument")
+	}
+
+	// Range-check in int64 before converting: narrowing off to
+	// int first could wrap a huge offset into the valid range on
+	// 32-bit platforms. base and len(r.b) are both non-negative,
+	// so neither bound below can overflow.
+	if off < -base || off > int64(len(r.b))-base {
+		return 0, errors.New("invalid argument")
+	}
+
+	pos := base + off
+	r.n = int(pos)
+	return pos, nil
+}
+
+// Len returns the number of bytes between the read index and the
+// end of the data. It never goes negative: after Close() the data
+// is dropped while the read index is kept, in which case it is 0.
+func (r *MmappedFile) Len() int {
+	if r.n >= len(r.b) {
+		return 0
+	}
+	return len(r.b) - r.n
+}
+
+// Size returns the total length in bytes of the mapped
+// data, regardless of the current read index.
+func (r *MmappedFile) Size() int64 {
+ return int64(len(r.b))
+}
+
+// Close unmaps the mapped memory, if there is any, and clears the
+// GC finalizer. The data is dropped before unmapping so later calls
+// find nothing to release and simply return nil.
+func (r *MmappedFile) Close() error {
+	mem := r.b
+	if mem == nil {
+		return nil
+	}
+	r.b = nil
+	runtime.SetFinalizer(r, nil)
+	return munmap(mem)
+}
diff --git a/vendor/codeberg.org/gruf/go-mmap/open.go b/vendor/codeberg.org/gruf/go-mmap/open.go
new file mode 100644
index 000000000..c12a091d3
--- /dev/null
+++ b/vendor/codeberg.org/gruf/go-mmap/open.go
@@ -0,0 +1,62 @@
+package mmap
+
+import (
+ "io"
+ "io/fs"
+ "os"
+ "runtime"
+ "syscall"
+)
+
+// MmapThreshold defines the threshold file size (in bytes) at or above
+// which a call to OpenRead() deems a file big enough to be worth opening
+// using an `mmap` syscall. It is initialized at runtime to the number of
+// available CPUs multiplied by the system page size, as in concurrent
+// conditions Go can make optimizations for blocking `read` syscalls which
+// scale with the number of goroutines it can have running at once.
+var MmapThreshold = int64(runtime.NumCPU() * syscall.Getpagesize())
+
+// FileReader defines the base interface
+// of a readable file, whether accessed
+// via `read` or `mmap` syscalls. Both
+// values that Threshold.OpenRead() can
+// return (*MmappedFile and *os.File)
+// are handed out as this interface.
+type FileReader interface {
+ fs.File
+ io.ReaderAt
+ io.WriterTo
+ io.Seeker
+ Name() string
+}
+
+// Threshold is a receiver type for OpenRead() that allows
+// using a custom threshold in place of MmapThreshold. At is
+// the file size (in bytes) at or above which files are mmapped.
+type Threshold struct{ At int64 }
+
+// OpenRead opens the file at path as read only, behaving like the
+// package-level OpenRead() but with t.At as the size threshold.
+// Regular files whose size is at least t.At are mapped into memory;
+// smaller files, and anything that is not a regular file (such as a
+// directory, which cannot be mmapped), are opened with os.OpenFile().
+//
+// On failure the returned FileReader is a true nil interface, not
+// an interface wrapping a nil pointer.
+func (t Threshold) OpenRead(path string) (FileReader, error) {
+	info, err := stat(path)
+	if err != nil {
+		return nil, err
+	}
+
+	if info.Mode().IsRegular() && info.Size() >= t.At {
+		mapped, err := openMmap(path, info)
+		if err != nil {
+			return nil, err
+		}
+		return mapped, nil
+	}
+
+	file, err := os.OpenFile(path, os.O_RDONLY, 0)
+	if err != nil {
+		return nil, err
+	}
+	return file, nil
+}
+
+// OpenRead will open the file as read only (erroring if it does
+// not already exist). If the size of the file at path is at or
+// above 'MmapThreshold' it will be opened for reads by mapping it
+// into memory with an `mmap` syscall, giving an *MmappedFile.
+// Else, it will be opened using os.OpenFile().
+//
+// Please note that the reader returned by this function is not
+// guaranteed to be concurrency-safe. Files returned by os.OpenFile()
+// follow the usual standard library concurrency guarantees, but a
+// returned *MmappedFile provides no concurrent protection.
+//
+// Also note that this may not always be faster! If the file you need
+// to open will be immediately drained to another file, TCP or Unix
+// connection, then the standard library will use optimized syscalls.
+func OpenRead(path string) (FileReader, error) {
+ return Threshold{MmapThreshold}.OpenRead(path)
+}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index c3c98820a..215a63879 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -274,6 +274,9 @@ codeberg.org/gruf/go-maps
# codeberg.org/gruf/go-mempool v0.0.0-20251003110531-b54adae66253
## explicit; go 1.24.0
codeberg.org/gruf/go-mempool
+# codeberg.org/gruf/go-mmap v0.0.0-20251105140602-5f31e9314dbf
+## explicit; go 1.20
+codeberg.org/gruf/go-mmap
# codeberg.org/gruf/go-mutexes v1.5.8
## explicit; go 1.24.0
codeberg.org/gruf/go-mutexes