summary | refs | log | tree | commit | diff
path: root/vendor/git.iim.gay/grufwub/go-store/storage/block.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/git.iim.gay/grufwub/go-store/storage/block.go')
-rw-r--r-- vendor/git.iim.gay/grufwub/go-store/storage/block.go | 785
1 files changed, 785 insertions, 0 deletions
diff --git a/vendor/git.iim.gay/grufwub/go-store/storage/block.go b/vendor/git.iim.gay/grufwub/go-store/storage/block.go
new file mode 100644
index 000000000..023b83886
--- /dev/null
+++ b/vendor/git.iim.gay/grufwub/go-store/storage/block.go
@@ -0,0 +1,785 @@
+package storage
+
+import (
+ "crypto/sha256"
+ "io"
+ "io/fs"
+ "os"
+ "strings"
+ "sync"
+ "syscall"
+
+ "git.iim.gay/grufwub/fastpath"
+ "git.iim.gay/grufwub/go-bytes"
+ "git.iim.gay/grufwub/go-errors"
+ "git.iim.gay/grufwub/go-hashenc"
+ "git.iim.gay/grufwub/go-store/util"
+)
+
+// nodePathPrefix is the sub-directory (joined onto the store
+// root path) under which node files are kept.
+var nodePathPrefix = "node/"
+
+// blockPathPrefix is the sub-directory (joined onto the store
+// root path) under which block files are kept.
+var blockPathPrefix = "block/"
+
+// DefaultBlockConfig is the configuration applied when OpenBlock is given
+// a nil config, and the source of fallback values for unset fields.
+var DefaultBlockConfig = &BlockConfig{
+	BlockSize:    16 * 1024,
+	WriteBufSize: 4 * 1024,
+	Overwrite:    false,
+	Compression:  NoCompression(),
+}
+
+// BlockConfig holds the options applied when opening a BlockStorage.
+type BlockConfig struct {
+	// BlockSize is the chunk size (in bytes) that stored
+	// values are split into before being written as blocks.
+	BlockSize int
+
+	// WriteBufSize is the size (in bytes) of the buffer
+	// acquired when writing node files (see WriteStream).
+	WriteBufSize int
+
+	// Overwrite permits replacing the value of a key
+	// that already exists in the storage.
+	Overwrite bool
+
+	// Compression is the Compressor applied when reading and
+	// writing block files. A nil value means no compression.
+	Compression Compressor
+}
+
+// getBlockConfig returns a validated BlockConfig value derived from cfg.
+//
+// A nil cfg selects DefaultBlockConfig. Unset fields (nil Compression,
+// BlockSize < 1, WriteBufSize < 1) are replaced by defaults. All defaults
+// are applied to an owned copy, so neither the caller's struct nor the
+// shared DefaultBlockConfig is ever modified by this call.
+func getBlockConfig(cfg *BlockConfig) BlockConfig {
+	// If nil, use default
+	if cfg == nil {
+		cfg = DefaultBlockConfig
+	}
+
+	// Take an owned copy BEFORE filling in defaults, so
+	// that we never write through the supplied pointer
+	config := BlockConfig{
+		BlockSize:    cfg.BlockSize,
+		WriteBufSize: cfg.WriteBufSize,
+		Overwrite:    cfg.Overwrite,
+		Compression:  cfg.Compression,
+	}
+
+	// Assume nil compress == none
+	if config.Compression == nil {
+		config.Compression = NoCompression()
+	}
+
+	// Assume 0 chunk size == use default
+	if config.BlockSize < 1 {
+		config.BlockSize = DefaultBlockConfig.BlockSize
+	}
+
+	// Assume 0 buf size == use default (taken from the
+	// block defaults, consistent with BlockSize above)
+	if config.WriteBufSize < 1 {
+		config.WriteBufSize = DefaultBlockConfig.WriteBufSize
+	}
+
+	return config
+}
+
+// BlockStorage is a Storage implementation that keeps values on a filesystem
+// as fixed-size chunks. Every value is split into blocks of the configured
+// size, and each block is stored in a file named after the base64-encoded
+// SHA256 hash-sum of the block. A "node" file is then written per key, holding
+// the list of block hashes that make up the value.
+type BlockStorage struct {
+	path      string      // path is the root directory of this store
+	blockPath string      // blockPath is the root path joined with the block path prefix
+	nodePath  string      // nodePath is the root path joined with the node path prefix
+	config    BlockConfig // config is the checked configuration this store was opened with
+	hashPool  sync.Pool   // hashPool is a pool of *hashEncoder instances for this store
+
+	// NOTE:
+	// No locking is performed around the underlying block files, because
+	// a block's filename is derived directly from its contents. Should two
+	// writers race on the same block file, both are writing the same data
+	// since the filename is the hash of that data.
+}
+
+// OpenBlock opens a BlockStorage rooted at the given directory path, using the
+// supplied configuration (nil selects the defaults). The directory is created
+// if it does not yet exist; an error is returned if path refers to a file.
+func OpenBlock(path string, cfg *BlockConfig) (*BlockStorage, error) {
+	builder := util.AcquirePathBuilder()
+	defer util.ReleasePathBuilder(builder)
+
+	// Normalize the root path with a trailing '/', since
+	// it must be a directory (eases later path trimming)
+	path = builder.Clean(path) + "/"
+
+	// Validate / default the config
+	config := getBlockConfig(cfg)
+
+	// Open the root path, creating it first if missing
+	dir, err := os.OpenFile(path, defaultFileROFlags, defaultDirPerms)
+	switch {
+	case err == nil:
+		// opened first time, nothing to do
+
+	case !os.IsNotExist(err):
+		// Any error other than not-exist is fatal
+		return nil, err
+
+	default:
+		// Create the store directories
+		if err = os.MkdirAll(path, defaultDirPerms); err != nil {
+			return nil, err
+		}
+
+		// Retry the open now that the path exists
+		if dir, err = os.OpenFile(path, defaultFileROFlags, defaultDirPerms); err != nil {
+			return nil, err
+		}
+	}
+	defer dir.Close()
+
+	// The root must be a directory, never a regular file
+	info, err := dir.Stat()
+	if err != nil {
+		return nil, err
+	}
+	if !info.IsDir() {
+		return nil, errPathIsFile
+	}
+
+	// Assemble the storage instance
+	st := &BlockStorage{
+		path:      path,
+		blockPath: builder.Join(path, blockPathPrefix),
+		nodePath:  builder.Join(path, nodePathPrefix),
+		config:    config,
+		hashPool: sync.Pool{
+			New: func() interface{} {
+				return newHashEncoder()
+			},
+		},
+	}
+	return st, nil
+}
+
+// Clean implements storage.Clean(). It loads every node file, then walks the
+// block directory deleting any block file that no loaded node refers to.
+// Nodes still holding hashes afterwards refer to blocks that are not on disk,
+// and are reported as corrupt.
+func (st *BlockStorage) Clean() error {
+	// Acquire path builder
+	pb := fastpath.AcquireBuilder()
+	defer fastpath.ReleaseBuilder(pb)
+
+	// live maps node file name -> hashes not yet matched to a block file
+	live := map[string]*node{}
+	walkErr := errors.OnceError{}
+
+	// Pass 1: parse every regular file in the nodes dir
+	err := util.WalkDir(pb, st.nodePath, func(dir string, de fs.DirEntry) {
+		// Skip non-regular entries, and everything after a failure
+		if walkErr.IsSet() || !de.Type().IsRegular() {
+			return
+		}
+		name := de.Name()
+
+		// Open node file read-only
+		file, err := open(pb.Join(dir, name), defaultFileROFlags)
+		if err != nil {
+			walkErr.Store(err)
+			return
+		}
+		defer file.Close()
+
+		// Scratch buffer the node writer accumulates each hash in
+		hbuf := util.AcquireBuffer(encodedHashLen)
+		defer util.ReleaseBuffer(hbuf)
+
+		// Decode file contents into a fresh node
+		parsed := &node{}
+		dst := &nodeWriter{node: parsed, buf: hbuf}
+		if _, err = io.CopyBuffer(dst, file, nil); err != nil {
+			walkErr.Store(err)
+			return
+		}
+
+		live[name] = parsed
+	})
+
+	// A missing nodes dir is fine (nothing written yet)
+	switch {
+	case err != nil && !os.IsNotExist(err):
+		return err
+	case walkErr.IsSet():
+		return walkErr.Load()
+	}
+
+	// Pass 2: match block files against the loaded nodes
+	walkErr.Reset()
+	err = util.WalkDir(pb, st.blockPath, func(dir string, de fs.DirEntry) {
+		// Skip non-regular entries, and everything after a failure
+		if walkErr.IsSet() || !de.Type().IsRegular() {
+			return
+		}
+		hash := de.Name()
+
+		// Strike this hash from every node that lists it
+		referenced := false
+		for key, nd := range live {
+			if !nd.removeHash(hash) {
+				continue
+			}
+			referenced = true
+
+			// All of this node's blocks are accounted
+			// for, so it no longer needs tracking
+			if len(nd.hashes) < 1 {
+				delete(live, key)
+			}
+		}
+
+		// Keep blocks that some node uses
+		if referenced {
+			return
+		}
+
+		// Nothing refers to this block, delete it
+		if err := os.Remove(pb.Join(dir, hash)); err != nil {
+			walkErr.Store(err)
+		}
+	})
+
+	// A missing blocks dir is fine (nothing written yet)
+	switch {
+	case err != nil && !os.IsNotExist(err):
+		return err
+	case walkErr.IsSet():
+		return walkErr.Load()
+	}
+
+	// Every node fully matched
+	if len(live) == 0 {
+		return nil
+	}
+
+	// Remaining nodes list hashes with no block
+	// file on disk, i.e. they are corrupt
+	corrupt := make([]string, 0, len(live))
+	for key := range live {
+		corrupt = append(corrupt, key)
+	}
+	return errCorruptNodes.Extend("%v", corrupt)
+}
+
+// ReadBytes implements Storage.ReadBytes(). It opens a stream for key via
+// ReadStream, reads it to completion and returns the bytes. Any error from
+// opening the stream or from reading it is returned to the caller.
+func (st *BlockStorage) ReadBytes(key string) ([]byte, error) {
+	// Get stream reader for key
+	rc, err := st.ReadStream(key)
+	if err != nil {
+		return nil, err
+	}
+
+	// Always close the stream once done with it, rather than
+	// relying on the returned closer happening to be a no-op
+	defer rc.Close()
+
+	// Read all bytes and return
+	return io.ReadAll(rc)
+}
+
+// ReadStream implements Storage.ReadStream(). The node file for key is parsed
+// up-front into its list of block hashes; the returned reader then loads the
+// blocks one by one as it is read from.
+func (st *BlockStorage) ReadStream(key string) (io.ReadCloser, error) {
+	// Resolve key -> node file path
+	npath, err := st.nodePathForKey(key)
+	if err != nil {
+		return nil, err
+	}
+
+	// Open node file read-only
+	file, err := open(npath, defaultFileROFlags)
+	if err != nil {
+		return nil, err
+	}
+	defer file.Close()
+
+	// Scratch buffer the node writer accumulates each hash in
+	hbuf := util.AcquireBuffer(encodedHashLen)
+	defer util.ReleaseBuffer(hbuf)
+
+	// Decode the node file into its hash list
+	parsed := new(node)
+	dst := &nodeWriter{node: parsed, buf: hbuf}
+	if _, err := io.CopyBuffer(dst, file, nil); err != nil {
+		return nil, err
+	}
+
+	// Hand back a reader over the node's blocks
+	rd := &blockReader{storage: st, node: parsed}
+	return util.NopReadCloser(rd), nil
+}
+
+// readBlock loads the full contents of the block file stored under the
+// supplied hash key, passing it through the configured compressor's reader.
+func (st *BlockStorage) readBlock(key string) ([]byte, error) {
+	// Open block file read-only
+	file, err := open(st.blockPathForKey(key), defaultFileROFlags)
+	if err != nil {
+		return nil, err
+	}
+	defer file.Close()
+
+	// Layer the configured compressor over the file
+	src, err := st.config.Compression.Reader(file)
+	if err != nil {
+		return nil, err
+	}
+	defer src.Close()
+
+	// Slurp everything
+	data, err := io.ReadAll(src)
+	return data, err
+}
+
+// WriteBytes implements Storage.WriteBytes(), by wrapping
+// value in a reader and delegating to WriteStream.
+func (st *BlockStorage) WriteBytes(key string, value []byte) error {
+	src := bytes.NewReader(value)
+	return st.WriteStream(key, src)
+}
+
+// WriteStream implements Storage.WriteStream()
+func (st *BlockStorage) WriteStream(key string, r io.Reader) error {
+ // Get node file path for key
+ npath, err := st.nodePathForKey(key)
+ if err != nil {
+ return err
+ }
+
+ // Check if this exists
+ ok, err := stat(key)
+ if err != nil {
+ return err
+ }
+
+ // Check if we allow overwrites
+ if ok && !st.config.Overwrite {
+ return ErrAlreadyExists
+ }
+
+ // Ensure nodes dir (and any leading up to) exists
+ err = os.MkdirAll(st.nodePath, defaultDirPerms)
+ if err != nil {
+ return err
+ }
+
+ // Ensure blocks dir (and any leading up to) exists
+ err = os.MkdirAll(st.blockPath, defaultDirPerms)
+ if err != nil {
+ return err
+ }
+
+ // Alloc new node
+ node := node{}
+
+ // Acquire HashEncoder
+ hc := st.hashPool.Get().(*hashEncoder)
+ defer st.hashPool.Put(hc)
+
+ // Create new waitgroup and OnceError for
+ // goroutine error tracking and propagating
+ wg := sync.WaitGroup{}
+ onceErr := errors.OnceError{}
+
+loop:
+ for !onceErr.IsSet() {
+ // Fetch new buffer for this loop
+ buf := util.AcquireBuffer(st.config.BlockSize)
+ buf.Grow(st.config.BlockSize)
+
+ // Read next chunk
+ n, err := io.ReadFull(r, buf.B)
+ switch err {
+ case nil, io.ErrUnexpectedEOF:
+ // do nothing
+ case io.EOF:
+ util.ReleaseBuffer(buf)
+ break loop
+ default:
+ util.ReleaseBuffer(buf)
+ return err
+ }
+
+ // Hash the encoded data
+ sum := hc.EncodeSum(buf.B)
+
+ // Append to the node's hashes
+ node.hashes = append(node.hashes, sum.String())
+
+ // If already on disk, skip
+ has, err := st.statBlock(sum.StringPtr())
+ if err != nil {
+ util.ReleaseBuffer(buf)
+ return err
+ } else if has {
+ util.ReleaseBuffer(buf)
+ continue loop
+ }
+
+ // Write in separate goroutine
+ wg.Add(1)
+ go func() {
+ // Defer buffer release + signal done
+ defer func() {
+ util.ReleaseBuffer(buf)
+ wg.Done()
+ }()
+
+ // Write block to store at hash
+ err = st.writeBlock(sum.StringPtr(), buf.B[:n])
+ if err != nil {
+ onceErr.Store(err)
+ return
+ }
+ }()
+
+ // We reached EOF
+ if n < buf.Len() {
+ break loop
+ }
+ }
+
+ // Wait, check errors
+ wg.Wait()
+ if onceErr.IsSet() {
+ return onceErr.Load()
+ }
+
+ // If no hashes created, return
+ if len(node.hashes) < 1 {
+ return errNoHashesWritten
+ }
+
+ // Prepare to swap error if need-be
+ errSwap := errSwapNoop
+
+ // Build file RW flags
+ // NOTE: we performed an initial check for
+ // this before writing blocks, but if
+ // the utilizer of this storage didn't
+ // correctly mutex protect this key then
+ // someone may have beaten us to the
+ // punch at writing the node file.
+ flags := defaultFileRWFlags
+ if !st.config.Overwrite {
+ flags |= syscall.O_EXCL
+
+ // Catch + replace err exist
+ errSwap = errSwapExist
+ }
+
+ // Attempt to open RW file
+ file, err := open(npath, flags)
+ if err != nil {
+ return errSwap(err)
+ }
+ defer file.Close()
+
+ // Acquire write buffer
+ buf := util.AcquireBuffer(st.config.WriteBufSize)
+ defer util.ReleaseBuffer(buf)
+ buf.Grow(st.config.WriteBufSize)
+
+ // Finally, write data to file
+ _, err = io.CopyBuffer(file, &nodeReader{node: &node}, nil)
+ return err
+}
+
+// writeBlock writes value to the block file named after hash, passing it
+// through the configured compressor's writer. An ErrAlreadyExists from opening
+// the file is treated as success. Errors from closing the compressor writer
+// are reported too, since a failed close may mean data never reached the file.
+func (st *BlockStorage) writeBlock(hash string, value []byte) error {
+	// Get block file path for key
+	bpath := st.blockPathForKey(hash)
+
+	// Attempt to open RW file
+	file, err := open(bpath, defaultFileRWFlags)
+	if err != nil {
+		if err == ErrAlreadyExists {
+			err = nil /* race issue described in struct NOTE */
+		}
+		return err
+	}
+	defer file.Close()
+
+	// Wrap the file in a compressor
+	cFile, err := st.config.Compression.Writer(file)
+	if err != nil {
+		return err
+	}
+
+	// Write value to file
+	_, err = cFile.Write(value)
+
+	// Close explicitly (not deferred) so a close failure is
+	// returned, without masking an earlier write error
+	if cerr := cFile.Close(); err == nil {
+		err = cerr
+	}
+	return err
+}
+
+// statBlock reports whether a block file exists for the supplied block hash.
+func (st *BlockStorage) statBlock(hash string) (bool, error) {
+	bpath := st.blockPathForKey(hash)
+	return stat(bpath)
+}
+
+// Stat implements Storage.Stat(), reporting
+// whether a node file exists for key.
+func (st *BlockStorage) Stat(key string) (bool, error) {
+	// Resolve key -> node file path
+	npath, err := st.nodePathForKey(key)
+	if err == nil {
+		// Look for the node file on disk
+		return stat(npath)
+	}
+	return false, err
+}
+
+// Remove implements Storage.Remove(), deleting the node file for key.
+// Only the node file is removed here; block files are left in place.
+func (st *BlockStorage) Remove(key string) error {
+	// Resolve key -> node file path
+	npath, err := st.nodePathForKey(key)
+	if err == nil {
+		// Delete the node file
+		err = os.Remove(npath)
+	}
+	return err
+}
+
+// WalkKeys implements Storage.WalkKeys(), calling opts.WalkFn
+// for each regular file found under the nodes directory.
+func (st *BlockStorage) WalkKeys(opts *WalkKeysOptions) error {
+	// Acquire path builder
+	pb := fastpath.AcquireBuilder()
+	defer fastpath.ReleaseBuilder(pb)
+
+	// visit passes regular files on to the user-supplied walk function
+	visit := func(_ string, de fs.DirEntry) {
+		if !de.Type().IsRegular() {
+			return
+		}
+		opts.WalkFn(entry(de.Name()))
+	}
+
+	// Walk dir for entries
+	return util.WalkDir(pb, st.nodePath, visit)
+}
+
+// nodePathForKey calculates the node file path for supplied key.
+//
+// ErrInvalidKey is returned for keys containing a path separator, and for
+// the keys "", "." and "..": once joined onto the nodes directory these name
+// a directory (the nodes dir itself, or its parent) instead of a node file.
+func (st *BlockStorage) nodePathForKey(key string) (string, error) {
+	// Path separators are illegal, as are
+	// keys that resolve to directory paths
+	switch {
+	case key == "", key == ".", key == "..":
+		return "", ErrInvalidKey
+	case strings.Contains(key, "/"):
+		return "", ErrInvalidKey
+	}
+
+	// Acquire path builder
+	pb := util.AcquirePathBuilder()
+	defer util.ReleasePathBuilder(pb)
+
+	// Return joined + cleaned node-path
+	return pb.Join(st.nodePath, key), nil
+}
+
+// blockPathForKey returns the block file path for the supplied block hash,
+// formed by joining the hash onto this store's blocks directory.
+func (st *BlockStorage) blockPathForKey(hash string) string {
+	builder := util.AcquirePathBuilder()
+	defer util.ReleasePathBuilder(builder)
+
+	bpath := builder.Join(st.blockPath, hash)
+	return bpath
+}
+
+// hashSeparator is the byte written after each
+// block hash within a node file.
+const hashSeparator byte = ':'
+
+// node is the in-memory form of a node file: the ordered
+// list of block hashes that make up one stored value.
+type node struct {
+	hashes []string
+}
+
+// removeHash drops every occurrence of the supplied block hash from the node's
+// hash list (preserving the order of the rest), and reports whether at least
+// one occurrence was dropped.
+func (n *node) removeHash(hash string) bool {
+	before := len(n.hashes)
+
+	// Filter in place, reusing the backing array
+	kept := n.hashes[:0]
+	for _, h := range n.hashes {
+		if h != hash {
+			kept = append(kept, h)
+		}
+	}
+	n.hashes = kept
+
+	return len(kept) != before
+}
+
+// nodeReader is an io.Reader implementation for the node file representation,
+// which is useful when calculated node file is being written to the store.
+// It emits each hash in order, each one followed by a hashSeparator byte.
+type nodeReader struct {
+	node *node
+	idx  int // idx is the index of the hash currently being emitted
+	last int // last is how many bytes of the current hash were already emitted;
+	// when equal to the hash length, only its separator is still owed
+}
+
+// Read copies as much of the remaining node representation into b as fits.
+// Progress is tracked across calls, so b may be of any size: a hash (or its
+// separator) cut off by the end of b is resumed by the next call. io.EOF is
+// returned once all hashes and separators have been emitted.
+func (r *nodeReader) Read(b []byte) (int, error) {
+	n := 0
+
+	for r.idx < len(r.node.hashes) {
+		hash := r.node.hashes[r.idx]
+
+		if r.last < len(hash) {
+			// Copy (the rest of) this hash into buffer,
+			// accumulating progress made over earlier calls
+			m := copy(b[n:], hash[r.last:])
+			n += m
+			r.last += m
+
+			// If incomplete copy, return here
+			if r.last < len(hash) {
+				return n, nil
+			}
+		}
+
+		// Hash fully emitted, but no room left for the separator.
+		// State stays at "separator owed" (last == len(hash)), so
+		// the next call writes it WITHOUT repeating the hash
+		if n == len(b) {
+			return n, nil
+		}
+
+		// Write separator, iter, reset
+		b[n] = hashSeparator
+		n++
+		r.idx++
+		r.last = 0
+	}
+
+	// We reached end of hashes
+	return n, io.EOF
+}
+
+// nodeWriter is an io.Writer implementation that decodes the node file
+// representation, which is useful when a node file is being read from the
+// store. Separator-delimited hashes written to it are appended to node.
+type nodeWriter struct {
+	node *node
+	buf  *bytes.Buffer
+}
+
+// Write scans b for hash separators. Bytes preceding each separator are
+// appended to the internal buffer, which must then hold exactly encodedHashLen
+// bytes to be accepted as a hash; otherwise errInvalidNode is returned. Bytes
+// trailing the final separator are held in the buffer for the next call.
+func (w *nodeWriter) Write(b []byte) (int, error) {
+	consumed := 0
+
+	for {
+		rest := b[consumed:]
+
+		// Locate the next separator in what remains
+		sep := bytes.IndexByte(rest, hashSeparator)
+		if sep < 0 {
+			// None found. If the buffer already holds more than
+			// one hash worth of data, a separator was overdue
+			if w.buf.Len() > encodedHashLen {
+				return consumed, errInvalidNode
+			}
+
+			// Stash the remainder for the next call
+			w.buf.Write(rest)
+			return len(b), nil
+		}
+
+		// Complete the pending hash with the bytes up
+		// to the separator, then step over the separator
+		w.buf.Write(rest[:sep])
+		consumed += sep + 1
+		if w.buf.Len() != encodedHashLen {
+			return consumed, errInvalidNode
+		}
+
+		// Record the hash, start afresh
+		w.node.hashes = append(w.node.hashes, w.buf.String())
+		w.buf.Reset()
+	}
+}
+
+// blockReader is an io.Reader implementation for the combined, linked block
+// data contained with a node file. Basically, this allows reading value data
+// from the store for a given node file
+type blockReader struct {
+	storage *BlockStorage
+	node    *node  // node holds the hashes of blocks still to be loaded
+	buf     []byte // buf is the contents of the most recently loaded block
+	prev    int    // prev is the count of bytes in buf already handed out
+}
+
+// Read fills b with value data: first with whatever is left over from the
+// previously loaded block, then by loading further blocks (consuming hashes
+// from the front of the node's list) until b is full. io.EOF is returned once
+// no hashes remain; errors from loading a block are returned as-is.
+func (r *blockReader) Read(b []byte) (int, error) {
+	// Nothing can be copied, avoid needlessly loading a block
+	if len(b) == 0 {
+		return 0, nil
+	}
+
+	n := 0
+
+	// Data left in buf, copy as much as we can into supplied
+	// read buffer. The bound is len(r.buf) itself, so that a
+	// single left-over byte is still handed out
+	if r.prev < len(r.buf) {
+		n += copy(b, r.buf[r.prev:])
+		r.prev += n
+		if n >= len(b) {
+			return n, nil
+		}
+	}
+
+	for {
+		// Check we have any hashes left
+		if len(r.node.hashes) < 1 {
+			return n, io.EOF
+		}
+
+		// Get next key from slice
+		key := r.node.hashes[0]
+		r.node.hashes = r.node.hashes[1:]
+
+		// Attempt to fetch next batch of data
+		var err error
+		r.buf, err = r.storage.readBlock(key)
+		if err != nil {
+			return n, err
+		}
+		r.prev = 0
+
+		// Copy as much as can from new buffer
+		m := copy(b[n:], r.buf)
+		r.prev += m
+		n += m
+
+		// If we hit end of supplied buf, return
+		if n >= len(b) {
+			return n, nil
+		}
+	}
+}
+
+// hashEncoder bundles a HashEncoder together with the
+// buffer that its encoded hash-sums are written into.
+type hashEncoder struct {
+	henc hashenc.HashEncoder // henc computes + encodes hash-sums
+	ebuf []byte              // ebuf receives the encoded hash-sum
+}
+
+// encodedHashLen is the once-calculated encoded hash-sum length
+var encodedHashLen = hashenc.Base64().EncodedLen(
+ sha256.New().Size(),
+)
+
+// newHashEncoder returns a new hashEncoder instance
+func newHashEncoder() *hashEncoder {
+ hash := sha256.New()
+ enc := hashenc.Base64()
+ return &hashEncoder{
+ henc: hashenc.New(hash, enc),
+ ebuf: make([]byte, enc.EncodedLen(hash.Size())),
+ }
+}
+
+// EncodeSum hashes + encodes src into the encoder's internal buffer and
+// returns the result. The returned bytes are backed by that shared buffer,
+// so they are only valid until the next call to EncodeSum().
+func (henc *hashEncoder) EncodeSum(src []byte) bytes.Bytes {
+	dst := henc.ebuf
+	henc.henc.EncodeSum(dst, src)
+	return bytes.ToBytes(dst)
+}