From 3db689ad9b7a3df330632015449a4213c93f8c9e Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Wed, 18 Dec 2024 11:28:56 +0100 Subject: [PATCH] cmd/files: flush parent folders This is a mitigation to increased MFS memory usage in the course of many writes operations. The underlying issue is the unbounded growth of the mfs directory cache in boxo. In the latest boxo version, this cache can be cleared by calling Flush() on the folder. In order to trigger that, we call Flush() on the parent folder of the file/folder where the write-operations are happening. To flushing the parent folder allows it to grow unbounded. Then, any read operation to that folder or parents (i.e. stat), will trigger a sync-operation to match the cache to the underlying unixfs structure (and obtain the correct node-cid). This sync operation must visit every item in the cache. When the cache has grown too much, and the underlying unixfs-folder has switched into a HAMT, the operation can take minutes. Thus, we should clear the cache often and the Flush flag is a good indicator that we can let it go. Users can always run with --flush=false and flush at regular intervals during their MFS writes if they want to extract some performance. Fixes #8694, #10588. --- core/commands/files.go | 62 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/core/commands/files.go b/core/commands/files.go index 40e4f3d7aea..e997ccff89d 100644 --- a/core/commands/files.go +++ b/core/commands/files.go @@ -491,10 +491,14 @@ being GC'ed. } if flush { - _, err := mfs.FlushPath(req.Context, nd.FilesRoot, dst) - if err != nil { + if _, err := mfs.FlushPath(req.Context, nd.FilesRoot, dst); err != nil { return fmt.Errorf("cp: cannot flush the created file %s: %s", dst, err) } + // Flush parent to clear directory cache and free memory. + parent := gopath.Dir(dst) + if _, err = mfs.FlushPath(req.Context, nd.FilesRoot, parent); err != nil { + return fmt.Errorf("cp: cannot flush the created file's parent folder %s: %s", dst, err) + } } return nil @@ -792,10 +796,30 @@ Example: } err = mfs.Mv(nd.FilesRoot, src, dst) - if err == nil && flush { - _, err = mfs.FlushPath(req.Context, nd.FilesRoot, "/") + if err != nil { + return err } - return err + if flush { + parentSrc := gopath.Dir(src) + parentDst := gopath.Dir(dst) + // Flush parent to clear directory cache and free memory. + if _, err = mfs.FlushPath(req.Context, nd.FilesRoot, parentDst); err != nil { + return fmt.Errorf("cp: cannot flush the destination file's parent folder %s: %s", dst, err) + } + + // Avoid re-flushing when moving within the same folder. + if parentSrc != parentDst { + if _, err = mfs.FlushPath(req.Context, nd.FilesRoot, parentSrc); err != nil { + return fmt.Errorf("cp: cannot flush the source's file's parent folder %s: %s", dst, err) + } + } + + if _, err = mfs.FlushPath(req.Context, nd.FilesRoot, "/"); err != nil { + return err + } + } + + return nil }, } @@ -943,6 +967,17 @@ See '--to-files' in 'ipfs add --help' for more information. flog.Error("files: error closing file mfs file descriptor", err) } } + if flush { + // Flush parent to clear directory cache and free memory. + parent := gopath.Dir(path) + if _, err := mfs.FlushPath(req.Context, nd.FilesRoot, parent); err != nil { + if retErr == nil { + retErr = err + } else { + flog.Error("files: flushing the parent folder", err) + } + } + } }() if trunc { @@ -1105,11 +1140,20 @@ Change the CID version or hash function of the root node of a given path. return err } - err = updatePath(nd.FilesRoot, path, prefix) - if err == nil && flush { - _, err = mfs.FlushPath(req.Context, nd.FilesRoot, path) + if err := updatePath(nd.FilesRoot, path, prefix); err != nil { + return err } - return err + if flush { + if _, err = mfs.FlushPath(req.Context, nd.FilesRoot, path); err != nil { + return err + } + // Flush parent to clear directory cache and free memory. + parent := gopath.Dir(path) + if _, err = mfs.FlushPath(req.Context, nd.FilesRoot, parent); err != nil { + return err + } + } + return nil }, }