From bd12191411d0ea912c7c9e2ce7298dd1ac25f7f9 Mon Sep 17 00:00:00 2001 From: Rick Newton-Rogers Date: Mon, 5 Aug 2024 12:04:52 +0100 Subject: [PATCH] Clone files on Darwin rather than copying them (#2823) ### Motivation: Copying regular files on Darwin was taking more time than it needed to because it was manually opening files and copying over bites rather than making use of the system's ability to create CoW clones. ### Modifications: The primary change is to switch `copyItem` to be backed by Darwin's `copyfile` method rather than `fcopyfile`. `fcopyfile` ignores the flag we were passing in to indicate that the file should be cloned if possible, whereas `copyfile` is a higher-level API which takes this into account. This change exposes a `copyfile` wrapper method as a new public function. This change also adds a workaround to a seeming compiler bug on channel options blocking building tests on more recent Xcodes. ### Result: Copying files on Darwin will be faster. --- Sources/NIOFileSystem/FileSystem.swift | 42 ++++++++------- .../FileSystemError+Syscall.swift | 53 ++++++++++++++++++- .../Internal/System Calls/Syscall.swift | 18 +++++++ .../Internal/System Calls/Syscalls.swift | 19 ++++++- .../FileSystemTests.swift | 14 ++--- .../FileSystemErrorTests.swift | 19 +++++++ .../Internal/SyscallTests.swift | 14 +++++ 7 files changed, 150 insertions(+), 29 deletions(-) diff --git a/Sources/NIOFileSystem/FileSystem.swift b/Sources/NIOFileSystem/FileSystem.swift index b18524a9fe..291102d014 100644 --- a/Sources/NIOFileSystem/FileSystem.swift +++ b/Sources/NIOFileSystem/FileSystem.swift @@ -1172,6 +1172,27 @@ extension FileSystem { ) } + #if canImport(Darwin) + // COPYFILE_CLONE clones the file if possible and will fallback to doing a copy. + // COPYFILE_ALL is shorthand for: + // COPYFILE_STAT | COPYFILE_ACL | COPYFILE_XATTR | COPYFILE_DATA + let flags = copyfile_flags_t(COPYFILE_CLONE) | copyfile_flags_t(COPYFILE_ALL) + return Libc.copyfile( + from: sourcePath, + to: destinationPath, + state: nil, + flags: flags + ).mapError { errno in + FileSystemError.copyfile( + errno: errno, + from: sourcePath, + to: destinationPath, + location: .here() + ) + } + + #elseif canImport(Glibc) || canImport(Musl) || canImport(Bionic) + let openSourceResult = self._openFile( forReadingAt: sourcePath, options: OpenOptions.Read(followSymbolicLinks: true) @@ -1231,25 +1252,6 @@ extension FileSystem { let copyResult: Result copyResult = source.fileHandle.systemFileHandle.sendableView._withUnsafeDescriptorResult { sourceFD in destination.fileHandle.systemFileHandle.sendableView._withUnsafeDescriptorResult { destinationFD in - #if canImport(Darwin) - // COPYFILE_CLONE clones the file if possible and will fallback to doing a copy. - // COPYFILE_ALL is shorthand for: - // COPYFILE_STAT | COPYFILE_ACL | COPYFILE_XATTR | COPYFILE_DATA - let flags = copyfile_flags_t(COPYFILE_CLONE) | copyfile_flags_t(COPYFILE_ALL) - return Libc.fcopyfile( - from: sourceFD, - to: destinationFD, - state: nil, - flags: flags - ).mapError { errno in - FileSystemError.fcopyfile( - errno: errno, - from: sourcePath, - to: destinationPath, - location: .here() - ) - } - #elseif canImport(Glibc) || canImport(Musl) || canImport(Bionic) var offset = 0 while offset < sourceInfo.size { @@ -1277,7 +1279,6 @@ extension FileSystem { } } return .success(()) - #endif } onUnavailable: { makeOnUnavailableError(path: destinationPath, location: .here()) } @@ -1287,6 +1288,7 @@ extension FileSystem { let closeResult = destination.fileHandle.systemFileHandle.sendableView._close(materialize: true) return copyResult.flatMap { closeResult } + #endif } private func copySymbolicLink( diff --git a/Sources/NIOFileSystem/FileSystemError+Syscall.swift b/Sources/NIOFileSystem/FileSystemError+Syscall.swift index a9309abdae..633b6dc7e1 100644 --- a/Sources/NIOFileSystem/FileSystemError+Syscall.swift +++ b/Sources/NIOFileSystem/FileSystemError+Syscall.swift @@ -992,6 +992,38 @@ extension FileSystemError { from sourcePath: FilePath, to destinationPath: FilePath, location: SourceLocation + ) -> Self { + Self._copyfile( + "fcopyfile", + errno: errno, + from: sourcePath, + to: destinationPath, + location: location + ) + } + + @_spi(Testing) + public static func copyfile( + errno: Errno, + from sourcePath: FilePath, + to destinationPath: FilePath, + location: SourceLocation + ) -> Self { + Self._copyfile( + "copyfile", + errno: errno, + from: sourcePath, + to: destinationPath, + location: location + ) + } + + private static func _copyfile( + _ name: String, + errno: Errno, + from sourcePath: FilePath, + to destinationPath: FilePath, + location: SourceLocation ) -> Self { let code: Code let message: String @@ -1016,6 +1048,25 @@ extension FileSystemError { Can't copy file from '\(sourcePath)' to '\(destinationPath)', the destination \ path already exists. """ + case .fileExists: + code = .fileAlreadyExists + message = """ + Unable to create file at path '\(destinationPath)', no existing file options were set \ + which implies that no file should exist but a file already exists at the \ + specified path. + """ + case .tooManyOpenFiles: + code = .unavailable + message = """ + Unable to open the source ('\(sourcePath)') or destination ('\(destinationPath)') files, \ + too many file descriptors are open. + """ + case .noSuchFileOrDirectory: + code = .notFound + message = """ + Unable to open or create file at path '\(sourcePath)', either a component of the \ + path did not exist or the named file to be opened did not exist. + """ default: code = .unknown message = "Can't copy file from '\(sourcePath)' to '\(destinationPath)'." @@ -1024,7 +1075,7 @@ extension FileSystemError { return FileSystemError( code: code, message: message, - systemCall: "fcopyfile", + systemCall: name, errno: errno, location: location ) diff --git a/Sources/NIOFileSystem/Internal/System Calls/Syscall.swift b/Sources/NIOFileSystem/Internal/System Calls/Syscall.swift index 515e59b3cf..0a80b9f1dd 100644 --- a/Sources/NIOFileSystem/Internal/System Calls/Syscall.swift +++ b/Sources/NIOFileSystem/Internal/System Calls/Syscall.swift @@ -313,6 +313,24 @@ public enum Libc { } #endif + #if canImport(Darwin) + @_spi(Testing) + public static func copyfile( + from source: FilePath, + to destination: FilePath, + state: copyfile_state_t?, + flags: copyfile_flags_t + ) -> Result { + source.withPlatformString { sourcePath in + destination.withPlatformString { destinationPath in + nothingOrErrno(retryOnInterrupt: false) { + libc_copyfile(sourcePath, destinationPath, state, flags) + } + } + } + } + #endif + @_spi(Testing) public static func remove( _ path: FilePath diff --git a/Sources/NIOFileSystem/Internal/System Calls/Syscalls.swift b/Sources/NIOFileSystem/Internal/System Calls/Syscalls.swift index 247f4dd798..1e9c166132 100644 --- a/Sources/NIOFileSystem/Internal/System Calls/Syscalls.swift +++ b/Sources/NIOFileSystem/Internal/System Calls/Syscalls.swift @@ -400,7 +400,7 @@ internal func libc_remove( } #if canImport(Darwin) -/// copyfile(3): Copy a file from one file to another. +/// fcopyfile(3): Copy a file from one file to another. internal func libc_fcopyfile( _ from: CInt, _ to: CInt, @@ -416,6 +416,23 @@ internal func libc_fcopyfile( } #endif +#if canImport(Darwin) +/// copyfile(3): Copy a file from one file to another. +internal func libc_copyfile( + _ from: UnsafePointer, + _ to: UnsafePointer, + _ state: copyfile_state_t?, + _ flags: copyfile_flags_t +) -> CInt { + #if ENABLE_MOCKING + if mockingEnabled { + return mock(from, to, state, flags) + } + #endif + return copyfile(from, to, state, flags) +} +#endif + /// getcwd(3): Get working directory pathname internal func libc_getcwd( _ buffer: UnsafeMutablePointer, diff --git a/Tests/NIOFileSystemIntegrationTests/FileSystemTests.swift b/Tests/NIOFileSystemIntegrationTests/FileSystemTests.swift index d952c2760f..fd17cdc1f3 100644 --- a/Tests/NIOFileSystemIntegrationTests/FileSystemTests.swift +++ b/Tests/NIOFileSystemIntegrationTests/FileSystemTests.swift @@ -659,14 +659,14 @@ final class FileSystemTests: XCTestCase { /// This is is not quite the same as sequential, different code paths are used. /// Tests using this ensure use of the parallel paths (which are more complex) while keeping actual /// parallelism to minimal levels to make debugging simpler. - private let minimalParallel: CopyStrategy = try! .parallel(maxDescriptors: 2) + private static let minimalParallel: CopyStrategy = try! .parallel(maxDescriptors: 2) func testCopyEmptyDirectorySequential() async throws { try await testCopyEmptyDirectory(.sequential) } func testCopyEmptyDirectoryParallelMinimal() async throws { - try await testCopyEmptyDirectory(minimalParallel) + try await testCopyEmptyDirectory(Self.minimalParallel) } func testCopyEmptyDirectoryParallelDefault() async throws { @@ -690,7 +690,7 @@ final class FileSystemTests: XCTestCase { } func testCopyOnGeneratedTreeStructureParallelMinimal() async throws { - try await testAnyCopyStrategyOnGeneratedTreeStructure(minimalParallel) + try await testAnyCopyStrategyOnGeneratedTreeStructure(Self.minimalParallel) } func testCopyOnGeneratedTreeStructureParallelDefault() async throws { @@ -737,7 +737,7 @@ final class FileSystemTests: XCTestCase { } func testCopySelectivelyParallelMinimal() async throws { - try await testCopySelectively(minimalParallel) + try await testCopySelectively(Self.minimalParallel) } func testCopySelectivelyParallelDefault() async throws { @@ -780,7 +780,7 @@ final class FileSystemTests: XCTestCase { } func testCopyCancelledPartWayThroughParallelMinimal() async throws { - try await testCopyCancelledPartWayThrough(minimalParallel) + try await testCopyCancelledPartWayThrough(Self.minimalParallel) } func testCopyCancelledPartWayThroughParallelDefault() async throws { @@ -927,7 +927,7 @@ final class FileSystemTests: XCTestCase { } func testCopyNonExistentFileParallelMinimal() async throws { - try await testCopyNonExistentFile(minimalParallel) + try await testCopyNonExistentFile(Self.minimalParallel) } func testCopyNonExistentFileParallelDefault() async throws { @@ -953,7 +953,7 @@ final class FileSystemTests: XCTestCase { } func testCopyToExistingDestinationParallelMinimal() async throws { - try await testCopyToExistingDestination(minimalParallel) + try await testCopyToExistingDestination(Self.minimalParallel) } func testCopyToExistingDestinationParallelDefault() async throws { diff --git a/Tests/NIOFileSystemTests/FileSystemErrorTests.swift b/Tests/NIOFileSystemTests/FileSystemErrorTests.swift index f32165f622..83e504b277 100644 --- a/Tests/NIOFileSystemTests/FileSystemErrorTests.swift +++ b/Tests/NIOFileSystemTests/FileSystemErrorTests.swift @@ -261,6 +261,10 @@ final class FileSystemErrorTests: XCTestCase { .fcopyfile(errno: .badFileDescriptor, from: "src", to: "dst", location: here) } + assertCauseIsSyscall("copyfile", here) { + .copyfile(errno: .badFileDescriptor, from: "src", to: "dst", location: here) + } + assertCauseIsSyscall("sendfile", here) { .sendfile(errno: .badFileDescriptor, from: "src", to: "dst", location: here) } @@ -523,6 +527,21 @@ final class FileSystemErrorTests: XCTestCase { } } + func testErrnoMapping_copyfile() { + self.testErrnoToErrorCode( + expected: [ + .notSupported: .invalidArgument, + .permissionDenied: .permissionDenied, + .invalidArgument: .invalidArgument, + .fileExists: .fileAlreadyExists, + .tooManyOpenFiles: .unavailable, + .noSuchFileOrDirectory: .notFound, + ] + ) { errno in + .copyfile(errno: errno, from: "src", to: "dst", location: .fixed) + } + } + func testErrnoMapping_fcopyfile() { self.testErrnoToErrorCode( expected: [ diff --git a/Tests/NIOFileSystemTests/Internal/SyscallTests.swift b/Tests/NIOFileSystemTests/Internal/SyscallTests.swift index 614b11a50f..66ed1171c1 100644 --- a/Tests/NIOFileSystemTests/Internal/SyscallTests.swift +++ b/Tests/NIOFileSystemTests/Internal/SyscallTests.swift @@ -389,6 +389,20 @@ final class SyscallTests: XCTestCase { #endif } + func test_copyfile() throws { + #if canImport(Darwin) + + let testCases: [MockTestCase] = [ + MockTestCase(name: "copyfile", .noInterrupt, "foo", "bar", "nil", 0) { _ in + try Libc.copyfile(from: "foo", to: "bar", state: nil, flags: 0).get() + } + ] + testCases.run() + #else + throw XCTSkip("'copyfile' is only supported on Darwin") + #endif + } + func test_remove() throws { let testCases: [MockTestCase] = [ MockTestCase(name: "remove", .noInterrupt, "somepath") { _ in