diff --git a/pkg/sentry/kernel/task_clone.go b/pkg/sentry/kernel/task_clone.go index cb23518def..ae9a1897ca 100644 --- a/pkg/sentry/kernel/task_clone.go +++ b/pkg/sentry/kernel/task_clone.go @@ -253,7 +253,13 @@ func (t *Task) Clone(args *linux.CloneArgs) (ThreadID, *SyscallControl, error) { if args.Flags&linux.CLONE_SIGHAND == 0 { sh = sh.Fork() } - tg = t.k.NewThreadGroup(pidns, sh, linux.Signal(args.ExitSignal), tg.limits.GetCopy()) + termSig := linux.Signal(args.ExitSignal) + if args.Flags&linux.CLONE_PARENT != 0 { + t.tg.pidns.owner.mu.RLock() + termSig = t.tg.terminationSignal + t.tg.pidns.owner.mu.RUnlock() + } + tg = t.k.NewThreadGroup(pidns, sh, termSig, tg.limits.GetCopy()) tg.oomScoreAdj = atomicbitops.FromInt32(t.tg.oomScoreAdj.Load()) rseqAddr = t.rseqAddr rseqSignature = t.rseqSignature @@ -286,7 +292,7 @@ func (t *Task) Clone(args *linux.CloneArgs) (ThreadID, *SyscallControl, error) { SessionKeyring: sessionKeyring, Origin: t.Origin, } - if args.Flags&linux.CLONE_THREAD == 0 { + if args.Flags&(linux.CLONE_THREAD|linux.CLONE_PARENT) == 0 { cfg.Parent = t } else { cfg.InheritParent = t diff --git a/pkg/sentry/syscalls/linux/sys_thread.go b/pkg/sentry/syscalls/linux/sys_thread.go index 7df0bb7e68..165690c033 100644 --- a/pkg/sentry/syscalls/linux/sys_thread.go +++ b/pkg/sentry/syscalls/linux/sys_thread.go @@ -230,6 +230,9 @@ func Clone3(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, if cloneArgs.Flags&linux.CLONE_DETACHED != 0 { return 0, nil, linuxerr.EINVAL } + if cloneArgs.Flags&(linux.CLONE_THREAD|linux.CLONE_PARENT) != 0 && cloneArgs.ExitSignal != 0 { + return 0, nil, linuxerr.EINVAL + } ntid, ctrl, err := t.Clone(&cloneArgs) if err != nil { diff --git a/test/syscalls/linux/BUILD b/test/syscalls/linux/BUILD index b6b7b3657f..9233af4c73 100644 --- a/test/syscalls/linux/BUILD +++ b/test/syscalls/linux/BUILD @@ -967,6 +967,7 @@ cc_binary( malloc = "//test/util:errno_safe_allocator", deps = select_gtest() + [ 
"//test/util:capability_util", + "//test/util:file_descriptor", "//test/util:logging", "//test/util:memory_util", "//test/util:posix_error", diff --git a/test/syscalls/linux/cgroup.cc b/test/syscalls/linux/cgroup.cc index 7eb5842a5d..b768f9e13c 100644 --- a/test/syscalls/linux/cgroup.cc +++ b/test/syscalls/linux/cgroup.cc @@ -1361,7 +1361,7 @@ TEST_F(Cgroup2Test, PidsEnforcement) { constexpr int kCantFork = 2; pid_t pid = fork(); if (pid == 0) { - wfd.reset(); + close(wfd.release()); char token; if (read(rfd.get(), &token, 1) <= 0) { _exit(1); @@ -1490,7 +1490,7 @@ TEST_F(Cgroup2Test, InotifyEventsOnExit) { pid_t pid = fork(); if (pid == 0) { - wfd.reset(); + close(wfd.release()); char token; if (read(rfd.get(), &token, 1) <= 0) { _exit(1); @@ -1528,7 +1528,7 @@ TEST_F(Cgroup2Test, ZombieCgroupMembership) { pid_t pid = fork(); if (pid == 0) { - wfd.reset(); + close(wfd.release()); char token; if (read(rfd.get(), &token, 1) <= 0) { _exit(1); @@ -1831,7 +1831,7 @@ TEST_F(Cgroup2Test, KillTree) { pid_t pid1 = fork(); if (pid1 == 0) { - wfd.reset(); + close(wfd.release()); char token; if (read(rfd.get(), &token, 1) <= 0) { _exit(1); @@ -1842,7 +1842,7 @@ TEST_F(Cgroup2Test, KillTree) { pid_t pid2 = fork(); if (pid2 == 0) { - wfd.reset(); + close(wfd.release()); char token; if (read(rfd.get(), &token, 1) <= 0) { _exit(1); @@ -1969,8 +1969,8 @@ TEST_F(Cgroup2Test, ThreadedDomainComplexTopology) { pid_t p2 = fork(); if (p2 == 0) { - exit_wfd.reset(); - start_rfd.reset(); + close(exit_wfd.release()); + close(start_rfd.release()); // Move the child leader into child2. 
ASSERT_NO_ERRNO(child2.WriteIntegerControlFile("cgroup.threads", gettid())); diff --git a/test/syscalls/linux/exec.cc b/test/syscalls/linux/exec.cc index 8a01c010e3..ac131433df 100644 --- a/test/syscalls/linux/exec.cc +++ b/test/syscalls/linux/exec.cc @@ -1241,7 +1241,7 @@ void writeAndWaitForPid(int child_pid, int pipe_fd) { TEST_PCHECK_MSG(waitpid(child_pid, &status, 0) == child_pid, "waitpid failed."); TEST_CHECK(WIFEXITED(status) && WEXITSTATUS(status) == 0); - exit(42); + _exit(42); } void ExecWithThread() { @@ -1266,7 +1266,7 @@ void ExecWithThread() { const ExecveArray envv; execve("/proc/self/exe", argv.get(), envv.get()); - exit(errno); + _exit(errno); } void ExecFromThread() { @@ -1275,7 +1275,7 @@ void ExecFromThread() { const ExecveArray envv; execve("/proc/self/exe", argv.get(), envv.get()); - exit(errno); + _exit(errno); }); while (true) { diff --git a/test/syscalls/linux/exit.cc b/test/syscalls/linux/exit.cc index 0d3011f53a..8c0032e3da 100644 --- a/test/syscalls/linux/exit.cc +++ b/test/syscalls/linux/exit.cc @@ -62,7 +62,7 @@ TEST(ExitTest, CloseFds) { pid_t pid = fork(); if (pid == 0) { - read_fd.reset(); + close(read_fd.release()); SleepSafe(absl::Seconds(10)); @@ -109,7 +109,7 @@ TEST(ExitTest, SigkillZombieGroup) { pid_t pid = fork(); if (pid == 0) { - read_fd.reset(); + close(read_fd.release()); _exit(0); } @@ -140,7 +140,7 @@ TEST(ExitTest, SigkillZombieThread) { pid_t pid = fork(); if (pid == 0) { - read_fd.reset(); + close(read_fd.release()); syscall(SYS_exit, 0); } diff --git a/test/syscalls/linux/fork.cc b/test/syscalls/linux/fork.cc index 4eb664ee29..d250e63af3 100644 --- a/test/syscalls/linux/fork.cc +++ b/test/syscalls/linux/fork.cc @@ -32,6 +32,7 @@ #include "gtest/gtest.h" #include "absl/time/clock.h" #include "absl/time/time.h" +#include "test/util/file_descriptor.h" #include "test/util/linux_capability_util.h" #include "test/util/logging.h" #include "test/util/memory_util.h" @@ -515,6 +516,79 @@ int clone3(struct clone_args* ca, 
size_t size) { return syscall(SYS_clone3, ca, size); } +TEST(CloneTest, CloneParent) { + // A is the test process. B is the child. C is the grandchild. + // C is created with CLONE_PARENT, so it should be adopted by A. + pid_t a_pid = getpid(); + + int fds[2]; + ASSERT_THAT(pipe(fds), SyscallSucceeds()); + FileDescriptor rfd(fds[0]); + FileDescriptor wfd(fds[1]); + + pid_t b_pid = fork(); + if (b_pid == 0) { + if (close(rfd.release()) != 0) { + _exit(1); + } + + clone_args ca = {}; + ca.flags = CLONE_PARENT; + ca.exit_signal = 0; // clone3 will fail without this. + pid_t c_pid = clone3(&ca, sizeof(ca)); + if (c_pid == 0) { + if (getppid() != a_pid) { + _exit(42); // CLONE_PARENT failed. + } + _exit(0); + } + if (c_pid < 0) { + _exit(1); // C could not be created. + } + if (write(wfd.get(), &c_pid, sizeof(c_pid)) != sizeof(c_pid)) { + _exit(1); + } + + // B should fail to wait on C with ECHILD if CLONE_PARENT worked. + int status = -1; + int rval = wait4(c_pid, &status, __WALL, NULL); + if (rval != -1 || errno != ECHILD) { + _exit(43); + } + _exit(0); + } + ASSERT_THAT(b_pid, SyscallSucceeds()); + wfd.reset(); + + // A's wait on B. + int status; + EXPECT_THAT(waitpid(b_pid, &status, 0), SyscallSucceedsWithValue(b_pid)); + ASSERT_TRUE(WIFEXITED(status)); + EXPECT_EQ(WEXITSTATUS(status), 0); + + // A's wait on C. 
+ pid_t c_pid = -1; + EXPECT_THAT(read(rfd.get(), &c_pid, sizeof(c_pid)), + SyscallSucceedsWithValue(sizeof(c_pid))); + ASSERT_GT(c_pid, 0); + EXPECT_THAT(wait4(c_pid, &status, __WALL, NULL), + SyscallSucceedsWithValue(c_pid)); + ASSERT_TRUE(WIFEXITED(status)); + EXPECT_EQ(WEXITSTATUS(status), 0); +} + +TEST(CloneTest, CloneParentOrThreadWithExitSignalFails) { + clone_args ca = {}; + ca.flags = CLONE_PARENT; + ca.exit_signal = SIGCHLD; + EXPECT_THAT(clone3(&ca, sizeof(ca)), SyscallFailsWithErrno(EINVAL)); + + ca = {}; + ca.flags = CLONE_THREAD; + ca.exit_signal = SIGCHLD; + EXPECT_THAT(clone3(&ca, sizeof(ca)), SyscallFailsWithErrno(EINVAL)); +} + // Checks that clone fails for any unsupported flag. TEST(CloneTest, Clone3UnknownFlag) { clone_args ca = {}; @@ -532,7 +606,7 @@ TEST(CloneTest, Clone3AsClone) { EXPECT_THAT(child_pid = clone3(&ca, sizeof(ca)), SyscallSucceeds()); if (child_pid == 0) { - exit(0); + _exit(0); } int status; @@ -555,7 +629,7 @@ TEST(CloneTest, Clone3Basic) { EXPECT_EQ(store_child_tid, child_pid); if (child_pid == 0) { - exit(0); + _exit(0); } int status; diff --git a/test/syscalls/linux/kill.cc b/test/syscalls/linux/kill.cc index 5d1735853f..8cd26451b7 100644 --- a/test/syscalls/linux/kill.cc +++ b/test/syscalls/linux/kill.cc @@ -71,7 +71,7 @@ TEST(KillTest, CanKillAllPIDs) { pid_t pid = fork(); if (pid == 0) { - read_fd.reset(); + close(read_fd.release()); struct sigaction sa; sa.sa_sigaction = SigHandler; @@ -81,7 +81,7 @@ TEST(KillTest, CanKillAllPIDs) { MaybeSave(); // Indicate to the parent that we're ready. - write_fd.reset(); + close(write_fd.release()); // Wait until we get the signal from the parent. while (true) { diff --git a/test/syscalls/linux/sigaltstack.cc b/test/syscalls/linux/sigaltstack.cc index 24e7c49604..b7fb9d7db1 100644 --- a/test/syscalls/linux/sigaltstack.cc +++ b/test/syscalls/linux/sigaltstack.cc @@ -257,7 +257,7 @@ TEST(SigaltstackTest, SetCurrentStack) { // Should not be able to disable the stack. 
stack.ss_flags = SS_DISABLE; TEST_CHECK(sigaltstack(&stack, nullptr) == -1 && errno == EPERM); - exit(0); + _exit(0); }, ::testing::ExitedWithCode(0), ""); } diff --git a/test/syscalls/linux/sigtimedwait.cc b/test/syscalls/linux/sigtimedwait.cc index 21651a6970..c987ad5d66 100644 --- a/test/syscalls/linux/sigtimedwait.cc +++ b/test/syscalls/linux/sigtimedwait.cc @@ -197,7 +197,7 @@ TEST(SigtimedwaitTest, SIGKILLUncaught) { pid_t pid = fork(); if (pid == 0) { - rfd.reset(); + close(rfd.release()); sigset_t mask; sigemptyset(&mask); diff --git a/test/syscalls/linux/sync_file_range.cc b/test/syscalls/linux/sync_file_range.cc index 36cc420433..74a3fb87ec 100644 --- a/test/syscalls/linux/sync_file_range.cc +++ b/test/syscalls/linux/sync_file_range.cc @@ -63,7 +63,7 @@ TEST(SyncFileRangeTest, CannotSyncFileRangeOnUnopenedFd) { pid_t pid = fork(); if (pid == 0) { - f.reset(); + close(f.release()); // fd is now invalid. TEST_CHECK(sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE) == -1); diff --git a/test/syscalls/linux/wait.cc b/test/syscalls/linux/wait.cc index f220ebaf0b..ab046fcadb 100644 --- a/test/syscalls/linux/wait.cc +++ b/test/syscalls/linux/wait.cc @@ -876,12 +876,12 @@ TEST(WaitTest, TraceeWALL) { pid_t child = fork(); if (child == 0) { // Child. - rfd.reset(); + close(rfd.release()); TEST_PCHECK(ptrace(PTRACE_TRACEME, 0, nullptr, nullptr) == 0); // Notify parent that we're now a tracee. - wfd.reset(); + close(wfd.release()); _exit(0); }