Skip to content

Commit 1a52daf

Browse files
committed
daemon/compartment: Use clone3 instead of clone
To allow future use cases, e.g., clone flags for new namespaces that are only available through clone3, we now switch to the clone3 syscall. We also simply use the same stack area as the parent by setting the corresponding clone_args fields, stack to 'NULL' and stack_size to '0', which reduces code complexity. Signed-off-by: Michael Weiß <[email protected]>
1 parent 6fd6845 commit 1a52daf

File tree

1 file changed

+31
-68
lines changed

1 file changed

+31
-68
lines changed

daemon/compartment.c

Lines changed: 31 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
*/
2323

2424
#define _GNU_SOURCE
25+
#include <linux/sched.h>
2526
#include <sched.h>
2627

2728
#include "compartment.h"
@@ -47,32 +48,12 @@
4748
#include <unistd.h>
4849
#include <sys/types.h>
4950
#include <sys/stat.h>
51+
#include <sys/syscall.h>
5052
#include <signal.h>
5153
#include <sys/wait.h>
5254
#include <pty.h>
5355
#include <sys/mman.h>
5456

55-
#define CLONE_STACK_SIZE 8 * 1024 * 1024
56-
/* Define some missing clone flags in BIONIC */
57-
#ifndef CLONE_NEWNS
58-
#define CLONE_NEWNS 0x00020000
59-
#endif
60-
#ifndef CLONE_NEWUTS
61-
#define CLONE_NEWUTS 0x04000000
62-
#endif
63-
#ifndef CLONE_NEWIPC
64-
#define CLONE_NEWIPC 0x08000000
65-
#endif
66-
#ifndef CLONE_NEWUSER
67-
#define CLONE_NEWUSER 0x10000000
68-
#endif
69-
#ifndef CLONE_NEWPID
70-
#define CLONE_NEWPID 0x20000000
71-
#endif
72-
#ifndef CLONE_NEWNET
73-
#define CLONE_NEWNET 0x40000000
74-
#endif
75-
7657
extern logf_handler_t *cml_daemon_logfile_handler;
7758

7859
/* Timeout for a compartment boot. If the compartment does not come up in that time frame
@@ -160,6 +141,12 @@ enum compartment_start_sync_msg {
160141

161142
static list_t *compartment_module_list = NULL;
162143

144+
/**
 * Thin wrapper around the raw clone3(2) system call.
 *
 * The kernel rejects clone3() with EINVAL when CLONE_PARENT or CLONE_THREAD
 * is requested together with a non-zero exit_signal. The legacy clone() call
 * accepted a termination signal alongside CLONE_PARENT and used the caller's
 * own exit signal for the new process. To keep callers that still fill in
 * exit_signal working, the field is cleared here in exactly that case.
 *
 * @param cl_args clone arguments handed to the kernel; exit_signal may be
 *                reset to 0 as described above
 * @param size    size in bytes of the structure cl_args points to
 * @return 0 in the child, the pid of the new process in the parent, or -1
 *         with errno set on failure
 */
static int
clone3(struct clone_args *cl_args, size_t size)
{
	/* Only touch the structure if the caller passed the full layout. */
	if (cl_args && size >= sizeof(*cl_args) &&
	    (cl_args->flags & (CLONE_PARENT | CLONE_THREAD)) && cl_args->exit_signal != 0) {
		cl_args->exit_signal = 0;
	}

	/* syscall() returns long; pids and the -1 error value fit into an int. */
	return (int)syscall(SYS_clone3, cl_args, size);
}
149+
163150
bool
164151
compartment_is_stoppable(compartment_t *compartment)
165152
{
@@ -841,13 +828,12 @@ compartment_close_all_fds()
841828
}
842829

843830
static int
844-
compartment_start_child(void *data)
831+
compartment_start_child(compartment_t *compartment)
845832
{
846-
ASSERT(data);
833+
ASSERT(compartment);
847834

848835
int ret = 0;
849836

850-
compartment_t *compartment = data;
851837
char *kvm_root = mem_printf("/tmp/%s", uuid_string(compartment->uuid));
852838

853839
/*******************************************************************/
@@ -1026,14 +1012,12 @@ compartment_start_child(void *data)
10261012
}
10271013

10281014
static int
1029-
compartment_start_child_early(void *data)
1015+
compartment_start_child_early(compartment_t *compartment)
10301016
{
1031-
ASSERT(data);
1017+
ASSERT(compartment);
10321018

10331019
int ret = 0;
10341020

1035-
compartment_t *compartment = data;
1036-
10371021
event_reset();
10381022
close(compartment->sync_sock_parent);
10391023

@@ -1054,23 +1038,15 @@ compartment_start_child_early(void *data)
10541038
}
10551039
}
10561040

1057-
void *compartment_stack = NULL;
1058-
/* Allocate node stack */
1041+
struct clone_args args = { 0 };
1042+
args.exit_signal = SIGCHLD;
10591043

1060-
if (MAP_FAILED ==
1061-
(compartment_stack = mmap(NULL, CLONE_STACK_SIZE, PROT_READ | PROT_WRITE,
1062-
MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0))) {
1063-
WARN_ERRNO("Not enough memory for allocating compartment stack");
1064-
goto error;
1065-
}
1066-
void *compartment_stack_high = (void *)((const char *)compartment_stack + CLONE_STACK_SIZE);
10671044
/* Set namespaces for node */
10681045
/* set some basic and non-configurable namespaces */
1069-
unsigned long clone_flags = 0;
1070-
clone_flags |= SIGCHLD | CLONE_PARENT; // sig child to main process
1071-
clone_flags |= CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWPID;
1046+
args.flags |= CLONE_PARENT; // sig child to main process
1047+
args.flags |= CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWPID;
10721048
if (compartment_has_ipcns(compartment))
1073-
clone_flags |= CLONE_NEWIPC;
1049+
args.flags |= CLONE_NEWIPC;
10741050

10751051
compartment_module_instance_t *c_user =
10761052
compartment_module_get_mod_instance_by_name(compartment, "c_user");
@@ -1087,14 +1063,16 @@ compartment_start_child_early(void *data)
10871063
}
10881064
} else {
10891065
if (c_user && compartment_has_userns(compartment))
1090-
clone_flags |= CLONE_NEWUSER;
1066+
args.flags |= CLONE_NEWUSER;
10911067
if (c_net && compartment_has_netns(compartment))
1092-
clone_flags |= CLONE_NEWNET;
1068+
args.flags |= CLONE_NEWNET;
10931069
}
10941070

1095-
compartment->pid =
1096-
clone(compartment_start_child, compartment_stack_high, clone_flags, compartment);
1097-
if (compartment->pid < 0) {
1071+
compartment->pid = clone3(&args, sizeof(struct clone_args));
1072+
if (compartment->pid == 0) { // child
1073+
int ret = compartment_start_child(compartment);
1074+
_exit(ret);
1075+
} else if (compartment->pid < 0) {
10981076
ERROR_ERRNO("Double clone compartment failed");
10991077
goto error;
11001078
}
@@ -1378,7 +1356,6 @@ compartment_start(compartment_t *compartment)
13781356
ASSERT(compartment);
13791357

13801358
int ret = 0;
1381-
void *compartment_stack = NULL;
13821359

13831360
compartment_set_state(compartment, COMPARTMENT_STATE_STARTING);
13841361

@@ -1399,17 +1376,8 @@ compartment_start(compartment_t *compartment)
13991376
/*********************************************************/
14001377
/* PREPARE CLONE */
14011378

1402-
/* Allocate node stack */
1403-
if (MAP_FAILED ==
1404-
(compartment_stack = mmap(NULL, CLONE_STACK_SIZE, PROT_READ | PROT_WRITE,
1405-
MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0))) {
1406-
WARN_ERRNO("Not enough memory for allocating compartment stack");
1407-
goto error_pre_clone;
1408-
}
1409-
void *compartment_stack_high = (void *)((const char *)compartment_stack + CLONE_STACK_SIZE);
1410-
1411-
unsigned long clone_flags = 0;
1412-
clone_flags |= SIGCHLD;
1379+
struct clone_args args = { 0 };
1380+
args.exit_signal = SIGCHLD;
14131381

14141382
/* Create a socketpair for synchronization and save it in the compartment structure to be able to
14151383
* pass it around */
@@ -1430,10 +1398,11 @@ compartment_start(compartment_t *compartment)
14301398
INFO("Container in setup mode!");
14311399
}
14321400

1433-
/* TODO find out if stack is only necessary with CLONE_VM */
1434-
pid_t compartment_pid = clone(compartment_start_child_early, compartment_stack_high,
1435-
clone_flags, compartment);
1436-
if (compartment_pid < 0) {
1401+
pid_t compartment_pid = clone3(&args, sizeof(struct clone_args));
1402+
if (compartment_pid == 0) { // child
1403+
int ret = compartment_start_child_early(compartment);
1404+
_exit(ret);
1405+
} else if (compartment_pid < 0) {
14371406
WARN_ERRNO("Clone compartment failed");
14381407
goto error_pre_clone;
14391408
}
@@ -1463,16 +1432,12 @@ compartment_start(compartment_t *compartment)
14631432
goto error_post_clone;
14641433
}
14651434
}
1466-
if (compartment_stack && munmap(compartment_stack, CLONE_STACK_SIZE) == -1)
1467-
WARN("Could not unmap compartment_stack!");
14681435

14691436
return 0;
14701437

14711438
error_pre_clone:
14721439
compartment_cleanup(compartment, false);
14731440
compartment_set_state(compartment, COMPARTMENT_STATE_STOPPED);
1474-
if (compartment_stack && munmap(compartment_stack, CLONE_STACK_SIZE) == -1)
1475-
WARN("Could not unmap compartment_stack!");
14761441
return ret;
14771442

14781443
error_post_clone:
@@ -1483,8 +1448,6 @@ compartment_start(compartment_t *compartment)
14831448
WARN_ERRNO("write to sync socket failed");
14841449
compartment_kill(compartment);
14851450
}
1486-
if (compartment_stack && munmap(compartment_stack, CLONE_STACK_SIZE) == -1)
1487-
WARN("Could not unmap compartment_stack!");
14881451
return ret;
14891452
}
14901453

0 commit comments

Comments
 (0)