22
22
*/
23
23
24
24
#define _GNU_SOURCE
25
+ #include <linux/sched.h>
25
26
#include <sched.h>
26
27
27
28
#include "compartment.h"
47
48
#include <unistd.h>
48
49
#include <sys/types.h>
49
50
#include <sys/stat.h>
51
+ #include <sys/syscall.h>
50
52
#include <signal.h>
51
53
#include <sys/wait.h>
52
54
#include <pty.h>
53
55
#include <sys/mman.h>
54
56
55
- #define CLONE_STACK_SIZE 8 * 1024 * 1024
56
- /* Define some missing clone flags in BIONIC */
57
- #ifndef CLONE_NEWNS
58
- #define CLONE_NEWNS 0x00020000
59
- #endif
60
- #ifndef CLONE_NEWUTS
61
- #define CLONE_NEWUTS 0x04000000
62
- #endif
63
- #ifndef CLONE_NEWIPC
64
- #define CLONE_NEWIPC 0x08000000
65
- #endif
66
- #ifndef CLONE_NEWUSER
67
- #define CLONE_NEWUSER 0x10000000
68
- #endif
69
- #ifndef CLONE_NEWPID
70
- #define CLONE_NEWPID 0x20000000
71
- #endif
72
- #ifndef CLONE_NEWNET
73
- #define CLONE_NEWNET 0x40000000
74
- #endif
75
-
76
57
extern logf_handler_t * cml_daemon_logfile_handler ;
77
58
78
59
/* Timeout for a compartment boot. If the compartment does not come up in that time frame
@@ -160,6 +141,12 @@ enum compartment_start_sync_msg {
160
141
161
142
static list_t * compartment_module_list = NULL ;
162
143
144
/*
 * clone3 - file-local wrapper that issues the clone3 system call.
 *
 * Forwards cl_args and size unchanged to syscall() with SYS_clone3 and
 * returns whatever syscall() returns.
 *
 * @param cl_args pointer to the struct clone_args passed to the system call
 * @param size    size value passed alongside cl_args; the callers visible in
 *                this file pass sizeof(struct clone_args)
 * @return the value returned by syscall(); the callers visible in this file
 *         treat 0 as "running in the child" and a negative value as failure
 *
 * NOTE(review): syscall() presumably yields a long that is narrowed to int by
 * this function's return type - confirm this is intended.
 */
+ static int
145
+ clone3 (struct clone_args * cl_args , size_t size )
146
+ {
147
+ return syscall (SYS_clone3 , cl_args , size ); /* no stack or entry function is passed here; callers branch on the return value */
148
+ }
149
+
163
150
bool
164
151
compartment_is_stoppable (compartment_t * compartment )
165
152
{
@@ -841,13 +828,12 @@ compartment_close_all_fds()
841
828
}
842
829
843
830
static int
844
- compartment_start_child (void * data )
831
+ compartment_start_child (compartment_t * compartment )
845
832
{
846
- ASSERT (data );
833
+ ASSERT (compartment );
847
834
848
835
int ret = 0 ;
849
836
850
- compartment_t * compartment = data ;
851
837
char * kvm_root = mem_printf ("/tmp/%s" , uuid_string (compartment -> uuid ));
852
838
853
839
/*******************************************************************/
@@ -1026,14 +1012,12 @@ compartment_start_child(void *data)
1026
1012
}
1027
1013
1028
1014
static int
1029
- compartment_start_child_early (void * data )
1015
+ compartment_start_child_early (compartment_t * compartment )
1030
1016
{
1031
- ASSERT (data );
1017
+ ASSERT (compartment );
1032
1018
1033
1019
int ret = 0 ;
1034
1020
1035
- compartment_t * compartment = data ;
1036
-
1037
1021
event_reset ();
1038
1022
close (compartment -> sync_sock_parent );
1039
1023
@@ -1054,23 +1038,15 @@ compartment_start_child_early(void *data)
1054
1038
}
1055
1039
}
1056
1040
1057
- void * compartment_stack = NULL ;
1058
- /* Allocate node stack */
1041
+ struct clone_args args = { 0 } ;
1042
+ args . exit_signal = SIGCHLD ;
1059
1043
1060
- if (MAP_FAILED ==
1061
- (compartment_stack = mmap (NULL , CLONE_STACK_SIZE , PROT_READ | PROT_WRITE ,
1062
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK , -1 , 0 ))) {
1063
- WARN_ERRNO ("Not enough memory for allocating compartment stack" );
1064
- goto error ;
1065
- }
1066
- void * compartment_stack_high = (void * )((const char * )compartment_stack + CLONE_STACK_SIZE );
1067
1044
/* Set namespaces for node */
1068
1045
/* set some basic and non-configurable namespaces */
1069
- unsigned long clone_flags = 0 ;
1070
- clone_flags |= SIGCHLD | CLONE_PARENT ; // sig child to main process
1071
- clone_flags |= CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWPID ;
1046
+ args .flags |= CLONE_PARENT ; // sig child to main process
1047
+ args .flags |= CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWPID ;
1072
1048
if (compartment_has_ipcns (compartment ))
1073
- clone_flags |= CLONE_NEWIPC ;
1049
+ args . flags |= CLONE_NEWIPC ;
1074
1050
1075
1051
compartment_module_instance_t * c_user =
1076
1052
compartment_module_get_mod_instance_by_name (compartment , "c_user" );
@@ -1087,14 +1063,16 @@ compartment_start_child_early(void *data)
1087
1063
}
1088
1064
} else {
1089
1065
if (c_user && compartment_has_userns (compartment ))
1090
- clone_flags |= CLONE_NEWUSER ;
1066
+ args . flags |= CLONE_NEWUSER ;
1091
1067
if (c_net && compartment_has_netns (compartment ))
1092
- clone_flags |= CLONE_NEWNET ;
1068
+ args . flags |= CLONE_NEWNET ;
1093
1069
}
1094
1070
1095
- compartment -> pid =
1096
- clone (compartment_start_child , compartment_stack_high , clone_flags , compartment );
1097
- if (compartment -> pid < 0 ) {
1071
+ compartment -> pid = clone3 (& args , sizeof (struct clone_args ));
1072
+ if (compartment -> pid == 0 ) { // child
1073
+ int ret = compartment_start_child (compartment );
1074
+ _exit (ret );
1075
+ } else if (compartment -> pid < 0 ) {
1098
1076
ERROR_ERRNO ("Double clone compartment failed" );
1099
1077
goto error ;
1100
1078
}
@@ -1378,7 +1356,6 @@ compartment_start(compartment_t *compartment)
1378
1356
ASSERT (compartment );
1379
1357
1380
1358
int ret = 0 ;
1381
- void * compartment_stack = NULL ;
1382
1359
1383
1360
compartment_set_state (compartment , COMPARTMENT_STATE_STARTING );
1384
1361
@@ -1399,17 +1376,8 @@ compartment_start(compartment_t *compartment)
1399
1376
/*********************************************************/
1400
1377
/* PREPARE CLONE */
1401
1378
1402
- /* Allocate node stack */
1403
- if (MAP_FAILED ==
1404
- (compartment_stack = mmap (NULL , CLONE_STACK_SIZE , PROT_READ | PROT_WRITE ,
1405
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK , -1 , 0 ))) {
1406
- WARN_ERRNO ("Not enough memory for allocating compartment stack" );
1407
- goto error_pre_clone ;
1408
- }
1409
- void * compartment_stack_high = (void * )((const char * )compartment_stack + CLONE_STACK_SIZE );
1410
-
1411
- unsigned long clone_flags = 0 ;
1412
- clone_flags |= SIGCHLD ;
1379
+ struct clone_args args = { 0 };
1380
+ args .exit_signal = SIGCHLD ;
1413
1381
1414
1382
/* Create a socketpair for synchronization and save it in the compartment structure to be able to
1415
1383
* pass it around */
@@ -1430,10 +1398,11 @@ compartment_start(compartment_t *compartment)
1430
1398
INFO ("Container in setup mode!" );
1431
1399
}
1432
1400
1433
- /* TODO find out if stack is only necessary with CLONE_VM */
1434
- pid_t compartment_pid = clone (compartment_start_child_early , compartment_stack_high ,
1435
- clone_flags , compartment );
1436
- if (compartment_pid < 0 ) {
1401
+ pid_t compartment_pid = clone3 (& args , sizeof (struct clone_args ));
1402
+ if (compartment_pid == 0 ) { // child
1403
+ int ret = compartment_start_child_early (compartment );
1404
+ _exit (ret );
1405
+ } else if (compartment_pid < 0 ) {
1437
1406
WARN_ERRNO ("Clone compartment failed" );
1438
1407
goto error_pre_clone ;
1439
1408
}
@@ -1463,16 +1432,12 @@ compartment_start(compartment_t *compartment)
1463
1432
goto error_post_clone ;
1464
1433
}
1465
1434
}
1466
- if (compartment_stack && munmap (compartment_stack , CLONE_STACK_SIZE ) == -1 )
1467
- WARN ("Could not unmap compartment_stack!" );
1468
1435
1469
1436
return 0 ;
1470
1437
1471
1438
error_pre_clone :
1472
1439
compartment_cleanup (compartment , false);
1473
1440
compartment_set_state (compartment , COMPARTMENT_STATE_STOPPED );
1474
- if (compartment_stack && munmap (compartment_stack , CLONE_STACK_SIZE ) == -1 )
1475
- WARN ("Could not unmap compartment_stack!" );
1476
1441
return ret ;
1477
1442
1478
1443
error_post_clone :
@@ -1483,8 +1448,6 @@ compartment_start(compartment_t *compartment)
1483
1448
WARN_ERRNO ("write to sync socket failed" );
1484
1449
compartment_kill (compartment );
1485
1450
}
1486
- if (compartment_stack && munmap (compartment_stack , CLONE_STACK_SIZE ) == -1 )
1487
- WARN ("Could not unmap compartment_stack!" );
1488
1451
return ret ;
1489
1452
}
1490
1453
0 commit comments