Skip to content

Commit 79e07b4

Browse files
authored
vminitd: Add init and execs to cgroup (#265)
We weren't doing any cgroup setup whatsoever. For the most part this doesn't matter too much; however, certain images that fool around with cgroups don't like this :). Let's do the bare minimum these images expect, which is to at least have the process running in a nested cg and not be a part of the root cg.
1 parent 5050027 commit 79e07b4

File tree

5 files changed

+194
-21
lines changed

5 files changed

+194
-21
lines changed

Sources/Containerization/Agent/Vminitd.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ extension Vminitd: VirtualMachineAgent {
6666
}
6767

6868
// Setup root cg subtree_control.
69-
let data = "+memory +pids +io +cpu +cpuset".data(using: .utf8)!
69+
let data = "+memory +pids +io +cpu +cpuset +hugetlb".data(using: .utf8)!
7070
try await writeFile(
7171
path: "/sys/fs/cgroup/cgroup.subtree_control",
7272
data: data,

Sources/Containerization/LinuxContainer.swift

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,8 @@ public final class LinuxContainer: Container, Sendable {
421421
readonly: false
422422
),
423423
linux: .init(
424-
resources: .init()
424+
resources: .init(),
425+
cgroupsPath: "/container/\(id)"
425426
)
426427
)
427428
}
@@ -436,9 +437,7 @@ public final class LinuxContainer: Container, Sendable {
436437
spec.hostname = config.hostname
437438

438439
// Linux toggles.
439-
var linux = ContainerizationOCI.Linux.init()
440-
linux.sysctl = config.sysctl
441-
spec.linux = linux
440+
spec.linux?.sysctl = config.sysctl
442441

443442
return spec
444443
}
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
//===----------------------------------------------------------------------===//
2+
// Copyright © 2025 Apple Inc. and the Containerization project authors. All rights reserved.
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// https://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
//===----------------------------------------------------------------------===//
16+
17+
import ContainerizationOS
18+
import Foundation
19+
import Logging
20+
import Musl
21+
22+
/// Controllers that can be toggled through `cgroup.subtree_control`.
///
/// Each case's raw value is the controller name exactly as it is written to
/// the control file (prefixed with `+` or `-` by the caller).
enum CgroupController: String {
    case pids, memory, cpuset, cpu, io, hugetlb
}
30+
31+
/// Extremely simple cgroup manager. Our needs are simple for now, and this is
/// reflected in the type.
///
/// A manager is bound to one cgroup directory located beneath `mountPoint`.
/// The directory (and any missing intermediate directories) is created when
/// the manager is initialized.
internal struct CgroupManager {
    static let defaultMountPoint = URL(filePath: "/sys/fs/cgroup")

    static let killFile = "cgroup.kill"
    static let procsFile = "cgroup.procs"
    static let subtreeControlFile = "cgroup.subtree_control"

    /// Upper bound on the number of `rmdir` attempts made by a forced delete
    /// while previously killed processes are still exiting.
    private static let deleteAttempts = 50
    /// Pause between `rmdir` attempts, in seconds.
    private static let deleteRetryInterval: TimeInterval = 0.01

    private let mountPoint: URL
    private let path: URL
    private let logger: Logger?

    /// Creates the manager and the backing cgroup directory.
    ///
    /// - Parameters:
    ///   - mountPoint: Root of the cgroup filesystem.
    ///   - path: Location of the cgroup relative to `mountPoint`.
    ///   - perms: POSIX permissions applied to directories that get created.
    ///   - logger: Optional logger for diagnostics.
    /// - Throws: Whatever `FileManager.createDirectory` throws when the
    ///   directory cannot be created.
    init(
        mountPoint: URL = defaultMountPoint,
        path: URL,
        perms: Int16 = 0o755,
        logger: Logger? = nil
    ) throws {
        self.mountPoint = mountPoint
        self.path = mountPoint.appending(path: path.path)
        self.logger = logger

        // Purely informational: creating a manager is not an error condition,
        // so do not pollute error-level logs with it.
        self.logger?.info(
            "creating cgroup manager",
            metadata: [
                "mountpoint": "\(self.mountPoint.path)",
                "path": "\(self.path.path)",
            ])

        try FileManager.default.createDirectory(
            at: self.path,
            withIntermediateDirectories: true,
            attributes: [.posixPermissions: perms]
        )
    }

    /// Writes `value` to the existing file `fileName` inside directory `path`.
    ///
    /// - Throws: `Error.errno` if the file cannot be opened or written.
    private static func writeValue(path: URL, value: String, fileName: String) throws {
        let file = path.appending(path: fileName)
        let fd = open(file.path, O_WRONLY, 0)
        guard fd != -1 else {
            throw Error.errno(errno: errno, message: "failed to open \(file.path)")
        }
        defer { Musl.close(fd) }

        let bytes = Array(value.utf8)
        let res = Syscall.retrying {
            // Hand the optional base address straight to write(2) instead of
            // force-unwrapping it: an empty buffer is not guaranteed to have
            // a non-nil base address.
            bytes.withUnsafeBytes { write(fd, $0.baseAddress, $0.count) }
        }
        if res == -1 {
            throw Error.errno(errno: errno, message: "failed to write to \(file.path)")
        }
    }

    /// Enables or disables `controllers` in `cgroup.subtree_control` for the
    /// mount point and every directory between it and this cgroup, excluding
    /// this cgroup itself.
    ///
    /// - Parameters:
    ///   - controllers: Controllers to toggle. An empty list is a no-op.
    ///   - enable: `true` writes `+name`, `false` writes `-name`.
    /// - Throws: `Error.errno` if any control file cannot be opened or written.
    func toggleSubtreeControllers(controllers: [CgroupController], enable: Bool) throws {
        // Nothing to toggle; avoid touching the control files at all.
        guard !controllers.isEmpty else { return }

        let sign = enable ? "+" : "-"
        let value = controllers.map { sign + $0.rawValue }.joined(separator: " ")

        // First ensure it's set on the root.
        var current = self.mountPoint
        try Self.writeValue(
            path: current,
            value: value,
            fileName: Self.subtreeControlFile
        )

        // Toggle everything except the leaf, as otherwise we won't be able to write
        // to cgroup.procs, and what fun is that :)
        let intermediates = self.path.pathComponents
            .dropFirst(self.mountPoint.pathComponents.count)
            .dropLast()
        for component in intermediates {
            current = current.appending(path: component)
            try Self.writeValue(
                path: current,
                value: value,
                fileName: Self.subtreeControlFile
            )
        }
    }

    /// Moves the process identified by `pid` into this cgroup by writing it
    /// to `cgroup.procs`.
    ///
    /// - Throws: `Error.errno` if the file cannot be opened or written.
    func addProcess(pid: Int32) throws {
        try Self.writeValue(
            path: self.path,
            value: String(pid),
            fileName: Self.procsFile
        )
    }

    /// Writes `1` to this cgroup's `cgroup.kill` file.
    ///
    /// - Throws: `Error.errno` if the file cannot be opened or written.
    func kill() throws {
        try Self.writeValue(
            path: self.path,
            value: "1",
            fileName: Self.killFile
        )
    }

    /// Removes this cgroup's directory.
    ///
    /// - Parameter force: When `true`, the cgroup is killed first and removal
    ///   is retried for a bounded period while it still reports `EBUSY`.
    /// - Throws: `Error.errno` if the kill write fails or the directory cannot
    ///   be removed.
    func delete(force: Bool = false) throws {
        if force {
            try self.kill()
        }

        // The kill is delivered as SIGKILL, so member processes may still be
        // exiting when we get here and rmdir(2) reports EBUSY until they are
        // gone. Retry briefly in that case only; every other failure (and any
        // failure of a non-forced delete) is reported immediately.
        // NOTE(review): this blocks the calling thread for at most
        // deleteAttempts * deleteRetryInterval seconds.
        let attempts = force ? Self.deleteAttempts : 1
        for attempt in 1...attempts {
            if rmdir(self.path.path) == 0 {
                return
            }
            let code = errno
            guard code == EBUSY, attempt < attempts else {
                throw Error.errno(
                    errno: code,
                    message: "failed to remove cgroup \(self.path.path)"
                )
            }
            Thread.sleep(forTimeInterval: Self.deleteRetryInterval)
        }
    }
}
136+
137+
extension CgroupManager {
    /// Failures reported by `CgroupManager`.
    enum Error: Swift.Error, CustomStringConvertible {
        /// A system call failed; carries the `errno` value supplied by the
        /// thrower along with a short description of what was attempted.
        case errno(errno: Int32, message: String)

        /// Human-readable rendering of the failure.
        var description: String {
            switch self {
            case let .errno(code, detail):
                return "failed with errno \(code): \(detail)"
            }
        }
    }
}

vminitd/Sources/vminitd/ManagedContainer.swift

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,10 @@ actor ManagedContainer {
2424
let id: String
2525
let initProcess: ManagedProcess
2626

27-
private let _log: Logger
28-
private let _bundle: ContainerizationOCI.Bundle
29-
private var _execs: [String: ManagedProcess] = [:]
27+
private let cgroupManager: CgroupManager
28+
private let log: Logger
29+
private let bundle: ContainerizationOCI.Bundle
30+
private var execs: [String: ManagedProcess] = [:]
3031

3132
var pid: Int32 {
3233
self.initProcess.pid
@@ -44,25 +45,43 @@ actor ManagedContainer {
4445
)
4546
log.info("created bundle with spec \(spec)")
4647

48+
var cgroupsPath: String
49+
if let cgPath = spec.linux?.cgroupsPath {
50+
cgroupsPath = cgPath
51+
} else {
52+
cgroupsPath = "/container/\(id)"
53+
}
54+
55+
let cgManager = try CgroupManager(
56+
path: URL(filePath: cgroupsPath),
57+
logger: log
58+
)
59+
try cgManager.toggleSubtreeControllers(
60+
controllers: [.cpu, .cpuset, .hugetlb, .io, .memory, .pids],
61+
enable: true
62+
)
63+
4764
let initProcess = try ManagedProcess(
4865
id: id,
4966
stdio: stdio,
5067
bundle: bundle,
68+
cgroupManager: cgManager,
5169
owningPid: nil,
5270
log: log
5371
)
5472
log.info("created managed init process")
5573

5674
self.initProcess = initProcess
5775
self.id = id
58-
self._bundle = bundle
59-
self._log = log
76+
self.cgroupManager = cgManager
77+
self.bundle = bundle
78+
self.log = log
6079
}
6180
}
6281

6382
extension ManagedContainer {
6483
private func ensureExecExists(_ id: String) throws {
65-
if self._execs[id] == nil {
84+
if self.execs[id] == nil {
6685
throw ContainerizationError(
6786
.invalidState,
6887
message: "exec \(id) does not exist in container \(self.id)"
@@ -77,18 +96,19 @@ extension ManagedContainer {
7796
) throws {
7897
// Write the process config to the bundle, and pass this on
7998
// over to ManagedProcess to deal with.
80-
try self._bundle.createExecSpec(
99+
try self.bundle.createExecSpec(
81100
id: id,
82101
process: process
83102
)
84103
let process = try ManagedProcess(
85104
id: id,
86105
stdio: stdio,
87-
bundle: self._bundle,
106+
bundle: self.bundle,
107+
cgroupManager: self.cgroupManager,
88108
owningPid: self.initProcess.pid,
89-
log: self._log
109+
log: self.log
90110
)
91-
self._execs[id] = process
111+
self.execs[id] = process
92112
}
93113

94114
func start(execID: String) async throws -> Int32 {
@@ -119,22 +139,23 @@ extension ManagedContainer {
119139
func deleteExec(id: String) throws {
120140
try ensureExecExists(id)
121141
do {
122-
try self._bundle.deleteExecSpec(id: id)
142+
try self.bundle.deleteExecSpec(id: id)
123143
} catch {
124-
self._log.error("failed to remove exec spec from filesystem: \(error)")
144+
self.log.error("failed to remove exec spec from filesystem: \(error)")
125145
}
126-
self._execs.removeValue(forKey: id)
146+
self.execs.removeValue(forKey: id)
127147
}
128148

129149
func delete() throws {
130-
try self._bundle.delete()
150+
try self.bundle.delete()
151+
try self.cgroupManager.delete(force: true)
131152
}
132153

133154
func getExecOrInit(execID: String) throws -> ManagedProcess {
134155
if execID == self.id {
135156
return self.initProcess
136157
}
137-
guard let proc = self._execs[execID] else {
158+
guard let proc = self.execs[execID] else {
138159
throw ContainerizationError(
139160
.invalidState,
140161
message: "exec \(execID) does not exist in container \(self.id)"

vminitd/Sources/vminitd/ManagedProcess.swift

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ final class ManagedProcess: Sendable {
3434
private let syncPipe: FileHandle
3535
private let terminal: Bool
3636
private let bundle: ContainerizationOCI.Bundle
37+
private let cgroupManager: CgroupManager
3738

3839
private struct State {
3940
init(io: IO) {
@@ -74,6 +75,7 @@ final class ManagedProcess: Sendable {
7475
id: String,
7576
stdio: HostStdio,
7677
bundle: ContainerizationOCI.Bundle,
78+
cgroupManager: CgroupManager,
7779
owningPid: Int32? = nil,
7880
log: Logger
7981
) throws {
@@ -82,6 +84,7 @@ final class ManagedProcess: Sendable {
8284
Self.localizeLogger(log: &log, id: id)
8385
self.log = log
8486
self.owningPid = owningPid
87+
self.cgroupManager = cgroupManager
8588

8689
let syncPipe = Pipe()
8790
try syncPipe.setCloexec()
@@ -181,7 +184,9 @@ extension ManagedProcess {
181184
])
182185
$0.pid = pid
183186

184-
// Ack the pid from the child.
187+
// First add to our cg, then ack the pid.
188+
try self.cgroupManager.addProcess(pid: pid)
189+
185190
log.info(
186191
"sending pid acknowledgement",
187192
metadata: [

0 commit comments

Comments
 (0)