Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 23 additions & 7 deletions src/hotspot/os/linux/cgroupUtil_linux.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, Red Hat, Inc.
* Copyright (c) 2024, 2025, Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -49,20 +49,27 @@ int CgroupUtil::processor_count(CgroupCpuController* cpu_ctrl, int host_cpus) {
}

void CgroupUtil::adjust_controller(CgroupMemoryController* mem) {
assert(mem->cgroup_path() != nullptr, "invariant");
if (strstr(mem->cgroup_path(), "../") != nullptr) {
log_warning(os, container)("Cgroup memory controller path at '%s' seems to have moved to '%s', detected limits won't be accurate",
mem->mount_point(), mem->cgroup_path());
mem->set_subsystem_path("/");
return;
}
if (!mem->needs_hierarchy_adjustment()) {
// nothing to do
return;
}
log_trace(os, container)("Adjusting controller path for memory: %s", mem->subsystem_path());
assert(mem->cgroup_path() != nullptr, "invariant");
char* orig = os::strdup(mem->cgroup_path());
char* cg_path = os::strdup(orig);
char* last_slash;
assert(cg_path[0] == '/', "cgroup path must start with '/'");
julong phys_mem = os::Linux::physical_memory();
char* limit_cg_path = nullptr;
jlong limit = mem->read_memory_limit_in_bytes(phys_mem);
jlong lowest_limit = phys_mem;
jlong lowest_limit = limit < 0 ? phys_mem : limit;
julong orig_limit = ((julong)lowest_limit) != phys_mem ? lowest_limit : phys_mem;
while ((last_slash = strrchr(cg_path, '/')) != cg_path) {
*last_slash = '\0'; // strip path
// update to shortened path and try again
Expand All @@ -83,7 +90,7 @@ void CgroupUtil::adjust_controller(CgroupMemoryController* mem) {
limit_cg_path = os::strdup("/");
}
assert(lowest_limit >= 0, "limit must be positive");
if ((julong)lowest_limit != phys_mem) {
if ((julong)lowest_limit != orig_limit) {
// we've found a lower limit anywhere in the hierarchy,
// set the path to the limit path
assert(limit_cg_path != nullptr, "limit path must be set");
Expand All @@ -93,6 +100,7 @@ void CgroupUtil::adjust_controller(CgroupMemoryController* mem) {
mem->subsystem_path(),
lowest_limit);
} else {
log_trace(os, container)("Lowest limit was: " JLONG_FORMAT, lowest_limit);
log_trace(os, container)("No lower limit found for memory in hierarchy %s, "
"adjusting to original path %s",
mem->mount_point(), orig);
Expand All @@ -104,19 +112,26 @@ void CgroupUtil::adjust_controller(CgroupMemoryController* mem) {
}

void CgroupUtil::adjust_controller(CgroupCpuController* cpu) {
assert(cpu->cgroup_path() != nullptr, "invariant");
if (strstr(cpu->cgroup_path(), "../") != nullptr) {
log_warning(os, container)("Cgroup cpu controller path at '%s' seems to have moved to '%s', detected limits won't be accurate",
cpu->mount_point(), cpu->cgroup_path());
cpu->set_subsystem_path("/");
return;
}
if (!cpu->needs_hierarchy_adjustment()) {
// nothing to do
return;
}
log_trace(os, container)("Adjusting controller path for cpu: %s", cpu->subsystem_path());
assert(cpu->cgroup_path() != nullptr, "invariant");
char* orig = os::strdup(cpu->cgroup_path());
char* cg_path = os::strdup(orig);
char* last_slash;
assert(cg_path[0] == '/', "cgroup path must start with '/'");
int host_cpus = os::Linux::active_processor_count();
int cpus = CgroupUtil::processor_count(cpu, host_cpus);
int lowest_limit = host_cpus;
int lowest_limit = cpus < host_cpus ? cpus: host_cpus;
int orig_limit = lowest_limit != host_cpus ? lowest_limit : host_cpus;
char* limit_cg_path = nullptr;
while ((last_slash = strrchr(cg_path, '/')) != cg_path) {
*last_slash = '\0'; // strip path
Expand All @@ -138,7 +153,7 @@ void CgroupUtil::adjust_controller(CgroupCpuController* cpu) {
limit_cg_path = os::strdup(cg_path);
}
assert(lowest_limit >= 0, "limit must be positive");
if (lowest_limit != host_cpus) {
if (lowest_limit != orig_limit) {
// we've found a lower limit anywhere in the hierarchy,
// set the path to the limit path
assert(limit_cg_path != nullptr, "limit path must be set");
Expand All @@ -148,6 +163,7 @@ void CgroupUtil::adjust_controller(CgroupCpuController* cpu) {
cpu->subsystem_path(),
lowest_limit);
} else {
log_trace(os, container)("Lowest limit was: %d", lowest_limit);
log_trace(os, container)("No lower limit found for cpu in hierarchy %s, "
"adjusting to original path %s",
cpu->mount_point(), orig);
Expand Down
77 changes: 63 additions & 14 deletions src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -37,6 +37,47 @@
/*
* Set directory to subsystem specific files based
* on the contents of the mountinfo and cgroup files.
*
* The method determines whether it runs in
* - host mode
* - container mode
*
* In the host mode, _root is equal to "/" and
* the subsystem path is equal to the _mount_point path
* joined with cgroup_path.
*
* In the container mode, there are two possibilities:
* - private namespace (cgroupns=private)
* - host namespace (cgroupns=host, default mode in cgroup V1 hosts)
*
* Private namespace is equivalent to the host mode, i.e.
* the subsystem path is set by concatenating
* _mount_point and cgroup_path.
*
* In the host namespace, _root is equal to host's cgroup path
* of the control group to which the containerized process
* belongs at the moment of creation. The mountinfo and
* cgroup files are mirrored from the host, while the subsystem
* specific files are mapped directly at _mount_point, i.e.
* at /sys/fs/cgroup/<controller>/, the subsystem path is
* then set equal to _mount_point.
*
* A special case of the subsystem path arises when the cgroup path
* includes a subgroup, i.e. when a containerized process was associated
* with an existing cgroup that is different from the cgroup
* in which the process has been created.
* Here, _root is equal to the host's initial cgroup path, while
* cgroup_path is equal to the host's new cgroup path.
* As host cgroup hierarchies are not accessible in the container,
* it needs to be determined which part of cgroup path
* is accessible inside container, i.e. mapped under
* /sys/fs/cgroup/<controller>/<subgroup>.
* In Docker default setup, host's cgroup path can be
* of the form: /docker/<CONTAINER_ID>/<subgroup>,
* from which only <subgroup> is mapped.
* The method trims the cgroup path from the left until the subgroup
* component is found. The subsystem path will be set to
* the _mount_point joined with the subgroup path.
*/
void CgroupV1Controller::set_subsystem_path(const char* cgroup_path) {
if (_cgroup_path != nullptr) {
Expand All @@ -49,28 +90,36 @@ void CgroupV1Controller::set_subsystem_path(const char* cgroup_path) {
_cgroup_path = os::strdup(cgroup_path);
stringStream ss;
if (_root != nullptr && cgroup_path != nullptr) {
ss.print_raw(_mount_point);
if (strcmp(_root, "/") == 0) {
ss.print_raw(_mount_point);
// host processes and containers with cgroupns=private
if (strcmp(cgroup_path,"/") != 0) {
ss.print_raw(cgroup_path);
}
_path = os::strdup(ss.base());
} else {
if (strcmp(_root, cgroup_path) == 0) {
ss.print_raw(_mount_point);
_path = os::strdup(ss.base());
} else {
char *p = strstr((char*)cgroup_path, _root);
if (p != nullptr && p == _root) {
if (strlen(cgroup_path) > strlen(_root)) {
ss.print_raw(_mount_point);
const char* cg_path_sub = cgroup_path + strlen(_root);
ss.print_raw(cg_path_sub);
_path = os::strdup(ss.base());
// containers with cgroupns=host, default setting is _root==cgroup_path
if (strcmp(_root, cgroup_path) != 0) {
if (*cgroup_path != '\0' && strcmp(cgroup_path, "/") != 0) {
// When moved to a subgroup, between subgroups, the path suffix will change.
const char *suffix = cgroup_path;
while (suffix != nullptr) {
stringStream pp;
pp.print_raw(_mount_point);
pp.print_raw(suffix);
if (os::file_exists(pp.base())) {
ss.print_raw(suffix);
if (suffix != cgroup_path) {
log_trace(os, container)("set_subsystem_path: cgroup v1 path reduced to: %s.", suffix);
}
break;
}
log_trace(os, container)("set_subsystem_path: skipped non-existent directory: %s.", suffix);
suffix = strchr(suffix + 1, '/');
}
}
}
}
_path = os::strdup(ss.base());
}
}

Expand Down
6 changes: 5 additions & 1 deletion src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, 2022, Red Hat Inc.
* Copyright (c) 2020, 2025, Red Hat Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -283,6 +283,10 @@ jlong memory_swap_limit_value(CgroupV2Controller* ctrl) {
}

void CgroupV2Controller::set_subsystem_path(const char* cgroup_path) {
if (_cgroup_path != nullptr) {
os::free(_cgroup_path);
}
_cgroup_path = os::strdup(cgroup_path);
if (_path != nullptr) {
os::free(_path);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand All @@ -25,6 +25,9 @@

package jdk.internal.platform.cgroupv1;

import java.lang.System.Logger.Level;
import java.nio.file.Path;
import java.nio.file.Files;
import jdk.internal.platform.CgroupSubsystem;
import jdk.internal.platform.CgroupSubsystemController;

Expand All @@ -44,27 +47,36 @@ public CgroupV1SubsystemController(String root, String mountPoint) {

public void setPath(String cgroupPath) {
if (root != null && cgroupPath != null) {
String path = mountPoint;
if (root.equals("/")) {
// host processes and containers with cgroupns=private
if (!cgroupPath.equals("/")) {
path = mountPoint + cgroupPath;
path += cgroupPath;
}
else {
path = mountPoint;
}
}
else {
if (root.equals(cgroupPath)) {
path = mountPoint;
}
else {
if (cgroupPath.startsWith(root)) {
if (cgroupPath.length() > root.length()) {
String cgroupSubstr = cgroupPath.substring(root.length());
path = mountPoint + cgroupSubstr;
} else {
// containers with cgroupns=host, default setting is _root==cgroup_path
if (!cgroupPath.equals(root)) {
if (!cgroupPath.equals("") && !cgroupPath.equals("/")) {
// When moved to a subgroup, between subgroups, the path suffix will change.
Path cgp = Path.of(cgroupPath);
int nameCount = cgp.getNameCount();
for (int i=0; i < nameCount; i++) {
Path dir = Path.of(mountPoint, cgp.toString());
if (Files.isDirectory(dir)) {
path = dir.toString();
if (i > 0) {
System.getLogger("jdk.internal.platform").log(Level.DEBUG, String.format(
"Cgroup v1 path reduced to: %s.", cgp));
}
break;
}
int currentNameCount = cgp.getNameCount();
cgp = (currentNameCount > 1) ? cgp.subpath(1, currentNameCount) : Path.of("");
}
}
}
}
this.path = path;
}
}

Expand Down
78 changes: 76 additions & 2 deletions test/hotspot/gtest/runtime/test_cgroupSubsystem_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include "runtime/os.hpp"
#include "cgroupSubsystem_linux.hpp"
#include "cgroupUtil_linux.hpp"
#include "cgroupV1Subsystem_linux.hpp"
#include "cgroupV2Subsystem_linux.hpp"
#include "unittest.hpp"
Expand Down Expand Up @@ -434,9 +435,16 @@ TEST(cgroupTest, set_cgroupv1_subsystem_path) {
"/user.slice/user-1000.slice/[email protected]", // cgroup_path
"/sys/fs/cgroup/mem" // expected_path
};
int length = 2;
TestCase container_moving_cgroup = {
"/sys/fs/cgroup/cpu,cpuacct", // mount_path
"/system.slice/garden.service/garden/good/2f57368b-0eda-4e52-64d8-af5c", // root_path
"/system.slice/garden.service/garden/bad/2f57368b-0eda-4e52-64d8-af5c", // cgroup_path
"/sys/fs/cgroup/cpu,cpuacct" // expected_path
};
int length = 3;
TestCase* testCases[] = { &host,
&container_engine };
&container_engine,
&container_moving_cgroup };
for (int i = 0; i < length; i++) {
CgroupV1Controller* ctrl = new CgroupV1Controller( (char*)testCases[i]->root_path,
(char*)testCases[i]->mount_path,
Expand All @@ -446,6 +454,72 @@ TEST(cgroupTest, set_cgroupv1_subsystem_path) {
}
}

// Cgroup v1: controllers created with a relative ("../") cgroup path must
// report needs_hierarchy_adjustment() before CgroupUtil::adjust_controller()
// runs, and afterwards expose the expected subsystem path and no longer ask
// for an adjustment. The cpu controller is exercised first, then memory.
TEST(cgroupTest, set_cgroupv1_subsystem_path_adjusted) {
  // --- cpu controller ---
  {
    TestCase cpu_case = {
      "/sys/fs/cgroup/cpu",      // mount_path
      "/",                       // root_path
      "../../test2",             // cgroup_path
      "/sys/fs/cgroup/cpu"       // expected_path
    };
    CgroupCpuController* cpu_ctrl =
        new CgroupV1CpuController(CgroupV1Controller((char*)cpu_case.root_path,
                                                     (char*)cpu_case.mount_path,
                                                     true /* read-only mount */));
    cpu_ctrl->set_subsystem_path((char*)cpu_case.cgroup_path);
    // Before the adjustment the controller must flag itself as adjustable.
    EXPECT_TRUE(cpu_ctrl->needs_hierarchy_adjustment());

    CgroupUtil::adjust_controller(cpu_ctrl);
    ASSERT_STREQ(cpu_case.expected_path, cpu_ctrl->subsystem_path());
    EXPECT_FALSE(cpu_ctrl->needs_hierarchy_adjustment());
  }

  // --- memory controller ---
  {
    TestCase mem_case = {
      "/sys/fs/cgroup/memory",   // mount_path
      "/",                       // root_path
      "../test1",                // cgroup_path
      "/sys/fs/cgroup/memory"    // expected_path
    };
    CgroupMemoryController* mem_ctrl =
        new CgroupV1MemoryController(CgroupV1Controller((char*)mem_case.root_path,
                                                        (char*)mem_case.mount_path,
                                                        true /* read-only mount */));
    mem_ctrl->set_subsystem_path((char*)mem_case.cgroup_path);
    // Before the adjustment the controller must flag itself as adjustable.
    EXPECT_TRUE(mem_ctrl->needs_hierarchy_adjustment());

    CgroupUtil::adjust_controller(mem_ctrl);
    ASSERT_STREQ(mem_case.expected_path, mem_ctrl->subsystem_path());
    EXPECT_FALSE(mem_ctrl->needs_hierarchy_adjustment());
  }
}

// Cgroup v2: controllers constructed directly with a relative ("../") cgroup
// path must report needs_hierarchy_adjustment() before
// CgroupUtil::adjust_controller() runs, and afterwards expose the expected
// subsystem path and no longer ask for an adjustment. The cpu controller is
// exercised first, then memory.
TEST(cgroupTest, set_cgroupv2_subsystem_path_adjusted) {
  // --- cpu controller ---
  {
    TestCase cpu_case = {
      "/sys/fs/cgroup",          // mount_path
      "/",                       // root_path
      "../../test2",             // cgroup_path
      "/sys/fs/cgroup"           // expected_path
    };
    // The v2 controller takes the cgroup path in its constructor, so no
    // separate set_subsystem_path() call is made here.
    CgroupCpuController* cpu_ctrl =
        new CgroupV2CpuController(CgroupV2Controller((char*)cpu_case.mount_path,
                                                     (char*)cpu_case.cgroup_path,
                                                     true /* read-only mount */));
    EXPECT_TRUE(cpu_ctrl->needs_hierarchy_adjustment());

    CgroupUtil::adjust_controller(cpu_ctrl);
    ASSERT_STREQ(cpu_case.expected_path, cpu_ctrl->subsystem_path());
    EXPECT_FALSE(cpu_ctrl->needs_hierarchy_adjustment());
  }

  // --- memory controller ---
  {
    TestCase mem_case = {
      "/sys/fs/cgroup",          // mount_path
      "/",                       // root_path
      "../test1",                // cgroup_path
      "/sys/fs/cgroup"           // expected_path
    };
    CgroupMemoryController* mem_ctrl =
        new CgroupV2MemoryController(CgroupV2Controller((char*)mem_case.mount_path,
                                                        (char*)mem_case.cgroup_path,
                                                        true /* read-only mount */));
    EXPECT_TRUE(mem_ctrl->needs_hierarchy_adjustment());

    CgroupUtil::adjust_controller(mem_ctrl);
    ASSERT_STREQ(mem_case.expected_path, mem_ctrl->subsystem_path());
    EXPECT_FALSE(mem_ctrl->needs_hierarchy_adjustment());
  }
}

TEST(cgroupTest, set_cgroupv2_subsystem_path) {
TestCase at_mount_root = {
"/sys/fs/cgroup", // mount_path
Expand Down
Loading