Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 23 additions & 5 deletions be/src/common/cgroup_memory_ctl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ struct CgroupsV2Reader : CGroupMemoryCtl::ICgroupsReader {
return Status::CgroupError("Error reading {}: {}", file_path.string(),
get_str_err_msg());
}
// "max" means no limit. For example, every process in Linux belongs to a cgroup, and
// the default value of the memory limit in the memory.max file is "max", which means no limit.
if (line == "max") {
*value = std::numeric_limits<int64_t>::max();
return Status::OK();
Expand All @@ -107,15 +109,31 @@ struct CgroupsV2Reader : CGroupMemoryCtl::ICgroupsReader {
std::unordered_map<std::string, int64_t> metrics_map;
CGroupUtil::read_int_metric_from_cgroup_file((_mount_file_dir / "memory.stat"),
metrics_map);
if (*value < metrics_map["inactive_file"]) {
return Status::CgroupError("CgroupsV2Reader read_memory_usage negative memory usage");
int64_t inactive_file =
metrics_map.contains("inactive_file") ? metrics_map["inactive_file"] : 0;
int64_t active_file = metrics_map.contains("active_file") ? metrics_map["active_file"] : 0;
int64_t slab_reclaimable =
metrics_map.contains("slab_reclaimable") ? metrics_map["slab_reclaimable"] : 0;
if (inactive_file < 1 || active_file < 1 || slab_reclaimable < 1) {
// In this scenario, do not return an error; just log a warning and continue.
LOG(WARNING) << "CgroupsV2Reader read_memory_usage missing expected metrics in "
"memory.stat, inactive_file: "
<< inactive_file << ", active_file: " << active_file
<< ", slab_reclaimable: " << slab_reclaimable;
}
// the reason why we subtract inactive_file described here:

const int64_t reclaimable_usage = inactive_file + active_file + slab_reclaimable;
if (*value < reclaimable_usage) {
return Status::CgroupError(
"CgroupsV2Reader read_memory_usage negative memory usage, memory.current: {}, "
"active_file: {}, inactive_file: {}, slab_reclaimable: {}",
*value, active_file, inactive_file, slab_reclaimable);
}
// The reclaimable file cache described here should not be counted as used memory:
// https://github.com/ClickHouse/ClickHouse/issues/64652#issuecomment-2149630667
*value -= metrics_map["inactive_file"];
// Part of "slab" that might be reclaimed, such as dentries and inodes.
// https://arthurchiao.art/blog/cgroupv2-zh/
*value -= metrics_map["slab_reclaimable"];
*value -= reclaimable_usage;
return Status::OK();
}

Expand Down
7 changes: 7 additions & 0 deletions be/src/util/cgroup_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ std::string CGroupUtil::cgroupv2_of_process() {
}
// With cgroups v2, there will be a *single* line with prefix "0::/"
// (see https://docs.kernel.org/admin-guide/cgroup-v2.html)
// such as 0::/user.slice/user-1005.slice/session-213906.scope, which gives the cgroup name.
// It should be combined with the default cgroup mount point to get the full path to the cgroup, e.g.
// /sys/fs/cgroup/user.slice/user-1005.slice/session-213906.scope
std::string cgroup;
std::getline(cgroup_name_file, cgroup);
static const std::string v2_prefix = "0::/";
Expand All @@ -198,6 +201,7 @@ std::optional<std::string> CGroupUtil::get_cgroupsv2_path(const std::string& sub
}

std::string cgroup = CGroupUtil::cgroupv2_of_process();
// /sys/fs/cgroup/user.slice/user-1005.slice/session-213906.scope
auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup);

// Return the bottom-most nested current memory file. If there is no such file at the current
Expand Down Expand Up @@ -259,6 +263,9 @@ void CGroupUtil::read_int_metric_from_cgroup_file(
metrics_map[key] = value;
} else if (fields[2] == "kB") {
metrics_map[key] = value * 1024L;
} else {
LOG(WARNING) << "Unknown unit in cgroup file " << file_path.string()
<< ", line: " << line;
}
}
}
Expand Down
16 changes: 7 additions & 9 deletions be/src/util/mem_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ void MemInfo::refresh_proc_meminfo() {
if (meminfo.is_open()) {
meminfo.close();
}

_s_cgroup_mem_refresh_state = false;
// refresh cgroup memory
if (config::enable_use_cgroup_memory_info) {
if (_s_cgroup_mem_refresh_wait_times >= 0) {
Expand All @@ -119,30 +119,28 @@ void MemInfo::refresh_proc_meminfo() {

// cgroup mem limit is refreshed every 10 seconds,
// cgroup mem usage is refreshed together with memInfo every time, which is very frequent.
// If _s_cgroup_mem_limit == max, it means that getting the cgroup mem limit failed OR the cgroup has no
// memory limit, for example, when there is just "max" in the memory.max file.
if (_s_cgroup_mem_limit != std::numeric_limits<int64_t>::max()) {
int64_t cgroup_mem_usage;
auto status = CGroupMemoryCtl::find_cgroup_mem_usage(&cgroup_mem_usage);
if (!status.ok()) {
_s_cgroup_mem_usage = std::numeric_limits<int64_t>::min();
_s_cgroup_mem_refresh_state = false;
LOG_EVERY_N(WARNING, 500)
<< "Refresh cgroup memory usage failed, cgroup mem limit: "
<< _s_cgroup_mem_limit << ", " << status;
} else {
_s_cgroup_mem_usage = cgroup_mem_usage;
_s_cgroup_mem_refresh_state = true;
}
} else {
_s_cgroup_mem_refresh_state = false;
}
} else {
_s_cgroup_mem_refresh_state = false;
}

// 1. calculate physical_mem
int64_t physical_mem = -1;

physical_mem = _mem_info_bytes["MemTotal"];
if (_mem_info_bytes.find("MemTotal") != _mem_info_bytes.end()) {
physical_mem = _mem_info_bytes["MemTotal"];
}
if (_s_cgroup_mem_refresh_state) {
// In theory, always cgroup_mem_limit < physical_mem
if (physical_mem < 0) {
Expand Down Expand Up @@ -200,7 +198,7 @@ void MemInfo::refresh_proc_meminfo() {
// Process `MemAvailable = MemFree - LowWaterMark + (PageCache - min(PageCache / 2, LowWaterMark))`,
// from `MemAvailable` in `/proc/meminfo`, calculated by OS.
// CgroupV2 `MemAvailable = cgroup_mem_limit - cgroup_mem_usage`,
// `cgroup_mem_usage = memory.current - inactive_file - slab_reclaimable`, in fact,
// `cgroup_mem_usage = memory.current - inactive_file - active_file - slab_reclaimable`, in fact,
// there seems to be some memory that can be reused in `cgroup_mem_usage`.
if (mem_available < 0) {
mem_available = _s_cgroup_mem_limit - _s_cgroup_mem_usage;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ std::optional<CgroupMemoryInfo> get_cgroup_memory_info() {
}
auto metrics = read_metrics_map(*dir / "memory.stat");
int64_t adjusted_usage = *usage;
adjusted_usage -= metrics["inactive_file"];
adjusted_usage -= metrics["inactive_file"] + metrics["active_file"];
adjusted_usage -= metrics["slab_reclaimable"];
adjusted_usage = std::max<int64_t>(0, adjusted_usage);
return CgroupMemoryInfo {limit_bytes, adjusted_usage};
Expand Down
Loading