diff --git a/src/bindings.c b/src/bindings.c index 27c08c38..d6af4982 100644 --- a/src/bindings.c +++ b/src/bindings.c @@ -866,6 +866,7 @@ static void __attribute__((constructor)) lxcfs_init(void) { __do_close int init_ns = -EBADF, root_fd = -EBADF, pidfd = -EBADF; + __do_free char *cgroup = NULL; int i = 0; pid_t pid; struct hierarchy *hierarchy; @@ -920,7 +921,8 @@ static void __attribute__((constructor)) lxcfs_init(void) lxcfs_info("Kernel supports pidfds"); } - can_use_swap = cgroup_ops->can_use_swap(cgroup_ops); + cgroup = get_pid_cgroup(pid, "memory"); + can_use_swap = cgroup_ops->can_use_swap(cgroup_ops, cgroup); if (can_use_swap) lxcfs_info("Kernel supports swap accounting"); else diff --git a/src/cgroups/cgfsng.c b/src/cgroups/cgfsng.c index 2d583c67..7b732926 100644 --- a/src/cgroups/cgfsng.c +++ b/src/cgroups/cgfsng.c @@ -631,34 +631,25 @@ static int cgfsng_get_memory_slabinfo_fd(struct cgroup_ops *ops, const char *cgr return openat(h->fd, path, O_RDONLY | O_CLOEXEC | O_NOFOLLOW); } -static bool cgfsng_can_use_swap(struct cgroup_ops *ops) +static bool cgfsng_can_use_swap(struct cgroup_ops *ops, const char *cgroup) { - bool has_swap = false; + __do_free char *cgroup_rel = NULL, *junk_value = NULL; + const char *file; struct hierarchy *h; h = ops->get_hierarchy(ops, "memory"); if (!h) return false; - if (is_unified_hierarchy(h)) { - if (faccessat(h->fd, "memory.swap.max", F_OK, 0)) - return false; - - if (faccessat(h->fd, "memory.swap.current", F_OK, 0)) - return false; - - has_swap = true; - } else { - if (faccessat(h->fd, "memory.memsw.limit_in_bytes", F_OK, 0)) - return false; - - if (faccessat(h->fd, "memory.memsw.usage_in_bytes", F_OK, 0)) - return false; - - has_swap = true; - } - - return has_swap; + cgroup_rel = must_make_path_relative(cgroup, NULL); + file = is_unified_hierarchy(h) ? "memory.swap.current" : "memory.memsw.usage_in_bytes"; + /* For v2, we need to look at the lower levels of the hierarchy because + * no 'memory.swap.current' file exists at the root. We must search + * upwards in the hierarchy in case memory accounting is disabled via + * cgroup.subtree_control for the given cgroup itself. + */ + int ret = cgroup_walkup_to_root(ops->cgroup2_root_fd, h->fd, cgroup_rel, file, &junk_value); + return ret == 0; } static int cgfsng_get_memory_stats(struct cgroup_ops *ops, const char *cgroup, diff --git a/src/cgroups/cgroup.h b/src/cgroups/cgroup.h index 122e8ebf..afa7db2e 100644 --- a/src/cgroups/cgroup.h +++ b/src/cgroups/cgroup.h @@ -148,7 +148,7 @@ struct cgroup_ops { char **value); int (*get_memory_slabinfo_fd)(struct cgroup_ops *ops, const char *cgroup); - bool (*can_use_swap)(struct cgroup_ops *ops); + bool (*can_use_swap)(struct cgroup_ops *ops, const char *cgroup); /* cpuset */ int (*get_cpuset_cpus)(struct cgroup_ops *ops, const char *cgroup, diff --git a/src/proc_fuse.c b/src/proc_fuse.c index cb2fca2e..9dedc375 100644 --- a/src/proc_fuse.c +++ b/src/proc_fuse.c @@ -459,11 +459,13 @@ static int proc_swaps_read(char *buf, size_t size, off_t offset, } if (wants_swap) { - /* The total amount of swap is always reported to be the + /* For cgroups v1, the total amount of swap is always reported to be the lesser of the RAM+SWAP limit or the SWAP device size. This is because the kernel can swap as much as it wants and not only up to swtotal. */ - swtotal = memlimit / 1024 + swtotal; + if (!liblxcfs_memory_is_cgroupv2()) + swtotal = memlimit / 1024 + swtotal; + if (hostswtotal < swtotal) { swtotal = hostswtotal; } @@ -1359,11 +1361,10 @@ static int proc_meminfo_read(char *buf, size_t size, off_t offset, sscanf(line + STRLITERALLEN("SwapTotal:"), "%" PRIu64, &hostswtotal); - /* The total amount of swap is always reported to be the + /* In cgroups v1, the total amount of swap is always reported to be the lesser of the RAM+SWAP limit or the SWAP device size. This is because the kernel can swap as much as it wants and not only up to swtotal. */ - if (!liblxcfs_memory_is_cgroupv2()) swtotal += memlimit;