Skip to content

Commit

Permalink
proc: Fix swap handling for cgroups v2 (can_use_swap)
Browse files Browse the repository at this point in the history
On cgroups v2, the cgroup root has no memory.swap.current or memory.swap.max
files, so can_use_swap must look lower in the hierarchy to determine whether
swap accounting is enabled. Because memory accounting may also be turned off at
some level, walk the hierarchy upwards starting from lxcfs's own cgroup.

Signed-off-by: Alex Hudspith <[email protected]>
  • Loading branch information
alexhudspith committed Nov 6, 2023
1 parent 494a141 commit 5827b93
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 27 deletions.
4 changes: 3 additions & 1 deletion src/bindings.c
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,7 @@ static void __attribute__((constructor)) lxcfs_init(void)
{
__do_close int init_ns = -EBADF, root_fd = -EBADF,
pidfd = -EBADF;
__do_free char *cgroup = NULL;
int i = 0;
pid_t pid;
struct hierarchy *hierarchy;
Expand Down Expand Up @@ -920,7 +921,8 @@ static void __attribute__((constructor)) lxcfs_init(void)
lxcfs_info("Kernel supports pidfds");
}

can_use_swap = cgroup_ops->can_use_swap(cgroup_ops);
cgroup = get_pid_cgroup(pid, "memory");
can_use_swap = cgroup_ops->can_use_swap(cgroup_ops, cgroup);
if (can_use_swap)
lxcfs_info("Kernel supports swap accounting");
else
Expand Down
33 changes: 12 additions & 21 deletions src/cgroups/cgfsng.c
Original file line number Diff line number Diff line change
Expand Up @@ -631,34 +631,25 @@ static int cgfsng_get_memory_slabinfo_fd(struct cgroup_ops *ops, const char *cgr
return openat(h->fd, path, O_RDONLY | O_CLOEXEC | O_NOFOLLOW);
}

static bool cgfsng_can_use_swap(struct cgroup_ops *ops)
static bool cgfsng_can_use_swap(struct cgroup_ops *ops, const char *cgroup)
{
bool has_swap = false;
__do_free char *cgroup_rel = NULL, *junk_value = NULL;
const char *file;
struct hierarchy *h;

h = ops->get_hierarchy(ops, "memory");
if (!h)
return false;

if (is_unified_hierarchy(h)) {
if (faccessat(h->fd, "memory.swap.max", F_OK, 0))
return false;

if (faccessat(h->fd, "memory.swap.current", F_OK, 0))
return false;

has_swap = true;
} else {
if (faccessat(h->fd, "memory.memsw.limit_in_bytes", F_OK, 0))
return false;

if (faccessat(h->fd, "memory.memsw.usage_in_bytes", F_OK, 0))
return false;

has_swap = true;
}

return has_swap;
cgroup_rel = must_make_path_relative(cgroup, NULL);
file = is_unified_hierarchy(h) ? "memory.swap.current" : "memory.memsw.usage_in_bytes";
/* For v2, we need to look at the lower levels of the hierarchy because
* no 'memory.swap.current' file exists at the root. We must search
* upwards in the hierarchy in case memory accounting is disabled via
* cgroup.subtree_control for the given cgroup itself.
*/
int ret = cgroup_walkup_to_root(ops->cgroup2_root_fd, h->fd, cgroup_rel, file, &junk_value);
return ret == 0;
}

static int cgfsng_get_memory_stats(struct cgroup_ops *ops, const char *cgroup,
Expand Down
2 changes: 1 addition & 1 deletion src/cgroups/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ struct cgroup_ops {
char **value);
int (*get_memory_slabinfo_fd)(struct cgroup_ops *ops,
const char *cgroup);
bool (*can_use_swap)(struct cgroup_ops *ops);
bool (*can_use_swap)(struct cgroup_ops *ops, const char *cgroup);

/* cpuset */
int (*get_cpuset_cpus)(struct cgroup_ops *ops, const char *cgroup,
Expand Down
9 changes: 5 additions & 4 deletions src/proc_fuse.c
Original file line number Diff line number Diff line change
Expand Up @@ -459,11 +459,13 @@ static int proc_swaps_read(char *buf, size_t size, off_t offset,
}

if (wants_swap) {
/* The total amount of swap is always reported to be the
/* For cgroups v1, the total amount of swap is always reported to be the
lesser of the RAM+SWAP limit or the SWAP device size.
This is because the kernel can swap as much as it
wants and not only up to swtotal. */
swtotal = memlimit / 1024 + swtotal;
if (!liblxcfs_memory_is_cgroupv2())
swtotal = memlimit / 1024 + swtotal;

if (hostswtotal < swtotal) {
swtotal = hostswtotal;
}
Expand Down Expand Up @@ -1359,11 +1361,10 @@ static int proc_meminfo_read(char *buf, size_t size, off_t offset,

sscanf(line + STRLITERALLEN("SwapTotal:"), "%" PRIu64, &hostswtotal);

/* The total amount of swap is always reported to be the
/* In cgroups v1, the total amount of swap is always reported to be the
lesser of the RAM+SWAP limit or the SWAP device size.
This is because the kernel can swap as much as it
wants and not only up to swtotal. */

if (!liblxcfs_memory_is_cgroupv2())
swtotal += memlimit;

Expand Down

0 comments on commit 5827b93

Please sign in to comment.