Skip to content

Allow mmap of /sys/kernel/btf/vmlinux #8976

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: bpf-next_base
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion include/asm-generic/vmlinux.lds.h
Original file line number Diff line number Diff line change
Expand Up @@ -667,10 +667,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
*/
#ifdef CONFIG_DEBUG_INFO_BTF
#define BTF \
. = ALIGN(PAGE_SIZE); \
.BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \
BOUNDED_SECTION_BY(.BTF, _BTF) \
} \
. = ALIGN(4); \
. = ALIGN(PAGE_SIZE); \
.BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) { \
*(.BTF_ids) \
}
Expand Down
32 changes: 32 additions & 0 deletions kernel/bpf/sysfs_btf.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,46 @@
#include <linux/kobject.h>
#include <linux/init.h>
#include <linux/sysfs.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/btf.h>

/* See scripts/link-vmlinux.sh, gen_btf() func for details */
extern char __start_BTF[];
extern char __stop_BTF[];

/*
 * mmap handler for the "vmlinux" BTF sysfs attribute.
 *
 * Maps the pages backing the kernel BTF blob (starting at __start_BTF)
 * into the caller's address space as a private, read-only mapping.
 *
 * @filp: file being mapped (unused here)
 * @kobj: kobject owning the attribute (unused here)
 * @attr: binary attribute; ->private must point at __start_BTF and ->size
 *        (rounded up to whole pages) bounds the length that may be mapped
 * @vma:  VMA describing the requested mapping
 *
 * Return: the result of remap_pfn_range() on success; -EINVAL if the
 * attribute does not describe the BTF blob, the blob is not page aligned,
 * a non-zero file offset is requested, the PFN range would wrap, or the
 * VMA is larger than the blob; -EACCES if the mapping is writable,
 * executable or may be shared.
 */
static int btf_sysfs_vmlinux_mmap(struct file *filp, struct kobject *kobj,
				  const struct bin_attribute *attr,
				  struct vm_area_struct *vma)
{
	unsigned long pages = PAGE_ALIGN(attr->size) >> PAGE_SHIFT;
	size_t vm_size = vma->vm_end - vma->vm_start;
	/*
	 * __start_BTF is a symbol inside the kernel image, not a linear-map
	 * address, so virt_to_phys() is the wrong translation for it (it
	 * triggers a CONFIG_DEBUG_VIRTUAL warning on arm64). __pa_symbol()
	 * is the helper intended for image symbols.
	 */
	phys_addr_t addr = __pa_symbol(__start_BTF);
	unsigned long pfn = addr >> PAGE_SHIFT;

	/* Only the BTF blob itself may be mapped, and only if page aligned. */
	if (attr->private != __start_BTF || !PAGE_ALIGNED(addr))
		return -EINVAL;

	/* Mappings must start at the beginning of the blob. */
	if (vma->vm_pgoff)
		return -EINVAL;

	/* Refuse writable, executable or potentially shared mappings. */
	if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_MAYSHARE))
		return -EACCES;

	/* Guard against the PFN range wrapping around. */
	if (pfn + pages < pfn)
		return -EINVAL;

	/* Never map beyond the page-rounded end of the blob. */
	if ((vm_size >> PAGE_SHIFT) > pages)
		return -EINVAL;

	/*
	 * Exclude the mapping from core dumps and drop VM_MAYEXEC/VM_MAYWRITE
	 * so a later mprotect() cannot make it executable or writable.
	 */
	vm_flags_mod(vma, VM_DONTDUMP, VM_MAYEXEC | VM_MAYWRITE);
	return remap_pfn_range(vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot);
}

/*
 * sysfs binary attribute named "vmlinux": world-readable (0444), served by
 * sysfs_bin_attr_simple_read() for read() and by btf_sysfs_vmlinux_mmap()
 * for mmap().
 * NOTE(review): .private and .size are not set here; the mmap handler
 * expects .private == __start_BTF, so they are presumably filled in by
 * init code before the object becomes read-only (__ro_after_init) — confirm.
 */
static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = {
.attr = { .name = "vmlinux", .mode = 0444, },
.read_new = sysfs_bin_attr_simple_read,
.mmap = btf_sysfs_vmlinux_mmap,
};

struct kobject *btf_kobj;
Expand Down
89 changes: 71 additions & 18 deletions tools/lib/bpf/btf.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <sys/utsname.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/btf.h>
Expand Down Expand Up @@ -120,6 +121,9 @@ struct btf {
/* whether base_btf should be freed in btf_free for this instance */
bool owns_base;

/* whether raw_data is a (read-only) mmap */
bool raw_data_is_mmap;

/* BTF object FD, if loaded into kernel */
int fd;

Expand Down Expand Up @@ -951,6 +955,17 @@ static bool btf_is_modifiable(const struct btf *btf)
return (void *)btf->hdr != btf->raw_data;
}

/*
 * Release btf->raw_data using the method matching how it was obtained:
 * munmap() for an mmap-backed buffer, free() otherwise. The pointer is
 * always reset to NULL afterwards, and the mmap flag is cleared once the
 * mapping has been dropped.
 */
static void btf_free_raw_data(struct btf *btf)
{
	if (!btf->raw_data_is_mmap) {
		/* heap-allocated copy */
		free(btf->raw_data);
	} else {
		/* read-only file mapping covering raw_size bytes */
		munmap(btf->raw_data, btf->raw_size);
		btf->raw_data_is_mmap = false;
	}

	btf->raw_data = NULL;
}

void btf__free(struct btf *btf)
{
if (IS_ERR_OR_NULL(btf))
Expand All @@ -970,7 +985,7 @@ void btf__free(struct btf *btf)
free(btf->types_data);
strset__free(btf->strs_set);
}
free(btf->raw_data);
btf_free_raw_data(btf);
free(btf->raw_data_swapped);
free(btf->type_offs);
if (btf->owns_base)
Expand Down Expand Up @@ -1030,7 +1045,7 @@ struct btf *btf__new_empty_split(struct btf *base_btf)
return libbpf_ptr(btf_new_empty(base_btf));
}

static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, bool is_mmap)
{
struct btf *btf;
int err;
Expand All @@ -1050,12 +1065,18 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
btf->start_str_off = base_btf->hdr->str_len;
}

btf->raw_data = malloc(size);
if (!btf->raw_data) {
err = -ENOMEM;
goto done;
if (is_mmap) {
btf->raw_data = (void *)data;
btf->raw_data_is_mmap = true;
} else {
btf->raw_data = malloc(size);
if (!btf->raw_data) {
err = -ENOMEM;
goto done;
}
memcpy(btf->raw_data, data, size);
}
memcpy(btf->raw_data, data, size);

btf->raw_size = size;

btf->hdr = btf->raw_data;
Expand Down Expand Up @@ -1083,12 +1104,12 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)

struct btf *btf__new(const void *data, __u32 size)
{
return libbpf_ptr(btf_new(data, size, NULL));
return libbpf_ptr(btf_new(data, size, NULL, false));
}

struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf)
{
return libbpf_ptr(btf_new(data, size, base_btf));
return libbpf_ptr(btf_new(data, size, base_btf, false));
}

struct btf_elf_secs {
Expand Down Expand Up @@ -1209,7 +1230,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,

if (secs.btf_base_data) {
dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size,
NULL);
NULL, false);
if (IS_ERR(dist_base_btf)) {
err = PTR_ERR(dist_base_btf);
dist_base_btf = NULL;
Expand All @@ -1218,7 +1239,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
}

btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size,
dist_base_btf ?: base_btf);
dist_base_btf ?: base_btf, false);
if (IS_ERR(btf)) {
err = PTR_ERR(btf);
goto done;
Expand Down Expand Up @@ -1335,7 +1356,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
}

/* finally parse BTF data */
btf = btf_new(data, sz, base_btf);
btf = btf_new(data, sz, base_btf, false);

err_out:
free(data);
Expand All @@ -1354,6 +1375,37 @@ struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
return libbpf_ptr(btf_parse_raw(path, base_btf));
}

/*
 * Parse raw BTF from @path by mmap()ing the file read-only instead of
 * reading it into a heap buffer.
 *
 * @path:     file containing raw BTF data
 * @base_btf: optional base BTF, passed through to btf_new()
 *
 * On success the returned btf is created with is_mmap == true, i.e. it
 * uses the mapping as its raw data. On failure an ERR_PTR()-encoded
 * negative errno is returned.
 *
 * This is an internal helper, so errors must be encoded with ERR_PTR()
 * rather than libbpf_err_ptr(): the latter is meant for public API return
 * values and yields NULL with errno set, which an IS_ERR() check in the
 * caller would not recognise as a failure.
 */
static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf)
{
	struct stat st;
	void *data;
	struct btf *btf;
	int fd, err;

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return ERR_PTR(-errno);

	if (fstat(fd, &st) < 0) {
		err = -errno;
		close(fd);
		return ERR_PTR(err);
	}

	data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	/* Capture errno before close() has a chance to overwrite it. */
	err = -errno;
	/* The descriptor is closed right away on both success and failure. */
	close(fd);

	if (data == MAP_FAILED)
		return ERR_PTR(err);

	btf = btf_new(data, st.st_size, base_btf, true);
	/*
	 * NOTE(review): if btf_new() can fail after it has recorded the
	 * mapping in the btf object and its error path frees that object,
	 * this munmap() would unmap the region a second time — confirm
	 * against btf_new()'s error handling.
	 */
	if (IS_ERR(btf))
		munmap(data, st.st_size);

	return btf;
}

static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
{
struct btf *btf;
Expand Down Expand Up @@ -1618,7 +1670,7 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf)
goto exit_free;
}

btf = btf_new(ptr, btf_info.btf_size, base_btf);
btf = btf_new(ptr, btf_info.btf_size, base_btf, false);

exit_free:
free(ptr);
Expand Down Expand Up @@ -1658,10 +1710,8 @@ struct btf *btf__load_from_kernel_by_id(__u32 id)

static void btf_invalidate_raw_data(struct btf *btf)
{
if (btf->raw_data) {
free(btf->raw_data);
btf->raw_data = NULL;
}
if (btf->raw_data)
btf_free_raw_data(btf);
if (btf->raw_data_swapped) {
free(btf->raw_data_swapped);
btf->raw_data_swapped = NULL;
Expand Down Expand Up @@ -5331,7 +5381,10 @@ struct btf *btf__load_vmlinux_btf(void)
pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n",
sysfs_btf_path);
} else {
btf = btf__parse(sysfs_btf_path, NULL);
btf = btf_parse_raw_mmap(sysfs_btf_path, NULL);
if (IS_ERR(btf))
btf = btf__parse(sysfs_btf_path, NULL);

if (!btf) {
err = -errno;
pr_warn("failed to read kernel BTF from '%s': %s\n",
Expand Down
81 changes: 81 additions & 0 deletions tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Copyright (c) 2025 Isovalent */

#include <test_progs.h>
#include <bpf/btf.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * Exercise mmap() on a sysfs BTF file.
 *
 * Verifies that writable, shared and oversized mappings are rejected, that
 * a private read-only mapping of the page-rounded file size succeeds, that
 * it cannot be made writable or executable via mprotect(), that the bytes
 * between the end of the BTF data and the end of the last page are zero,
 * and that the mapped bytes parse as BTF.
 *
 * @path: sysfs BTF file to map
 * @base: base BTF handed to btf__new_split() (may be NULL)
 */
static void test_btf_mmap_sysfs(const char *path, struct btf *base)
{
	struct stat st;
	__u64 btf_size, end, off;
	/* length of the mapping currently held in raw_data, for cleanup */
	size_t map_len = 0;
	void *raw_data = NULL;
	int fd = -1;
	long page_size;
	struct btf *btf = NULL;

	page_size = sysconf(_SC_PAGESIZE);
	/* sysconf() returns -1 on error; 0 would divide by zero below. */
	if (!ASSERT_GT(page_size, 0, "get_page_size"))
		goto cleanup;

	if (!ASSERT_OK(stat(path, &st), "stat_btf"))
		goto cleanup;

	btf_size = st.st_size;
	/* BTF size rounded up to a whole number of pages. */
	end = (btf_size + page_size - 1) / page_size * page_size;

	fd = open(path, O_RDONLY);
	if (!ASSERT_GE(fd, 0, "open_btf"))
		goto cleanup;

	/* Writable mappings must be refused. */
	map_len = btf_size;
	raw_data = mmap(NULL, map_len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_writable"))
		goto cleanup;

	/* Shared mappings must be refused. */
	map_len = btf_size;
	raw_data = mmap(NULL, map_len, PROT_READ, MAP_SHARED, fd, 0);
	if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_shared"))
		goto cleanup;

	/* One byte past the last page must be refused. */
	map_len = end + 1;
	raw_data = mmap(NULL, map_len, PROT_READ, MAP_PRIVATE, fd, 0);
	if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_invalid_size"))
		goto cleanup;

	/* The only mapping expected to succeed: private, read-only, whole pages. */
	map_len = end;
	raw_data = mmap(NULL, map_len, PROT_READ, MAP_PRIVATE, fd, 0);
	if (!ASSERT_OK_PTR(raw_data, "mmap_btf"))
		goto cleanup;

	if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_WRITE), -1,
	    "mprotect_writable"))
		goto cleanup;

	if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_EXEC), -1,
	    "mprotect_executable"))
		goto cleanup;

	/* Check padding is zeroed */
	for (off = btf_size; off < end; off++) {
		if (((__u8 *)raw_data)[off] != 0) {
			PRINT_FAIL("tail of BTF is not zero at page offset %llu\n",
				   (unsigned long long)off);
			goto cleanup;
		}
	}

	btf = btf__new_split(raw_data, btf_size, base);
	if (!ASSERT_OK_PTR(btf, "parse_btf"))
		goto cleanup;

cleanup:
	btf__free(btf);
	/* Unmap exactly what was mapped, including an unexpected success above. */
	if (raw_data && raw_data != MAP_FAILED)
		munmap(raw_data, map_len);
	if (fd >= 0)
		close(fd);
}

/*
 * Test entry point: run the sysfs BTF mmap checks against the vmlinux BTF
 * file, with no base BTF.
 */
void test_btf_sysfs(void)
{
test_btf_mmap_sysfs("/sys/kernel/btf/vmlinux", NULL);
}
Loading