Skip to content

Commit 0087a68

Browse files
anakryiko authored and borkmann committed
libbpf: Automatically fix up BPF_MAP_TYPE_RINGBUF size, if necessary
Kernel imposes a pretty particular restriction on ringbuf map size. It has to be a power-of-2 multiple of page size. While generally this isn't hard for user to satisfy, sometimes it's impossible to do this declaratively in BPF source code or just plain inconvenient to do at runtime. One such example might be BPF libraries that are supposed to work on different architectures, which might not agree on what the common page size is. Let libbpf find the right size for user instead, if it turns out to not satisfy kernel requirements. If user didn't set size at all, that's most probably a mistake so don't upsize such zero size to one full page, though. Also we need to be careful about not overflowing __u32 max_entries. Signed-off-by: Andrii Nakryiko <[email protected]> Signed-off-by: Daniel Borkmann <[email protected]> Link: https://lore.kernel.org/bpf/[email protected]
1 parent f760d05 commit 0087a68

File tree

1 file changed

+41
-1
lines changed

1 file changed

+41
-1
lines changed

tools/lib/bpf/libbpf.c

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4943,6 +4943,44 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
49434943

49444944
static void bpf_map__destroy(struct bpf_map *map);
49454945

4946+
/* Return true iff x is a non-zero power of two.
 *
 * x & (x - 1) clears the lowest set bit, so it is zero exactly when x
 * has at most one bit set; combined with the x != 0 check this accepts
 * precisely the powers of two. (The negation is essential: without it
 * the test is inverted and matches every value with MORE than one bit
 * set.)
 */
static bool is_pow_of_2(size_t x)
{
	return x && !(x & (x - 1));
}
/* Adjust a user-requested BPF_MAP_TYPE_RINGBUF size to satisfy kernel
 * requirements: max_entries must be a power-of-2 multiple of the kernel
 * page size.
 *
 * A size that already satisfies the requirement is passed through
 * unchanged. Otherwise the closest bigger (page_sz * power-of-2)
 * product is substituted. A zero size is passed through as-is so the
 * user sees the kernel's error instead of a silently invented default,
 * and a size too close to UINT_MAX to round up is also passed through
 * and left for the kernel to reject.
 */
static size_t adjust_ringbuf_sz(size_t sz)
{
	__u32 page_sz = sysconf(_SC_PAGE_SIZE);
	__u32 mul;

	/* if user forgot to set any size, make sure they see an error */
	if (sz == 0)
		return 0;

	/* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be a
	 * power-of-2 multiple of kernel's page size. If user diligently
	 * satisfied these conditions, pass the size through. The
	 * power-of-2 check is open-coded (q is non-zero here because sz
	 * is a non-zero multiple of page_sz, so q & (q - 1) == 0 iff q
	 * is a power of two).
	 */
	if ((sz % page_sz) == 0) {
		size_t q = sz / page_sz;

		if ((q & (q - 1)) == 0)
			return sz;
	}

	/* Otherwise find closest (page_sz * power_of_2) product bigger
	 * than user-set size to satisfy both user size request and
	 * kernel requirements, and substitute correct max_entries for
	 * map creation. The loop bound prevents __u32 overflow of
	 * mul * page_sz.
	 */
	for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
		if (mul * page_sz > sz)
			return mul * page_sz;
	}

	/* if it's impossible to satisfy the conditions (i.e., user size
	 * is very close to UINT_MAX but is not a power-of-2 multiple of
	 * page_size) then just return original size and let kernel
	 * reject it
	 */
	return sz;
}
49464984
static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
49474985
{
49484986
LIBBPF_OPTS(bpf_map_create_opts, create_attr);
@@ -4981,6 +5019,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
49815019
}
49825020

49835021
switch (def->type) {
5022+
case BPF_MAP_TYPE_RINGBUF:
5023+
map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
5024+
/* fallthrough */
49845025
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
49855026
case BPF_MAP_TYPE_CGROUP_ARRAY:
49865027
case BPF_MAP_TYPE_STACK_TRACE:
@@ -4994,7 +5035,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
49945035
case BPF_MAP_TYPE_SOCKHASH:
49955036
case BPF_MAP_TYPE_QUEUE:
49965037
case BPF_MAP_TYPE_STACK:
4997-
case BPF_MAP_TYPE_RINGBUF:
49985038
create_attr.btf_fd = 0;
49995039
create_attr.btf_key_type_id = 0;
50005040
create_attr.btf_value_type_id = 0;

0 commit comments

Comments
 (0)