diff options
25 files changed, 1429 insertions, 51 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 31421c74ba08..50105e0b8fcc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -169,6 +169,7 @@ struct bpf_map { u32 value_size; u32 max_entries; u32 map_flags; + u64 map_extra; /* any per-map-type extra fields */ int spin_lock_off; /* >=0 valid offset, <0 error */ int timer_off; /* >=0 valid offset, <0 error */ u32 id; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 9c81724e4b98..c4424ac2fa02 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -125,6 +125,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c10820037883..8bead4aa3ad0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -906,6 +906,7 @@ enum bpf_map_type { BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, BPF_MAP_TYPE_TASK_STORAGE, + BPF_MAP_TYPE_BLOOM_FILTER, }; /* Note that tracing related programs such as @@ -1274,6 +1275,13 @@ union bpf_attr { * struct stored as the * map value */ + /* Any per-map-type extra fields + * + * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the + * number of hash functions (if 0, the bloom filter will default + * to using 5 hash functions). + */ + __u64 map_extra; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -5638,6 +5646,7 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u64 map_extra; } __attribute__((aligned(8))); struct bpf_btf_info { diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 7f33098ca63f..cf6ca339f3cd 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -7,7 +7,7 @@ endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o -obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o +obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c new file mode 100644 index 000000000000..7c50232b7571 --- /dev/null +++ b/kernel/bpf/bloom_filter.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <linux/bitmap.h> +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/err.h> +#include <linux/jhash.h> +#include <linux/random.h> + +#define BLOOM_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_ZERO_SEED | BPF_F_ACCESS_MASK) + +struct bpf_bloom_filter { + struct bpf_map map; + u32 bitset_mask; + u32 hash_seed; + /* If the size of the values in the bloom filter is u32 aligned, + * then it is more performant to use jhash2 as the underlying hash + * function, else we use jhash. This tracks the number of u32s + * in an u32-aligned value size. If the value size is not u32 aligned, + * this will be 0. + */ + u32 aligned_u32_count; + u32 nr_hash_funcs; + unsigned long bitset[]; +}; + +static u32 hash(struct bpf_bloom_filter *bloom, void *value, + u32 value_size, u32 index) +{ + u32 h; + + if (bloom->aligned_u32_count) + h = jhash2(value, bloom->aligned_u32_count, + bloom->hash_seed + index); + else + h = jhash(value, value_size, bloom->hash_seed + index); + + return h & bloom->bitset_mask; +} + +static int peek_elem(struct bpf_map *map, void *value) +{ + struct bpf_bloom_filter *bloom = + container_of(map, struct bpf_bloom_filter, map); + u32 i, h; + + for (i = 0; i < bloom->nr_hash_funcs; i++) { + h = hash(bloom, value, map->value_size, i); + if (!test_bit(h, bloom->bitset)) + return -ENOENT; + } + + return 0; +} + +static int push_elem(struct bpf_map *map, void *value, u64 flags) +{ + struct bpf_bloom_filter *bloom = + container_of(map, struct bpf_bloom_filter, map); + u32 i, h; + + if (flags != BPF_ANY) + return -EINVAL; + + for (i = 0; i < bloom->nr_hash_funcs; i++) { + h = hash(bloom, value, map->value_size, i); + set_bit(h, bloom->bitset); + } + + return 0; +} + +static int pop_elem(struct bpf_map *map, void *value) +{ + return -EOPNOTSUPP; +} + +static struct bpf_map *map_alloc(union bpf_attr *attr) +{ + u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits; + int numa_node = bpf_map_attr_numa_node(attr); + struct bpf_bloom_filter *bloom; + + if (!bpf_capable()) + return ERR_PTR(-EPERM); + + if (attr->key_size != 0 || attr->value_size == 0 || + attr->max_entries == 0 || + attr->map_flags & ~BLOOM_CREATE_FLAG_MASK || + !bpf_map_flags_access_ok(attr->map_flags) || + (attr->map_extra & ~0xF)) + return ERR_PTR(-EINVAL); + + /* The lower 4 bits of map_extra specify the number of hash functions */ + nr_hash_funcs = attr->map_extra & 0xF; + if (nr_hash_funcs == 0) + /* Default to using 5 hash functions if unspecified */ + nr_hash_funcs = 5; + + /* For the bloom filter, the optimal bit array size that minimizes the + * false positive probability is n * k / ln(2) where n is the number of + * expected entries in the bloom filter and k is the number of hash + * functions. We use 7 / 5 to approximate 1 / ln(2). + * + * We round this up to the nearest power of two to enable more efficient + * hashing using bitmasks. The bitmask will be the bit array size - 1. + * + * If this overflows a u32, the bit array size will have 2^32 (4 + * GB) bits. + */ + if (check_mul_overflow(attr->max_entries, nr_hash_funcs, &nr_bits) || + check_mul_overflow(nr_bits / 5, (u32)7, &nr_bits) || + nr_bits > (1UL << 31)) { + /* The bit array size is 2^32 bits but to avoid overflowing the + * u32, we use U32_MAX, which will round up to the equivalent + * number of bytes + */ + bitset_bytes = BITS_TO_BYTES(U32_MAX); + bitset_mask = U32_MAX; + } else { + if (nr_bits <= BITS_PER_LONG) + nr_bits = BITS_PER_LONG; + else + nr_bits = roundup_pow_of_two(nr_bits); + bitset_bytes = BITS_TO_BYTES(nr_bits); + bitset_mask = nr_bits - 1; + } + + bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long)); + bloom = bpf_map_area_alloc(sizeof(*bloom) + bitset_bytes, numa_node); + + if (!bloom) + return ERR_PTR(-ENOMEM); + + bpf_map_init_from_attr(&bloom->map, attr); + + bloom->nr_hash_funcs = nr_hash_funcs; + bloom->bitset_mask = bitset_mask; + + /* Check whether the value size is u32-aligned */ + if ((attr->value_size & (sizeof(u32) - 1)) == 0) + bloom->aligned_u32_count = + attr->value_size / sizeof(u32); + + if (!(attr->map_flags & BPF_F_ZERO_SEED)) + bloom->hash_seed = get_random_int(); + + return &bloom->map; +} + +static void map_free(struct bpf_map *map) +{ + struct bpf_bloom_filter *bloom = + container_of(map, struct bpf_bloom_filter, map); + + bpf_map_area_free(bloom); +} + +static void *lookup_elem(struct bpf_map *map, void *key) +{ + /* The eBPF program should use map_peek_elem instead */ + return ERR_PTR(-EINVAL); +} + +static int update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) +{ + /* The eBPF program should use map_push_elem instead */ + return -EINVAL; +} + +static int check_btf(const struct bpf_map *map, const struct btf *btf, + const struct btf_type *key_type, + const struct btf_type *value_type) +{ + /* Bloom filter maps are keyless */ + return btf_type_is_void(key_type) ? 0 : -EINVAL; +} + +static int bpf_bloom_btf_id; +const struct bpf_map_ops bloom_filter_map_ops = { + .map_meta_equal = bpf_map_meta_equal, + .map_alloc = map_alloc, + .map_free = map_free, + .map_push_elem = push_elem, + .map_peek_elem = peek_elem, + .map_pop_elem = pop_elem, + .map_lookup_elem = lookup_elem, + .map_update_elem = update_elem, + .map_check_btf = check_btf, + .map_btf_name = "bpf_bloom_filter", + .map_btf_id = &bpf_bloom_btf_id, +}; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3e1c024ce3ed..f7c2c6354add 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -199,7 +199,8 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key, err = bpf_fd_reuseport_array_update_elem(map, key, value, flags); } else if (map->map_type == BPF_MAP_TYPE_QUEUE || - map->map_type == BPF_MAP_TYPE_STACK) { + map->map_type == BPF_MAP_TYPE_STACK || + map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { err = map->ops->map_push_elem(map, value, flags); } else { rcu_read_lock(); @@ -238,7 +239,8 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { err = bpf_fd_reuseport_array_lookup_elem(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_QUEUE || - map->map_type == BPF_MAP_TYPE_STACK) { + map->map_type == BPF_MAP_TYPE_STACK || + map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { err = map->ops->map_peek_elem(map, value); } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { /* struct_ops map requires directly updating "value" */ @@ -348,6 +350,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) map->max_entries = attr->max_entries; map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); map->numa_node = bpf_map_attr_numa_node(attr); + map->map_extra = attr->map_extra; } static int bpf_map_alloc_id(struct bpf_map *map) @@ -553,6 +556,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) "value_size:\t%u\n" "max_entries:\t%u\n" "map_flags:\t%#x\n" + "map_extra:\t%#llx\n" "memlock:\t%lu\n" "map_id:\t%u\n" "frozen:\t%u\n", @@ -561,6 +565,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) map->value_size, map->max_entries, map->map_flags, + (unsigned long long)map->map_extra, bpf_map_memory_footprint(map), map->id, READ_ONCE(map->frozen)); @@ -810,7 +815,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, return ret; } -#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id +#define BPF_MAP_CREATE_LAST_FIELD map_extra /* called via syscall */ static int map_create(union bpf_attr *attr) { @@ -831,6 +836,10 @@ static int map_create(union bpf_attr *attr) return -EINVAL; } + if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && + attr->map_extra != 0) + return -EINVAL; + f_flags = bpf_get_file_flag(attr->map_flags); if (f_flags < 0) return f_flags; @@ -1080,6 +1089,14 @@ static int map_lookup_elem(union bpf_attr *attr) if (!value) goto free_key; + if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { + if (copy_from_user(value, uvalue, value_size)) + err = -EFAULT; + else + err = bpf_map_copy_value(map, key, value, attr->flags); + goto free_value; + } + err = bpf_map_copy_value(map, key, value, attr->flags); if (err) goto free_value; @@ -3881,6 +3898,7 @@ static int bpf_map_get_info_by_fd(struct file *file, info.value_size = map->value_size; info.max_entries = map->max_entries; info.map_flags = map->map_flags; + info.map_extra = map->map_extra; memcpy(info.name, map->name, sizeof(map->name)); if (map->btf) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c6616e325803..3c8aa7df1773 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5002,7 +5002,10 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env, return -EINVAL; } break; - + case BPF_MAP_TYPE_BLOOM_FILTER: + if (meta->func_id == BPF_FUNC_map_peek_elem) + *arg_type = ARG_PTR_TO_MAP_VALUE; + break; default: break; } @@ -5577,6 +5580,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_task_storage_delete) goto error; break; + case BPF_MAP_TYPE_BLOOM_FILTER: + if (func_id != BPF_FUNC_map_peek_elem && + func_id != BPF_FUNC_map_push_elem) + goto error; + break; default: break; } @@ -5644,13 +5652,18 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, map->map_type != BPF_MAP_TYPE_SOCKHASH) goto error; break; - case BPF_FUNC_map_peek_elem: case BPF_FUNC_map_pop_elem: - case BPF_FUNC_map_push_elem: if (map->map_type != BPF_MAP_TYPE_QUEUE && map->map_type != BPF_MAP_TYPE_STACK) goto error; break; + case BPF_FUNC_map_peek_elem: + case BPF_FUNC_map_push_elem: + if (map->map_type != BPF_MAP_TYPE_QUEUE && + map->map_type != BPF_MAP_TYPE_STACK && + map->map_type != BPF_MAP_TYPE_BLOOM_FILTER) + goto error; + break; case BPF_FUNC_sk_storage_get: case BPF_FUNC_sk_storage_delete: if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c10820037883..8bead4aa3ad0 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -906,6 +906,7 @@ enum bpf_map_type { BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, BPF_MAP_TYPE_TASK_STORAGE, + BPF_MAP_TYPE_BLOOM_FILTER, }; /* Note that tracing related programs such as @@ -1274,6 +1275,13 @@ union bpf_attr { * struct stored as the * map value */ + /* Any per-map-type extra fields + * + * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the + * number of hash functions (if 0, the bloom filter will default + * to using 5 hash functions). + */ + __u64 map_extra; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -5638,6 +5646,7 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u64 map_extra; } __attribute__((aligned(8))); struct bpf_btf_info { diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 7d1741ceaa32..fe4b6ebc9b8f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -77,7 +77,7 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) return fd; } -int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) +int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr) { union bpf_attr attr; int fd; @@ -102,11 +102,36 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) create_attr->btf_vmlinux_value_type_id; else attr.inner_map_fd = create_attr->inner_map_fd; + attr.map_extra = create_attr->map_extra; fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } +int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) +{ + struct bpf_create_map_params p = {}; + + p.map_type = create_attr->map_type; + p.key_size = create_attr->key_size; + p.value_size = create_attr->value_size; + p.max_entries = create_attr->max_entries; + p.map_flags = create_attr->map_flags; + p.name = create_attr->name; + p.numa_node = create_attr->numa_node; + p.btf_fd = create_attr->btf_fd; + p.btf_key_type_id = create_attr->btf_key_type_id; + p.btf_value_type_id = create_attr->btf_value_type_id; + p.map_ifindex = create_attr->map_ifindex; + if (p.map_type == BPF_MAP_TYPE_STRUCT_OPS) + p.btf_vmlinux_value_type_id = + create_attr->btf_vmlinux_value_type_id; + else + p.inner_map_fd = create_attr->inner_map_fd; + + return libbpf__bpf_create_map_xattr(&p); +} + int bpf_create_map_node(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags, int node) diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index 70eccbffefb1..b8d41d6fbc40 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -43,7 +43,7 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level); int bpf_gen__finish(struct bpf_gen *gen); void bpf_gen__free(struct bpf_gen *gen); void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size); -void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_attr *map_attr, int map_idx); +void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_params *map_attr, int map_idx); struct bpf_prog_load_params; void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx); void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size); diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 937bfc7db41e..e552484ae4a4 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -431,7 +431,7 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, } void bpf_gen__map_create(struct bpf_gen *gen, - struct bpf_create_map_attr *map_attr, int map_idx) + struct bpf_create_map_params *map_attr, int map_idx) { int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id); bool close_inner_map_fd = false; @@ -443,6 +443,7 @@ void bpf_gen__map_create(struct bpf_gen *gen, attr.key_size = map_attr->key_size; attr.value_size = map_attr->value_size; attr.map_flags = map_attr->map_flags; + attr.map_extra = map_attr->map_extra; memcpy(attr.map_name, map_attr->name, min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1)); attr.numa_node = map_attr->numa_node; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 59d39ce9f375..232e4efe82e9 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -400,6 +400,7 @@ struct bpf_map { char *pin_path; bool pinned; bool reused; + __u64 map_extra; }; enum extern_type { @@ -2324,6 +2325,13 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, } map_def->pinning = val; map_def->parts |= MAP_DEF_PINNING; + } else if (strcmp(name, "map_extra") == 0) { + __u32 map_extra; + + if (!get_map_field_int(map_name, btf, m, &map_extra)) + return -EINVAL; + map_def->map_extra = map_extra; + map_def->parts |= MAP_DEF_MAP_EXTRA; } else { if (strict) { pr_warn("map '%s': unknown field '%s'.\n", map_name, name); @@ -2348,6 +2356,7 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def map->def.value_size = def->value_size; map->def.max_entries = def->max_entries; map->def.map_flags = def->map_flags; + map->map_extra = def->map_extra; map->numa_node = def->numa_node; map->btf_key_type_id = def->key_type_id; @@ -2371,7 +2380,10 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def if (def->parts & MAP_DEF_MAX_ENTRIES) pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries); if (def->parts & MAP_DEF_MAP_FLAGS) - pr_debug("map '%s': found map_flags = %u.\n", map->name, def->map_flags); + pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags); + if (def->parts & MAP_DEF_MAP_EXTRA) + pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name, + (unsigned long long)def->map_extra); if (def->parts & MAP_DEF_PINNING) pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning); if (def->parts & MAP_DEF_NUMA_NODE) @@ -4210,6 +4222,7 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) map->btf_key_type_id = info.btf_key_type_id; map->btf_value_type_id = info.btf_value_type_id; map->reused = true; + map->map_extra = info.map_extra; return 0; @@ -4724,7 +4737,8 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) map_info.key_size == map->def.key_size && map_info.value_size == map->def.value_size && map_info.max_entries == map->def.max_entries && - map_info.map_flags == map->def.map_flags); + map_info.map_flags == map->def.map_flags && + map_info.map_extra == map->map_extra); } static int @@ -4807,7 +4821,7 @@ static void bpf_map__destroy(struct bpf_map *map); static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { - struct bpf_create_map_attr create_attr; + struct bpf_create_map_params create_attr; struct bpf_map_def *def = &map->def; int err = 0; @@ -4821,6 +4835,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.key_size = def->key_size; create_attr.value_size = def->value_size; create_attr.numa_node = map->numa_node; + create_attr.map_extra = map->map_extra; if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) { int nr_cpus; @@ -4895,7 +4910,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b */ map->fd = 0; } else { - map->fd = bpf_create_map_xattr(&create_attr); + map->fd = libbpf__bpf_create_map_xattr(&create_attr); } if (map->fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { @@ -4910,7 +4925,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; - map->fd = bpf_create_map_xattr(&create_attr); + map->fd = libbpf__bpf_create_map_xattr(&create_attr); } err = map->fd < 0 ? -errno : 0; @@ -8880,6 +8895,19 @@ int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) return 0; } +__u64 bpf_map__map_extra(const struct bpf_map *map) +{ + return map->map_extra; +} + +int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) +{ + if (map->fd >= 0) + return libbpf_err(-EBUSY); + map->map_extra = map_extra; + return 0; +} + __u32 bpf_map__numa_node(const struct bpf_map *map) { return map->numa_node; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index defabdbe7760..9de0f299706b 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -600,6 +600,9 @@ LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); /* get/set map if_index */ LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map); LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); +/* get/set map map_extra flags */ +LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 15239c05659c..43580eb47740 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -389,6 +389,8 @@ LIBBPF_0.5.0 { LIBBPF_0.6.0 { global: + bpf_map__map_extra; + bpf_map__set_map_extra; bpf_object__next_map; bpf_object__next_program; bpf_object__prev_map; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 13bc7950e304..a6dd7e25747f 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -193,8 +193,9 @@ enum map_def_parts { MAP_DEF_NUMA_NODE = 0x080, MAP_DEF_PINNING = 0x100, MAP_DEF_INNER_MAP = 0x200, + MAP_DEF_MAP_EXTRA = 0x400, - MAP_DEF_ALL = 0x3ff, /* combination of all above */ + MAP_DEF_ALL = 0x7ff, /* combination of all above */ }; struct btf_map_def { @@ -208,6 +209,7 @@ struct btf_map_def { __u32 map_flags; __u32 numa_node; __u32 pinning; + __u64 map_extra; }; int parse_btf_map_def(const char *map_name, struct btf *btf, @@ -303,6 +305,27 @@ struct bpf_prog_load_params { int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr); +struct bpf_create_map_params { + const char *name; + enum bpf_map_type map_type; + __u32 map_flags; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 numa_node; + __u32 btf_fd; + __u32 btf_key_type_id; + __u32 btf_value_type_id; + __u32 map_ifindex; + union { + __u32 inner_map_fd; + __u32 btf_vmlinux_value_type_id; + }; + __u64 map_extra; +}; + +int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr); + struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, const char **prefix, int *kind); diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index ac47cf9760fc..2c464cb78d6b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -524,18 +524,20 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) $(call msg,CC,,$@) - $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) -O2 -c $(filter %.c,$^) $(LDLIBS) -o $@ $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ $(OUTPUT)/perfbuf_bench.skel.h +$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(OUTPUT)/bench_count.o \ $(OUTPUT)/bench_rename.o \ $(OUTPUT)/bench_trigger.o \ - $(OUTPUT)/bench_ringbufs.o + $(OUTPUT)/bench_ringbufs.o \ + $(OUTPUT)/bench_bloom_filter_map.o $(call msg,BINARY,,$@) $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 6ea15b93a2f8..cc4722f693e9 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -51,6 +51,35 @@ void setup_libbpf() fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err); } +void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns) +{ + long total = res->false_hits + res->hits + res->drops; + + printf("Iter %3d (%7.3lfus): ", + iter, (delta_ns - 1000000000) / 1000.0); + + printf("%ld false hits of %ld total operations. Percentage = %2.2f %%\n", + res->false_hits, total, ((float)res->false_hits / total) * 100); +} + +void false_hits_report_final(struct bench_res res[], int res_cnt) +{ + long total_hits = 0, total_drops = 0, total_false_hits = 0, total_ops = 0; + int i; + + for (i = 0; i < res_cnt; i++) { + total_hits += res[i].hits; + total_false_hits += res[i].false_hits; + total_drops += res[i].drops; + } + total_ops = total_hits + total_false_hits + total_drops; + + printf("Summary: %ld false hits of %ld total operations. ", + total_false_hits, total_ops); + printf("Percentage = %2.2f %%\n", + ((float)total_false_hits / total_ops) * 100); +} + void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) { double hits_per_sec, drops_per_sec; @@ -63,20 +92,22 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0); - printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n", - hits_per_sec, hits_per_prod, drops_per_sec); + printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s, total operations %8.3lfM/s\n", + hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec); } void hits_drops_report_final(struct bench_res res[], int res_cnt) { int i; - double hits_mean = 0.0, drops_mean = 0.0; - double hits_stddev = 0.0, drops_stddev = 0.0; + double hits_mean = 0.0, drops_mean = 0.0, total_ops_mean = 0.0; + double hits_stddev = 0.0, drops_stddev = 0.0, total_ops_stddev = 0.0; + double total_ops; for (i = 0; i < res_cnt; i++) { hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt); } + total_ops_mean = hits_mean + drops_mean; if (res_cnt > 1) { for (i = 0; i < res_cnt; i++) { @@ -86,14 +117,21 @@ void hits_drops_report_final(struct bench_res res[], int res_cnt) drops_stddev += (drops_mean - res[i].drops / 1000000.0) * (drops_mean - res[i].drops / 1000000.0) / (res_cnt - 1.0); + total_ops = res[i].hits + res[i].drops; + total_ops_stddev += (total_ops_mean - total_ops / 1000000.0) * + (total_ops_mean - total_ops / 1000000.0) / + (res_cnt - 1.0); } hits_stddev = sqrt(hits_stddev); drops_stddev = sqrt(drops_stddev); + total_ops_stddev = sqrt(total_ops_stddev); } printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ", hits_mean, hits_stddev, hits_mean / env.producer_cnt); - printf("drops %8.3lf \u00B1 %5.3lfM/s\n", + printf("drops %8.3lf \u00B1 %5.3lfM/s, ", drops_mean, drops_stddev); + printf("total operations %8.3lf \u00B1 %5.3lfM/s\n", + total_ops_mean, total_ops_stddev); } const char *argp_program_version = "benchmark"; @@ -132,9 +170,11 @@ static const struct argp_option opts[] = { }; extern struct argp bench_ringbufs_argp; +extern struct argp bench_bloom_map_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, + { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, {}, }; @@ -323,6 +363,11 @@ extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; extern const struct bench bench_pb_libbpf; extern const struct bench bench_pb_custom; +extern const struct bench bench_bloom_lookup; +extern const struct bench bench_bloom_update; +extern const struct bench bench_bloom_false_positive; +extern const struct bench bench_hashmap_without_bloom; +extern const struct bench bench_hashmap_with_bloom; static const struct bench *benchs[] = { &bench_count_global, @@ -344,6 +389,11 @@ static const struct bench *benchs[] = { &bench_rb_custom, &bench_pb_libbpf, &bench_pb_custom, + &bench_bloom_lookup, + &bench_bloom_update, + &bench_bloom_false_positive, + &bench_hashmap_without_bloom, + &bench_hashmap_with_bloom, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index c1f48a473b02..624c6b11501f 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -33,6 +33,7 @@ struct env { struct bench_res { long hits; long drops; + long false_hits; }; struct bench { @@ -56,6 +57,8 @@ extern const struct bench *bench; void setup_libbpf(); void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns); void hits_drops_report_final(struct bench_res res[], int res_cnt); +void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); +void false_hits_report_final(struct bench_res res[], int res_cnt); static inline __u64 get_time_ns() { struct timespec t; diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c new file mode 100644 index 000000000000..6eeeed2913e6 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c @@ -0,0 +1,477 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <argp.h> +#include <linux/log2.h> +#include <pthread.h> +#include "bench.h" +#include "bloom_filter_bench.skel.h" +#include "bpf_util.h" + +static struct ctx { + bool use_array_map; + bool use_hashmap; + bool hashmap_use_bloom; + bool count_false_hits; + + struct bloom_filter_bench *skel; + + int bloom_fd; + int hashmap_fd; + int array_map_fd; + + pthread_mutex_t map_done_mtx; + pthread_cond_t map_done_cv; + bool map_done; + bool map_prepare_err; + + __u32 next_map_idx; +} ctx = { + .map_done_mtx = PTHREAD_MUTEX_INITIALIZER, + .map_done_cv = PTHREAD_COND_INITIALIZER, +}; + +struct stat { + __u32 stats[3]; +}; + +static struct { + __u32 nr_entries; + __u8 nr_hash_funcs; + __u8 value_size; +} args = { + .nr_entries = 1000, + .nr_hash_funcs = 3, + .value_size = 8, +}; + +enum { + ARG_NR_ENTRIES = 3000, + ARG_NR_HASH_FUNCS = 3001, + ARG_VALUE_SIZE = 3002, +}; + +static const struct argp_option opts[] = { + { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0, + "Set number of expected unique entries in the bloom filter"}, + { "nr_hash_funcs", ARG_NR_HASH_FUNCS, "NR_HASH_FUNCS", 0, + "Set number of hash functions in the bloom filter"}, + { "value_size", ARG_VALUE_SIZE, "VALUE_SIZE", 0, + "Set value size (in bytes) of bloom filter entries"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_NR_ENTRIES: + args.nr_entries = strtol(arg, NULL, 10); + if (args.nr_entries == 0) { + fprintf(stderr, "Invalid nr_entries count."); + argp_usage(state); + } + break; + case ARG_NR_HASH_FUNCS: + args.nr_hash_funcs = strtol(arg, NULL, 10); + if (args.nr_hash_funcs == 0 || args.nr_hash_funcs > 15) { + fprintf(stderr, + "The bloom filter must use 1 to 15 hash functions."); + argp_usage(state); + } + break; + case ARG_VALUE_SIZE: + args.value_size = strtol(arg, NULL, 10); + if (args.value_size < 2 || args.value_size > 256) { + fprintf(stderr, + "Invalid value size. Must be between 2 and 256 bytes"); + argp_usage(state); + } + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +/* exported into benchmark runner */ +const struct argp bench_bloom_map_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, + "The bloom filter benchmarks do not support multi-consumer use\n"); + exit(1); + } +} + +static inline void trigger_bpf_program(void) +{ + syscall(__NR_getpgid); +} + +static void *producer(void *input) +{ + while (true) + trigger_bpf_program(); + + return NULL; +} + +static void *map_prepare_thread(void *arg) +{ + __u32 val_size, i; + void *val = NULL; + int err; + + val_size = args.value_size; + val = malloc(val_size); + if (!val) { + ctx.map_prepare_err = true; + goto done; + } + + while (true) { + i = __atomic_add_fetch(&ctx.next_map_idx, 1, __ATOMIC_RELAXED); + if (i > args.nr_entries) + break; + +again: + /* Populate hashmap, bloom filter map, and array map with the same + * random values + */ + err = syscall(__NR_getrandom, val, val_size, 0); + if (err != val_size) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to get random value: %d\n", -errno); + break; + } + + if (ctx.use_hashmap) { + err = bpf_map_update_elem(ctx.hashmap_fd, val, val, BPF_NOEXIST); + if (err) { + if (err != -EEXIST) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to add elem to hashmap: %d\n", + -errno); + break; + } + goto again; + } + } + + i--; + + if (ctx.use_array_map) { + err = bpf_map_update_elem(ctx.array_map_fd, &i, val, 0); + if (err) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to add elem to array map: %d\n", -errno); + break; + } + } + + if (ctx.use_hashmap && !ctx.hashmap_use_bloom) + continue; + + err = bpf_map_update_elem(ctx.bloom_fd, NULL, val, 0); + if (err) { + ctx.map_prepare_err = true; + fprintf(stderr, + "failed to add elem to bloom filter map: %d\n", -errno); + break; + } + } +done: + pthread_mutex_lock(&ctx.map_done_mtx); + ctx.map_done = true; + pthread_cond_signal(&ctx.map_done_cv); + pthread_mutex_unlock(&ctx.map_done_mtx); + + if (val) + free(val); + + return NULL; +} + +static void populate_maps(void) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + pthread_t map_thread; + int i, err, nr_rand_bytes; + + ctx.bloom_fd = bpf_map__fd(ctx.skel->maps.bloom_map); + ctx.hashmap_fd = bpf_map__fd(ctx.skel->maps.hashmap); + ctx.array_map_fd = bpf_map__fd(ctx.skel->maps.array_map); + + for (i = 0; i < nr_cpus; i++) { + err = pthread_create(&map_thread, NULL, map_prepare_thread, + NULL); + if (err) { + fprintf(stderr, "failed to create pthread: %d\n", -errno); + exit(1); + } + } + + pthread_mutex_lock(&ctx.map_done_mtx); + while (!ctx.map_done) + pthread_cond_wait(&ctx.map_done_cv, &ctx.map_done_mtx); + pthread_mutex_unlock(&ctx.map_done_mtx); + + if (ctx.map_prepare_err) + exit(1); + + nr_rand_bytes = syscall(__NR_getrandom, ctx.skel->bss->rand_vals, + ctx.skel->rodata->nr_rand_bytes, 0); + if (nr_rand_bytes != ctx.skel->rodata->nr_rand_bytes) { + fprintf(stderr, "failed to get random bytes\n"); + exit(1); + } +} + +static void check_args(void) +{ + if (args.value_size < 8) { + __u64 nr_unique_entries = 1ULL << (args.value_size * 8); + + if (args.nr_entries > nr_unique_entries) { + fprintf(stderr, + "Not enough unique values for the nr_entries requested\n"); + exit(1); + } + } +} + +static struct bloom_filter_bench *setup_skeleton(void) +{ + struct bloom_filter_bench *skel; + + check_args(); + + setup_libbpf(); + + skel = bloom_filter_bench__open(); + if (!skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + skel->rodata->hashmap_use_bloom = ctx.hashmap_use_bloom; + skel->rodata->count_false_hits = ctx.count_false_hits; + + /* Resize number of entries */ + bpf_map__set_max_entries(skel->maps.hashmap, args.nr_entries); + + bpf_map__set_max_entries(skel->maps.array_map, args.nr_entries); + + bpf_map__set_max_entries(skel->maps.bloom_map, args.nr_entries); + + /* Set value size */ + bpf_map__set_value_size(skel->maps.array_map, args.value_size); + + bpf_map__set_value_size(skel->maps.bloom_map, args.value_size); + + bpf_map__set_value_size(skel->maps.hashmap, args.value_size); + + /* For the hashmap, we use the value as the key as well */ + bpf_map__set_key_size(skel->maps.hashmap, args.value_size); + + skel->bss->value_size = args.value_size; + + /* Set number of hash functions */ + bpf_map__set_map_extra(skel->maps.bloom_map, args.nr_hash_funcs); + + if (bloom_filter_bench__load(skel)) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } + + return skel; +} + +static void bloom_lookup_setup(void) +{ + struct bpf_link *link; + + ctx.use_array_map = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void bloom_update_setup(void) +{ + struct bpf_link *link; + + ctx.use_array_map = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_update); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void false_positive_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + ctx.hashmap_use_bloom = true; + ctx.count_false_hits = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void hashmap_with_bloom_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + ctx.hashmap_use_bloom = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void hashmap_no_bloom_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void measure(struct bench_res *res) +{ + unsigned long total_hits = 0, total_drops = 0, total_false_hits = 0; + static unsigned long last_hits, last_drops, last_false_hits; + unsigned int nr_cpus = bpf_num_possible_cpus(); + int hit_key, drop_key, false_hit_key; + int i; + + hit_key = ctx.skel->rodata->hit_key; + drop_key = ctx.skel->rodata->drop_key; + false_hit_key = ctx.skel->rodata->false_hit_key; + + if (ctx.skel->bss->error != 0) { + fprintf(stderr, "error (%d) when searching the bloom filter\n", + ctx.skel->bss->error); + exit(1); + } + + for (i = 0; i < nr_cpus; i++) { + struct stat *s = (void *)&ctx.skel->bss->percpu_stats[i]; + + total_hits += s->stats[hit_key]; + total_drops += s->stats[drop_key]; + total_false_hits += s->stats[false_hit_key]; + } + + res->hits = total_hits - last_hits; + res->drops = total_drops - last_drops; + res->false_hits = total_false_hits - last_false_hits; + + last_hits = total_hits; + last_drops = total_drops; + last_false_hits = total_false_hits; +} + +static void *consumer(void *input) +{ + return NULL; +} + +const struct bench bench_bloom_lookup = { + .name = "bloom-lookup", + .validate = validate, + .setup = bloom_lookup_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_bloom_update = { + .name = "bloom-update", + .validate = validate, + .setup = bloom_update_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_bloom_false_positive = { + .name = "bloom-false-positive", + .validate = validate, + .setup = false_positive_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = false_hits_report_progress, + .report_final = false_hits_report_final, +}; + +const struct bench bench_hashmap_without_bloom = { + .name = "hashmap-without-bloom", + .validate = validate, + .setup = hashmap_no_bloom_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_hashmap_with_bloom = { + .name = "hashmap-with-bloom", + .validate = validate, + .setup = hashmap_with_bloom_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh new file mode 100755 index 000000000000..8ffd385ab2f4 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +header "Bloom filter map" +for v in 2 4 8 16 40; do +for t in 1 4 8 12 16; do +for h in {1..10}; do +subtitle "value_size: $v bytes, # threads: $t, # hashes: $h" + for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do + printf "%'d entries -\n" $e + printf "\t" + summarize "Lookups, total operations: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-lookup)" + printf "\t" + summarize "Updates, total operations: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-update)" + printf "\t" + summarize_percentage "False positive rate: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-false-positive)" + done + printf "\n" +done +done +done + +header "Hashmap without bloom filter vs. hashmap with bloom filter (throughput, 8 threads)" +for v in 2 4 8 16 40; do +for h in {1..10}; do +subtitle "value_size: $v, # hashes: $h" + for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do + printf "%'d entries -\n" $e + printf "\t" + summarize_total "Hashmap without bloom filter: " \ + "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-without-bloom)" + printf "\t" + summarize_total "Hashmap with bloom filter: " \ + "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-with-bloom)" + done + printf "\n" +done +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh index af4aa04caba6..ada028aa9007 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh @@ -1,34 +1,8 @@ #!/bin/bash -set -eufo pipefail - -RUN_BENCH="sudo ./bench -w3 -d10 -a" - -function hits() -{ - echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" -} - -function drops() -{ - echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" -} +source ./benchs/run_common.sh -function header() -{ - local len=${#1} - - printf "\n%s\n" "$1" - for i in $(seq 1 $len); do printf '='; done - printf '\n' -} - -function summarize() -{ - bench="$1" - summary=$(echo $2 | tail -n1) - printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" -} +set -eufo pipefail header "Single-producer, parallel producer" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh new file mode 100644 index 000000000000..9a16be78b180 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_common.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +RUN_BENCH="sudo ./bench -w3 -d10 -a" + +function header() +{ + local len=${#1} + + printf "\n%s\n" "$1" + for i in $(seq 1 $len); do printf '='; done + printf '\n' +} + +function subtitle() +{ + local len=${#1} + printf "\t%s\n" "$1" +} + +function hits() +{ + echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function drops() +{ + echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function percentage() +{ + echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/" +} + +function total() +{ + echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function summarize() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" +} + +function summarize_percentage() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s%%\n" "$bench" "$(percentage $summary)" +} + +function summarize_total() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(total $summary)" +} diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c new file mode 100644 index 000000000000..9aa3fbed918b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <sys/syscall.h> +#include <test_progs.h> +#include "bloom_filter_map.skel.h" + +static void test_fail_cases(void) +{ + struct bpf_create_map_attr xattr = { + .name = "bloom_filter_map", + .map_type = BPF_MAP_TYPE_BLOOM_FILTER, + .max_entries = 100, + .value_size = 11, + }; + __u32 value; + int fd, err; + + /* Invalid key size */ + xattr.key_size = 4; + fd = bpf_create_map_xattr(&xattr); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid key size")) + close(fd); + xattr.key_size = 0; + + /* Invalid value size */ + xattr.value_size = 0; + fd = bpf_create_map_xattr(&xattr); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid value size 0")) + close(fd); + xattr.value_size = 11; + + /* Invalid max entries size */ + xattr.max_entries = 0; + fd = bpf_create_map_xattr(&xattr); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid max entries size")) + close(fd); + xattr.max_entries = 100; + + /* Bloom filter maps do not support BPF_F_NO_PREALLOC */ + xattr.map_flags = BPF_F_NO_PREALLOC; + fd = bpf_create_map_xattr(&xattr); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid flags")) + close(fd); + xattr.map_flags = 0; + + fd = bpf_create_map_xattr(&xattr); + if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter")) + return; + + /* Test invalid flags */ + err = bpf_map_update_elem(fd, NULL, &value, -1); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, BPF_EXIST); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, BPF_F_LOCK); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, BPF_NOEXIST); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, 10000); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + close(fd); +} + +static void check_bloom(struct bloom_filter_map *skel) +{ + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.check_bloom); + if (!ASSERT_OK_PTR(link, "link")) + return; + + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->error, 0, "error"); + + bpf_link__destroy(link); +} + +static void test_inner_map(struct bloom_filter_map *skel, const __u32 *rand_vals, + __u32 nr_rand_vals) +{ + int outer_map_fd, inner_map_fd, err, i, key = 0; + struct bpf_create_map_attr xattr = { + .name = "bloom_filter_inner_map", + .map_type = BPF_MAP_TYPE_BLOOM_FILTER, + .value_size = sizeof(__u32), + .max_entries = nr_rand_vals, + }; + struct bpf_link *link; + + /* Create a bloom filter map that will be used as the inner map */ + inner_map_fd = bpf_create_map_xattr(&xattr); + if (!ASSERT_GE(inner_map_fd, 0, "bpf_create_map bloom filter inner map")) + return; + + for (i = 0; i < nr_rand_vals; i++) { + err = bpf_map_update_elem(inner_map_fd, NULL, rand_vals + i, BPF_ANY); + if (!ASSERT_OK(err, "Add random value to inner_map_fd")) + goto done; + } + + /* Add the bloom filter map to the outer map */ + outer_map_fd = bpf_map__fd(skel->maps.outer_map); + err = bpf_map_update_elem(outer_map_fd, &key, &inner_map_fd, BPF_ANY); + if (!ASSERT_OK(err, "Add bloom filter map to outer map")) + goto done; + + /* Attach the bloom_filter_inner_map prog */ + link = bpf_program__attach(skel->progs.inner_map); + if (!ASSERT_OK_PTR(link, "link")) + goto delete_inner_map; + + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->error, 0, "error"); + + bpf_link__destroy(link); + +delete_inner_map: + /* Ensure the inner bloom filter map can be deleted */ + err = bpf_map_delete_elem(outer_map_fd, &key); + ASSERT_OK(err, "Delete inner bloom filter map"); + +done: + close(inner_map_fd); +} + +static int setup_progs(struct bloom_filter_map **out_skel, __u32 **out_rand_vals, + __u32 *out_nr_rand_vals) +{ + struct bloom_filter_map *skel; + int random_data_fd, bloom_fd; + __u32 *rand_vals = NULL; + __u32 map_size, val; + int err, i; + + /* Set up a bloom filter map skeleton */ + skel = bloom_filter_map__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bloom_filter_map__open_and_load")) + return -EINVAL; + + /* Set up rand_vals */ + map_size = bpf_map__max_entries(skel->maps.map_random_data); + rand_vals = malloc(sizeof(*rand_vals) * map_size); + if (!rand_vals) { + err = -ENOMEM; + goto error; + } + + /* Generate random values and populate both skeletons */ + random_data_fd = bpf_map__fd(skel->maps.map_random_data); + bloom_fd = bpf_map__fd(skel->maps.map_bloom); + for (i = 0; i < map_size; i++) { + val = rand(); + + err = bpf_map_update_elem(random_data_fd, &i, &val, BPF_ANY); + if (!ASSERT_OK(err, "Add random value to map_random_data")) + goto error; + + err = bpf_map_update_elem(bloom_fd, NULL, &val, BPF_ANY); + if (!ASSERT_OK(err, "Add random value to map_bloom")) + goto error; + + rand_vals[i] = val; + } + + *out_skel = skel; + *out_rand_vals = rand_vals; + *out_nr_rand_vals = map_size; + + return 0; + +error: + bloom_filter_map__destroy(skel); + if (rand_vals) + free(rand_vals); + return err; +} + +void test_bloom_filter_map(void) +{ + __u32 *rand_vals, nr_rand_vals; + struct bloom_filter_map *skel; + int err; + + test_fail_cases(); + + err = setup_progs(&skel, &rand_vals, &nr_rand_vals); + if (err) + return; + + test_inner_map(skel, rand_vals, nr_rand_vals); + free(rand_vals); + + check_bloom(skel); + + bloom_filter_map__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c new file mode 100644 index 000000000000..d9a88dd1ea65 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <errno.h> +#include <linux/bpf.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct bpf_map; + +__u8 rand_vals[2500000]; +const __u32 nr_rand_bytes = 2500000; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, sizeof(__u32)); + /* max entries and value_size will be set programmatically. + * They are configurable from the userspace bench program. + */ +} array_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_BLOOM_FILTER); + /* max entries, value_size, and # of hash functions will be set + * programmatically. They are configurable from the userspace + * bench program. + */ + __uint(map_extra, 3); +} bloom_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + /* max entries, key_size, and value_size, will be set + * programmatically. They are configurable from the userspace + * bench program. + */ +} hashmap SEC(".maps"); + +struct callback_ctx { + struct bpf_map *map; + bool update; +}; + +/* Tracks the number of hits, drops, and false hits */ +struct { + __u32 stats[3]; +} __attribute__((__aligned__(256))) percpu_stats[256]; + +const __u32 hit_key = 0; +const __u32 drop_key = 1; +const __u32 false_hit_key = 2; + +__u8 value_size; + +const volatile bool hashmap_use_bloom; +const volatile bool count_false_hits; + +int error = 0; + +static __always_inline void log_result(__u32 key) +{ + __u32 cpu = bpf_get_smp_processor_id(); + + percpu_stats[cpu & 255].stats[key]++; +} + +static __u64 +bloom_callback(struct bpf_map *map, __u32 *key, void *val, + struct callback_ctx *data) +{ + int err; + + if (data->update) + err = bpf_map_push_elem(data->map, val, 0); + else + err = bpf_map_peek_elem(data->map, val); + + if (err) { + error |= 1; + return 1; /* stop the iteration */ + } + + log_result(hit_key); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int bloom_lookup(void *ctx) +{ + struct callback_ctx data; + + data.map = (struct bpf_map *)&bloom_map; + data.update = false; + + bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int bloom_update(void *ctx) +{ + struct callback_ctx data; + + data.map = (struct bpf_map *)&bloom_map; + data.update = true; + + bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int bloom_hashmap_lookup(void *ctx) +{ + __u64 *result; + int i, err; + + __u32 index = bpf_get_prandom_u32(); + __u32 bitmask = (1ULL << 21) - 1; + + for (i = 0; i < 1024; i++, index += value_size) { + index = index & bitmask; + + if (hashmap_use_bloom) { + err = bpf_map_peek_elem(&bloom_map, + rand_vals + index); + if (err) { + if (err != -ENOENT) { + error |= 2; + return 0; + } + log_result(hit_key); + continue; + } + } + + result = bpf_map_lookup_elem(&hashmap, + rand_vals + index); + if (result) { + log_result(hit_key); + } else { + if (hashmap_use_bloom && count_false_hits) + log_result(false_hit_key); + log_result(drop_key); + } + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_map.c b/tools/testing/selftests/bpf/progs/bloom_filter_map.c new file mode 100644 index 000000000000..1316f3db79d9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bloom_filter_map.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct bpf_map; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1000); +} map_random_data SEC(".maps"); + +struct map_bloom_type { + __uint(type, BPF_MAP_TYPE_BLOOM_FILTER); + __type(value, __u32); + __uint(max_entries, 10000); + __uint(map_extra, 5); +} map_bloom SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct map_bloom_type); +} outer_map SEC(".maps"); + +struct callback_ctx { + struct bpf_map *map; +}; + +int error = 0; + +static __u64 +check_elem(struct bpf_map *map, __u32 *key, __u32 *val, + struct callback_ctx *data) +{ + int err; + + err = bpf_map_peek_elem(data->map, val); + if (err) { + error |= 1; + return 1; /* stop the iteration */ + } + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int inner_map(void *ctx) +{ + struct bpf_map *inner_map; + struct callback_ctx data; + int key = 0; + + inner_map = bpf_map_lookup_elem(&outer_map, &key); + if (!inner_map) { + error |= 2; + return 0; + } + + data.map = inner_map; + bpf_for_each_map_elem(&map_random_data, check_elem, &data, 0); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int check_bloom(void *ctx) +{ + struct callback_ctx data; + + data.map = (struct bpf_map *)&map_bloom; + bpf_for_each_map_elem(&map_random_data, check_elem, &data, 0); + + return 0; +} |