summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/proc/proc_net.c19
-rw-r--r--include/linux/bpf.h36
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/proc_fs.h3
-rw-r--r--include/uapi/linux/bpf.h47
-rw-r--r--kernel/bpf/Makefile2
-rw-r--r--kernel/bpf/bpf_iter.c530
-rw-r--r--kernel/bpf/btf.c42
-rw-r--r--kernel/bpf/inode.c5
-rw-r--r--kernel/bpf/map_iter.c97
-rw-r--r--kernel/bpf/syscall.c59
-rw-r--r--kernel/bpf/task_iter.c333
-rw-r--r--kernel/bpf/verifier.c42
-rw-r--r--kernel/trace/bpf_trace.c214
-rw-r--r--net/ipv6/ip6_fib.c65
-rw-r--r--net/ipv6/route.c37
-rw-r--r--net/netlink/af_netlink.c87
-rwxr-xr-xscripts/bpf_helpers_doc.py2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-iter.rst83
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool13
-rw-r--r--tools/bpf/bpftool/iter.c88
-rw-r--r--tools/bpf/bpftool/link.c1
-rw-r--r--tools/bpf/bpftool/main.c3
-rw-r--r--tools/bpf/bpftool/main.h1
-rw-r--r--tools/include/uapi/linux/bpf.h47
-rw-r--r--tools/lib/bpf/bpf.c10
-rw-r--r--tools/lib/bpf/bpf.h2
-rw-r--r--tools/lib/bpf/bpf_helpers.h14
-rw-r--r--tools/lib/bpf/bpf_tracing.h16
-rw-r--r--tools/lib/bpf/libbpf.c52
-rw-r--r--tools/lib/bpf/libbpf.h9
-rw-r--r--tools/lib/bpf/libbpf.map2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c409
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c28
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c62
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_netlink.c66
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_file.c26
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c52
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h22
43 files changed, 2664 insertions, 14 deletions
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 4888c5224442..dba63b2429f0 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -98,6 +98,25 @@ static const struct proc_ops proc_net_seq_ops = {
.proc_release = seq_release_net,
};
+int bpf_iter_init_seq_net(void *priv_data)
+{
+#ifdef CONFIG_NET_NS
+ struct seq_net_private *p = priv_data;
+
+ p->net = get_net(current->nsproxy->net_ns);
+#endif
+ return 0;
+}
+
+void bpf_iter_fini_seq_net(void *priv_data)
+{
+#ifdef CONFIG_NET_NS
+ struct seq_net_private *p = priv_data;
+
+ put_net(p->net);
+#endif
+}
+
struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
struct proc_dir_entry *parent, const struct seq_operations *ops,
unsigned int state_size, void *data)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1262ec460ab3..cf4b6e44f2bc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -31,6 +31,7 @@ struct seq_file;
struct btf;
struct btf_type;
struct exception_table_entry;
+struct seq_operations;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@@ -319,6 +320,7 @@ enum bpf_reg_type {
PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */
PTR_TO_BTF_ID, /* reg points to kernel struct */
+ PTR_TO_BTF_ID_OR_NULL, /* reg points to kernel struct or NULL */
};
/* The information passed from prog-specific *_is_valid_access
@@ -657,6 +659,7 @@ struct bpf_prog_aux {
bool offload_requested;
bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
bool func_proto_unreliable;
+ bool btf_id_or_null_non0_off;
enum bpf_tramp_prog_type trampoline_prog_type;
struct bpf_trampoline *trampoline;
struct hlist_node tramp_hlist;
@@ -1021,6 +1024,7 @@ static inline void bpf_enable_instrumentation(void)
extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
+extern const struct file_operations bpf_iter_fops;
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
extern const struct bpf_prog_ops _name ## _prog_ops; \
@@ -1080,6 +1084,7 @@ int generic_map_update_batch(struct bpf_map *map,
int generic_map_delete_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
+struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
extern int sysctl_unprivileged_bpf_disabled;
@@ -1126,6 +1131,37 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd);
int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
int bpf_obj_get_user(const char __user *pathname, int flags);
+#define BPF_ITER_FUNC_PREFIX "__bpf_iter__"
+#define DEFINE_BPF_ITER_FUNC(target, args...) \
+ extern int __bpf_iter__ ## target(args); \
+ int __init __bpf_iter__ ## target(args) { return 0; }
+
+typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
+typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
+
+struct bpf_iter_reg {
+ const char *target;
+ const struct seq_operations *seq_ops;
+ bpf_iter_init_seq_priv_t init_seq_private;
+ bpf_iter_fini_seq_priv_t fini_seq_private;
+ u32 seq_priv_size;
+};
+
+struct bpf_iter_meta {
+ __bpf_md_ptr(struct seq_file *, seq);
+ u64 session_id;
+ u64 seq_num;
+};
+
+int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
+void bpf_iter_unreg_target(const char *target);
+bool bpf_iter_prog_supported(struct bpf_prog *prog);
+int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int bpf_iter_new_fd(struct bpf_link *link);
+bool bpf_link_is_iter(struct bpf_link *link);
+struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop);
+int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
+
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 8345cdf553b8..29d22752fc87 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -124,3 +124,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
#ifdef CONFIG_CGROUP_BPF
BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup)
#endif
+BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 45c05fd9c99d..03953c59807d 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -105,6 +105,9 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
void *data);
extern struct pid *tgid_pidfd_to_pid(const struct file *file);
+extern int bpf_iter_init_seq_net(void *priv_data);
+extern void bpf_iter_fini_seq_net(void *priv_data);
+
#ifdef CONFIG_PROC_PID_ARCH_STATUS
/*
* The architecture which selects CONFIG_PROC_PID_ARCH_STATUS must
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6e5e7caa3739..9d1932e23cec 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -116,6 +116,7 @@ enum bpf_cmd {
BPF_LINK_GET_FD_BY_ID,
BPF_LINK_GET_NEXT_ID,
BPF_ENABLE_STATS,
+ BPF_ITER_CREATE,
};
enum bpf_map_type {
@@ -218,6 +219,7 @@ enum bpf_attach_type {
BPF_TRACE_FEXIT,
BPF_MODIFY_RETURN,
BPF_LSM_MAC,
+ BPF_TRACE_ITER,
__MAX_BPF_ATTACH_TYPE
};
@@ -228,6 +230,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
BPF_LINK_TYPE_TRACING = 2,
BPF_LINK_TYPE_CGROUP = 3,
+ BPF_LINK_TYPE_ITER = 4,
MAX_BPF_LINK_TYPE,
};
@@ -612,6 +615,11 @@ union bpf_attr {
__u32 type;
} enable_stats;
+ struct { /* struct used by BPF_ITER_CREATE command */
+ __u32 link_fd;
+ __u32 flags;
+ } iter_create;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -3069,6 +3077,41 @@ union bpf_attr {
* See: clock_gettime(CLOCK_BOOTTIME)
* Return
* Current *ktime*.
+ *
+ * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ * Description
+ * seq_printf uses seq_file seq_printf() to print out the format string.
+ * The *m* represents the seq_file. The *fmt* and *fmt_size* are for
+ * the format string itself. The *data* and *data_len* are format string
+ * arguments. The *data* are a u64 array and corresponding format string
+ * values are stored in the array. For strings and pointers where pointees
+ * are accessed, only the pointer values are stored in the *data* array.
+ * The *data_len* is the *data* size in term of bytes.
+ *
+ * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
+ * Reading kernel memory may fail due to either invalid address or
+ * valid address but requiring a major memory fault. If reading kernel memory
+ * fails, the string for **%s** will be an empty string, and the ip
+ * address for **%p{i,I}{4,6}** will be 0. Not returning error to
+ * bpf program is consistent with what bpf_trace_printk() does for now.
+ * Return
+ * 0 on success, or a negative errno in case of failure.
+ *
+ * * **-EBUSY** Percpu memory copy buffer is busy, can try again
+ * by returning 1 from bpf program.
+ * * **-EINVAL** Invalid arguments, or invalid/unsupported formats.
+ * * **-E2BIG** Too many format specifiers.
+ * * **-EOVERFLOW** Overflow happens, the same object will be tried again.
+ *
+ * int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
+ * Description
+ * seq_write uses seq_file seq_write() to write the data.
+ * The *m* represents the seq_file. The *data* and *len* represent the
+ * data to write in bytes.
+ * Return
+ * 0 on success, or a negative errno in case of failure.
+ *
+ * * **-EOVERFLOW** Overflow happens, the same object will be tried again.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3196,7 +3239,9 @@ union bpf_attr {
FN(get_netns_cookie), \
FN(get_current_ancestor_cgroup_id), \
FN(sk_assign), \
- FN(ktime_get_boot_ns),
+ FN(ktime_get_boot_ns), \
+ FN(seq_printf), \
+ FN(seq_write),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index f2d7be596966..37b2d8620153 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -2,7 +2,7 @@
obj-y := core.o
CFLAGS_core.o += $(call cc-disable-warning, override-init)
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
new file mode 100644
index 000000000000..30efd15cd4a0
--- /dev/null
+++ b/kernel/bpf/bpf_iter.c
@@ -0,0 +1,530 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+
+struct bpf_iter_target_info {
+ struct list_head list;
+ const char *target;
+ const struct seq_operations *seq_ops;
+ bpf_iter_init_seq_priv_t init_seq_private;
+ bpf_iter_fini_seq_priv_t fini_seq_private;
+ u32 seq_priv_size;
+ u32 btf_id; /* cached value */
+};
+
+struct bpf_iter_link {
+ struct bpf_link link;
+ struct bpf_iter_target_info *tinfo;
+};
+
+struct bpf_iter_priv_data {
+ struct bpf_iter_target_info *tinfo;
+ struct bpf_prog *prog;
+ u64 session_id;
+ u64 seq_num;
+ bool done_stop;
+ u8 target_private[] __aligned(8);
+};
+
+static struct list_head targets = LIST_HEAD_INIT(targets);
+static DEFINE_MUTEX(targets_mutex);
+
+/* protect bpf_iter_link changes */
+static DEFINE_MUTEX(link_mutex);
+
+/* incremented on every opened seq_file */
+static atomic64_t session_id;
+
+static int prepare_seq_file(struct file *file, struct bpf_iter_link *link);
+
+static void bpf_iter_inc_seq_num(struct seq_file *seq)
+{
+ struct bpf_iter_priv_data *iter_priv;
+
+ iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+ target_private);
+ iter_priv->seq_num++;
+}
+
+static void bpf_iter_dec_seq_num(struct seq_file *seq)
+{
+ struct bpf_iter_priv_data *iter_priv;
+
+ iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+ target_private);
+ iter_priv->seq_num--;
+}
+
+static void bpf_iter_done_stop(struct seq_file *seq)
+{
+ struct bpf_iter_priv_data *iter_priv;
+
+ iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+ target_private);
+ iter_priv->done_stop = true;
+}
+
+/* bpf_seq_read, a customized and simpler version for bpf iterator.
+ * no_llseek is assumed for this file.
+ * The following are differences from seq_read():
+ * . fixed buffer size (PAGE_SIZE)
+ * . assuming no_llseek
+ * . stop() may call bpf program, handling potential overflow there
+ */
+static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
+ loff_t *ppos)
+{
+ struct seq_file *seq = file->private_data;
+ size_t n, offs, copied = 0;
+ int err = 0;
+ void *p;
+
+ mutex_lock(&seq->lock);
+
+ if (!seq->buf) {
+ seq->size = PAGE_SIZE;
+ seq->buf = kmalloc(seq->size, GFP_KERNEL);
+ if (!seq->buf) {
+ err = -ENOMEM;
+ goto done;
+ }
+ }
+
+ if (seq->count) {
+ n = min(seq->count, size);
+ err = copy_to_user(buf, seq->buf + seq->from, n);
+ if (err) {
+ err = -EFAULT;
+ goto done;
+ }
+ seq->count -= n;
+ seq->from += n;
+ copied = n;
+ goto done;
+ }
+
+ seq->from = 0;
+ p = seq->op->start(seq, &seq->index);
+ if (!p)
+ goto stop;
+ if (IS_ERR(p)) {
+ err = PTR_ERR(p);
+ seq->op->stop(seq, p);
+ seq->count = 0;
+ goto done;
+ }
+
+ err = seq->op->show(seq, p);
+ if (err > 0) {
+ /* object is skipped, decrease seq_num, so next
+ * valid object can reuse the same seq_num.
+ */
+ bpf_iter_dec_seq_num(seq);
+ seq->count = 0;
+ } else if (err < 0 || seq_has_overflowed(seq)) {
+ if (!err)
+ err = -E2BIG;
+ seq->op->stop(seq, p);
+ seq->count = 0;
+ goto done;
+ }
+
+ while (1) {
+ loff_t pos = seq->index;
+
+ offs = seq->count;
+ p = seq->op->next(seq, p, &seq->index);
+ if (pos == seq->index) {
+ pr_info_ratelimited("buggy seq_file .next function %ps "
+ "did not updated position index\n",
+ seq->op->next);
+ seq->index++;
+ }
+
+ if (IS_ERR_OR_NULL(p))
+ break;
+
+ /* got a valid next object, increase seq_num */
+ bpf_iter_inc_seq_num(seq);
+
+ if (seq->count >= size)
+ break;
+
+ err = seq->op->show(seq, p);
+ if (err > 0) {
+ bpf_iter_dec_seq_num(seq);
+ seq->count = offs;
+ } else if (err < 0 || seq_has_overflowed(seq)) {
+ seq->count = offs;
+ if (offs == 0) {
+ if (!err)
+ err = -E2BIG;
+ seq->op->stop(seq, p);
+ goto done;
+ }
+ break;
+ }
+ }
+stop:
+ offs = seq->count;
+ /* bpf program called if !p */
+ seq->op->stop(seq, p);
+ if (!p) {
+ if (!seq_has_overflowed(seq)) {
+ bpf_iter_done_stop(seq);
+ } else {
+ seq->count = offs;
+ if (offs == 0) {
+ err = -E2BIG;
+ goto done;
+ }
+ }
+ }
+
+ n = min(seq->count, size);
+ err = copy_to_user(buf, seq->buf, n);
+ if (err) {
+ err = -EFAULT;
+ goto done;
+ }
+ copied = n;
+ seq->count -= n;
+ seq->from = n;
+done:
+ if (!copied)
+ copied = err;
+ else
+ *ppos += copied;
+ mutex_unlock(&seq->lock);
+ return copied;
+}
+
+static int iter_open(struct inode *inode, struct file *file)
+{
+ struct bpf_iter_link *link = inode->i_private;
+
+ return prepare_seq_file(file, link);
+}
+
+static int iter_release(struct inode *inode, struct file *file)
+{
+ struct bpf_iter_priv_data *iter_priv;
+ struct seq_file *seq;
+
+ seq = file->private_data;
+ if (!seq)
+ return 0;
+
+ iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+ target_private);
+
+ if (iter_priv->tinfo->fini_seq_private)
+ iter_priv->tinfo->fini_seq_private(seq->private);
+
+ bpf_prog_put(iter_priv->prog);
+ seq->private = iter_priv;
+
+ return seq_release_private(inode, file);
+}
+
+const struct file_operations bpf_iter_fops = {
+ .open = iter_open,
+ .llseek = no_llseek,
+ .read = bpf_seq_read,
+ .release = iter_release,
+};
+
+int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
+{
+ struct bpf_iter_target_info *tinfo;
+
+ tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+ if (!tinfo)
+ return -ENOMEM;
+
+ tinfo->target = reg_info->target;
+ tinfo->seq_ops = reg_info->seq_ops;
+ tinfo->init_seq_private = reg_info->init_seq_private;
+ tinfo->fini_seq_private = reg_info->fini_seq_private;
+ tinfo->seq_priv_size = reg_info->seq_priv_size;
+ INIT_LIST_HEAD(&tinfo->list);
+
+ mutex_lock(&targets_mutex);
+ list_add(&tinfo->list, &targets);
+ mutex_unlock(&targets_mutex);
+
+ return 0;
+}
+
+void bpf_iter_unreg_target(const char *target)
+{
+ struct bpf_iter_target_info *tinfo;
+ bool found = false;
+
+ mutex_lock(&targets_mutex);
+ list_for_each_entry(tinfo, &targets, list) {
+ if (!strcmp(target, tinfo->target)) {
+ list_del(&tinfo->list);
+ kfree(tinfo);
+ found = true;
+ break;
+ }
+ }
+ mutex_unlock(&targets_mutex);
+
+ WARN_ON(found == false);
+}
+
+static void cache_btf_id(struct bpf_iter_target_info *tinfo,
+ struct bpf_prog *prog)
+{
+ tinfo->btf_id = prog->aux->attach_btf_id;
+}
+
+bool bpf_iter_prog_supported(struct bpf_prog *prog)
+{
+ const char *attach_fname = prog->aux->attach_func_name;
+ u32 prog_btf_id = prog->aux->attach_btf_id;
+ const char *prefix = BPF_ITER_FUNC_PREFIX;
+ struct bpf_iter_target_info *tinfo;
+ int prefix_len = strlen(prefix);
+ bool supported = false;
+
+ if (strncmp(attach_fname, prefix, prefix_len))
+ return false;
+
+ mutex_lock(&targets_mutex);
+ list_for_each_entry(tinfo, &targets, list) {
+ if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) {
+ supported = true;
+ break;
+ }
+ if (!strcmp(attach_fname + prefix_len, tinfo->target)) {
+ cache_btf_id(tinfo, prog);
+ supported = true;
+ break;
+ }
+ }
+ mutex_unlock(&targets_mutex);
+
+ return supported;
+}
+
+static void bpf_iter_link_release(struct bpf_link *link)
+{
+}
+
+static void bpf_iter_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_iter_link *iter_link =
+ container_of(link, struct bpf_iter_link, link);
+
+ kfree(iter_link);
+}
+
+static int bpf_iter_link_replace(struct bpf_link *link,
+ struct bpf_prog *new_prog,
+ struct bpf_prog *old_prog)
+{
+ int ret = 0;
+
+ mutex_lock(&link_mutex);
+ if (old_prog && link->prog != old_prog) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ if (link->prog->type != new_prog->type ||
+ link->prog->expected_attach_type != new_prog->expected_attach_type ||
+ link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ old_prog = xchg(&link->prog, new_prog);
+ bpf_prog_put(old_prog);
+
+out_unlock:
+ mutex_unlock(&link_mutex);
+ return ret;
+}
+
+static const struct bpf_link_ops bpf_iter_link_lops = {
+ .release = bpf_iter_link_release,
+ .dealloc = bpf_iter_link_dealloc,
+ .update_prog = bpf_iter_link_replace,
+};
+
+bool bpf_link_is_iter(struct bpf_link *link)
+{
+ return link->ops == &bpf_iter_link_lops;
+}
+
+int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ struct bpf_link_primer link_primer;
+ struct bpf_iter_target_info *tinfo;
+ struct bpf_iter_link *link;
+ bool existed = false;
+ u32 prog_btf_id;
+ int err;
+
+ if (attr->link_create.target_fd || attr->link_create.flags)
+ return -EINVAL;
+
+ prog_btf_id = prog->aux->attach_btf_id;
+ mutex_lock(&targets_mutex);
+ list_for_each_entry(tinfo, &targets, list) {
+ if (tinfo->btf_id == prog_btf_id) {
+ existed = true;
+ break;
+ }
+ }
+ mutex_unlock(&targets_mutex);
+ if (!existed)
+ return -ENOENT;
+
+ link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
+ if (!link)
+ return -ENOMEM;
+
+ bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog);
+ link->tinfo = tinfo;
+
+ err = bpf_link_prime(&link->link, &link_primer);
+ if (err) {
+ kfree(link);
+ return err;
+ }
+
+ return bpf_link_settle(&link_primer);
+}
+
+static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
+ struct bpf_iter_target_info *tinfo,
+ struct bpf_prog *prog)
+{
+ priv_data->tinfo = tinfo;
+ priv_data->prog = prog;
+ priv_data->session_id = atomic64_inc_return(&session_id);
+ priv_data->seq_num = 0;
+ priv_data->done_stop = false;
+}
+
+static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
+{
+ struct bpf_iter_priv_data *priv_data;
+ struct bpf_iter_target_info *tinfo;
+ struct bpf_prog *prog;
+ u32 total_priv_dsize;
+ struct seq_file *seq;
+ int err = 0;
+
+ mutex_lock(&link_mutex);
+ prog = link->link.prog;
+ bpf_prog_inc(prog);
+ mutex_unlock(&link_mutex);
+
+ tinfo = link->tinfo;
+ total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
+ tinfo->seq_priv_size;
+ priv_data = __seq_open_private(file, tinfo->seq_ops, total_priv_dsize);
+ if (!priv_data) {
+ err = -ENOMEM;
+ goto release_prog;
+ }
+
+ if (tinfo->init_seq_private) {
+ err = tinfo->init_seq_private(priv_data->target_private);
+ if (err)
+ goto release_seq_file;
+ }
+
+ init_seq_meta(priv_data, tinfo, prog);
+ seq = file->private_data;
+ seq->private = priv_data->target_private;
+
+ return 0;
+
+release_seq_file:
+ seq_release_private(file->f_inode, file);
+ file->private_data = NULL;
+release_prog:
+ bpf_prog_put(prog);
+ return err;
+}
+
+int bpf_iter_new_fd(struct bpf_link *link)
+{
+ struct file *file;
+ unsigned int flags;
+ int err, fd;
+
+ if (link->ops != &bpf_iter_link_lops)
+ return -EINVAL;
+
+ flags = O_RDONLY | O_CLOEXEC;
+ fd = get_unused_fd_flags(flags);
+ if (fd < 0)
+ return fd;
+
+ file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags);
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ goto free_fd;
+ }
+
+ err = prepare_seq_file(file,
+ container_of(link, struct bpf_iter_link, link));
+ if (err)
+ goto free_file;
+
+ fd_install(fd, file);
+ return fd;
+
+free_file:
+ fput(file);
+free_fd:
+ put_unused_fd(fd);
+ return err;
+}
+
+struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
+{
+ struct bpf_iter_priv_data *iter_priv;
+ struct seq_file *seq;
+ void *seq_priv;
+
+ seq = meta->seq;
+ if (seq->file->f_op != &bpf_iter_fops)
+ return NULL;
+
+ seq_priv = seq->private;
+ iter_priv = container_of(seq_priv, struct bpf_iter_priv_data,
+ target_private);
+
+ if (in_stop && iter_priv->done_stop)
+ return NULL;
+
+ meta->session_id = iter_priv->session_id;
+ meta->seq_num = iter_priv->seq_num;
+
+ return iter_priv->prog;
+}
+
+int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
+{
+ int ret;
+
+ rcu_read_lock();
+ migrate_disable();
+ ret = BPF_PROG_RUN(prog, ctx);
+ migrate_enable();
+ rcu_read_unlock();
+
+ return ret == 0 ? 0 : -EAGAIN;
+}
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index a2cfba89a8e1..dcd233139294 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3790,7 +3790,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
return true;
/* this is a pointer to another type */
- info->reg_type = PTR_TO_BTF_ID;
+ if (off != 0 && prog->aux->btf_id_or_null_non0_off)
+ info->reg_type = PTR_TO_BTF_ID_OR_NULL;
+ else
+ info->reg_type = PTR_TO_BTF_ID;
if (tgt_prog) {
ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg);
@@ -3830,6 +3833,7 @@ int btf_struct_access(struct bpf_verifier_log *log,
const struct btf_type *mtype, *elem_type = NULL;
const struct btf_member *member;
const char *tname, *mname;
+ u32 vlen;
again:
tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
@@ -3838,7 +3842,43 @@ again:
return -EINVAL;
}
+ vlen = btf_type_vlen(t);
if (off + size > t->size) {
+ /* If the last element is a variable size array, we may
+ * need to relax the rule.
+ */
+ struct btf_array *array_elem;
+
+ if (vlen == 0)
+ goto error;
+
+ member = btf_type_member(t) + vlen - 1;
+ mtype = btf_type_skip_modifiers(btf_vmlinux, member->type,
+ NULL);
+ if (!btf_type_is_array(mtype))
+ goto error;
+
+ array_elem = (struct btf_array *)(mtype + 1);
+ if (array_elem->nelems != 0)
+ goto error;
+
+ moff = btf_member_bit_offset(t, member) / 8;
+ if (off < moff)
+ goto error;
+
+ /* Only allow structure for now, can be relaxed for
+ * other types later.
+ */
+ elem_type = btf_type_skip_modifiers(btf_vmlinux,
+ array_elem->type, NULL);
+ if (!btf_type_is_struct(elem_type))
+ goto error;
+
+ off = (off - moff) % elem_type->size;
+ return btf_struct_access(log, elem_type, off, size, atype,
+ next_btf_id);
+
+error:
bpf_log(log, "access beyond struct %s at off %u size %u\n",
tname, off, size);
return -EACCES;
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 95087d9f4ed3..fb878ba3f22f 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -358,8 +358,11 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg)
{
+ struct bpf_link *link = arg;
+
return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops,
- &bpffs_obj_fops);
+ bpf_link_is_iter(link) ?
+ &bpf_iter_fops : &bpffs_obj_fops);
}
static struct dentry *
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
new file mode 100644
index 000000000000..8162e0c00b9f
--- /dev/null
+++ b/kernel/bpf/map_iter.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <linux/fs.h>
+#include <linux/filter.h>
+#include <linux/kernel.h>
+
+struct bpf_iter_seq_map_info {
+ u32 mid;
+};
+
+static void *bpf_map_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct bpf_iter_seq_map_info *info = seq->private;
+ struct bpf_map *map;
+
+ map = bpf_map_get_curr_or_next(&info->mid);
+ if (!map)
+ return NULL;
+
+ ++*pos;
+ return map;
+}
+
+static void *bpf_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_iter_seq_map_info *info = seq->private;
+ struct bpf_map *map;
+
+ ++*pos;
+ ++info->mid;
+ bpf_map_put((struct bpf_map *)v);
+ map = bpf_map_get_curr_or_next(&info->mid);
+ if (!map)
+ return NULL;
+
+ return map;
+}
+
+struct bpf_iter__bpf_map {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct bpf_map *, map);
+};
+
+DEFINE_BPF_ITER_FUNC(bpf_map, struct bpf_iter_meta *meta, struct bpf_map *map)
+
+static int __bpf_map_seq_show(struct seq_file *seq, void *v, bool in_stop)
+{
+ struct bpf_iter__bpf_map ctx;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ int ret = 0;
+
+ ctx.meta = &meta;
+ ctx.map = v;
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, in_stop);
+ if (prog)
+ ret = bpf_iter_run_prog(prog, &ctx);
+
+ return ret;
+}
+
+static int bpf_map_seq_show(struct seq_file *seq, void *v)
+{
+ return __bpf_map_seq_show(seq, v, false);
+}
+
+static void bpf_map_seq_stop(struct seq_file *seq, void *v)
+{
+ if (!v)
+ (void)__bpf_map_seq_show(seq, v, true);
+ else
+ bpf_map_put((struct bpf_map *)v);
+}
+
+static const struct seq_operations bpf_map_seq_ops = {
+ .start = bpf_map_seq_start,
+ .next = bpf_map_seq_next,
+ .stop = bpf_map_seq_stop,
+ .show = bpf_map_seq_show,
+};
+
+static int __init bpf_map_iter_init(void)
+{
+ struct bpf_iter_reg reg_info = {
+ .target = "bpf_map",
+ .seq_ops = &bpf_map_seq_ops,
+ .init_seq_private = NULL,
+ .fini_seq_private = NULL,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_map_info),
+ };
+
+ return bpf_iter_reg_target(&reg_info);
+}
+
+late_initcall(bpf_map_iter_init);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bb1ab7da6103..de2a75500233 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2729,6 +2729,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
case BPF_CGROUP_GETSOCKOPT:
case BPF_CGROUP_SETSOCKOPT:
return BPF_PROG_TYPE_CGROUP_SOCKOPT;
+ case BPF_TRACE_ITER:
+ return BPF_PROG_TYPE_TRACING;
default:
return BPF_PROG_TYPE_UNSPEC;
}
@@ -2932,6 +2934,25 @@ static int bpf_obj_get_next_id(const union bpf_attr *attr,
return err;
}
+struct bpf_map *bpf_map_get_curr_or_next(u32 *id)
+{
+ struct bpf_map *map;
+
+ spin_lock_bh(&map_idr_lock);
+again:
+ map = idr_get_next(&map_idr, id);
+ if (map) {
+ map = __bpf_map_inc_not_zero(map, false);
+ if (IS_ERR(map)) {
+ (*id)++;
+ goto again;
+ }
+ }
+ spin_unlock_bh(&map_idr_lock);
+
+ return map;
+}
+
#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
struct bpf_prog *bpf_prog_by_id(u32 id)
@@ -3729,6 +3750,15 @@ err_put:
return err;
}
+static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ if (attr->link_create.attach_type == BPF_TRACE_ITER &&
+ prog->expected_attach_type == BPF_TRACE_ITER)
+ return bpf_iter_link_attach(attr, prog);
+
+ return -EINVAL;
+}
+
#define BPF_LINK_CREATE_LAST_FIELD link_create.flags
static int link_create(union bpf_attr *attr)
{
@@ -3765,6 +3795,9 @@ static int link_create(union bpf_attr *attr)
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
ret = cgroup_bpf_link_attach(attr, prog);
break;
+ case BPF_PROG_TYPE_TRACING:
+ ret = tracing_bpf_link_attach(attr, prog);
+ break;
default:
ret = -EINVAL;
}
@@ -3927,6 +3960,29 @@ static int bpf_enable_stats(union bpf_attr *attr)
return -EINVAL;
}
+#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags
+
+static int bpf_iter_create(union bpf_attr *attr)
+{
+ struct bpf_link *link;
+ int err;
+
+ if (CHECK_ATTR(BPF_ITER_CREATE))
+ return -EINVAL;
+
+ if (attr->iter_create.flags)
+ return -EINVAL;
+
+ link = bpf_link_get_from_fd(attr->iter_create.link_fd);
+ if (IS_ERR(link))
+ return PTR_ERR(link);
+
+ err = bpf_iter_new_fd(link);
+ bpf_link_put(link);
+
+ return err;
+}
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr;
@@ -4054,6 +4110,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_ENABLE_STATS:
err = bpf_enable_stats(&attr);
break;
+ case BPF_ITER_CREATE:
+ err = bpf_iter_create(&attr);
+ break;
default:
err = -EINVAL;
break;
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
new file mode 100644
index 000000000000..aeed662d8451
--- /dev/null
+++ b/kernel/bpf/task_iter.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+
+#include <linux/init.h>
+#include <linux/namei.h>
+#include <linux/pid_namespace.h>
+#include <linux/fs.h>
+#include <linux/fdtable.h>
+#include <linux/filter.h>
+
+struct bpf_iter_seq_task_common {
+ struct pid_namespace *ns;
+};
+
+struct bpf_iter_seq_task_info {
+ /* The first field must be struct bpf_iter_seq_task_common.
+ * this is assumed by {init, fini}_seq_pidns() callback functions.
+ */
+ struct bpf_iter_seq_task_common common;
+ u32 tid;
+};
+
+static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
+ u32 *tid)
+{
+ struct task_struct *task = NULL;
+ struct pid *pid;
+
+ rcu_read_lock();
+ pid = idr_get_next(&ns->idr, tid);
+ if (pid)
+ task = get_pid_task(pid, PIDTYPE_PID);
+ rcu_read_unlock();
+
+ return task;
+}
+
+static void *task_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct bpf_iter_seq_task_info *info = seq->private;
+ struct task_struct *task;
+
+ task = task_seq_get_next(info->common.ns, &info->tid);
+ if (!task)
+ return NULL;
+
+ ++*pos;
+ return task;
+}
+
+static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_iter_seq_task_info *info = seq->private;
+ struct task_struct *task;
+
+ ++*pos;
+ ++info->tid;
+ put_task_struct((struct task_struct *)v);
+ task = task_seq_get_next(info->common.ns, &info->tid);
+ if (!task)
+ return NULL;
+
+ return task;
+}
+
+struct bpf_iter__task {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct task_struct *, task);
+};
+
+DEFINE_BPF_ITER_FUNC(task, struct bpf_iter_meta *meta, struct task_struct *task)
+
+static int __task_seq_show(struct seq_file *seq, struct task_struct *task,
+ bool in_stop)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_iter__task ctx;
+ struct bpf_prog *prog;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, in_stop);
+ if (!prog)
+ return 0;
+
+ meta.seq = seq;
+ ctx.meta = &meta;
+ ctx.task = task;
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int task_seq_show(struct seq_file *seq, void *v)
+{
+ return __task_seq_show(seq, v, false);
+}
+
+static void task_seq_stop(struct seq_file *seq, void *v)
+{
+ if (!v)
+ (void)__task_seq_show(seq, v, true);
+ else
+ put_task_struct((struct task_struct *)v);
+}
+
+static const struct seq_operations task_seq_ops = {
+ .start = task_seq_start,
+ .next = task_seq_next,
+ .stop = task_seq_stop,
+ .show = task_seq_show,
+};
+
+struct bpf_iter_seq_task_file_info {
+ /* The first field must be struct bpf_iter_seq_task_common.
+ * this is assumed by {init, fini}_seq_pidns() callback functions.
+ */
+ struct bpf_iter_seq_task_common common;
+ struct task_struct *task;
+ struct files_struct *files;
+ u32 tid;
+ u32 fd;
+};
+
+static struct file *
+task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
+ struct task_struct **task, struct files_struct **fstruct)
+{
+ struct pid_namespace *ns = info->common.ns;
+ u32 curr_tid = info->tid, max_fds;
+ struct files_struct *curr_files;
+ struct task_struct *curr_task;
+ int curr_fd = info->fd;
+
+ /* If this function returns a non-NULL file object,
+ * it held a reference to the task/files_struct/file.
+ * Otherwise, it does not hold any reference.
+ */
+again:
+ if (*task) {
+ curr_task = *task;
+ curr_files = *fstruct;
+ curr_fd = info->fd;
+ } else {
+ curr_task = task_seq_get_next(ns, &curr_tid);
+ if (!curr_task)
+ return NULL;
+
+ curr_files = get_files_struct(curr_task);
+ if (!curr_files) {
+ put_task_struct(curr_task);
+ curr_tid = ++(info->tid);
+ info->fd = 0;
+ goto again;
+ }
+
+ /* set *fstruct, *task and info->tid */
+ *fstruct = curr_files;
+ *task = curr_task;
+ if (curr_tid == info->tid) {
+ curr_fd = info->fd;
+ } else {
+ info->tid = curr_tid;
+ curr_fd = 0;
+ }
+ }
+
+ rcu_read_lock();
+ max_fds = files_fdtable(curr_files)->max_fds;
+ for (; curr_fd < max_fds; curr_fd++) {
+ struct file *f;
+
+ f = fcheck_files(curr_files, curr_fd);
+ if (!f)
+ continue;
+
+ /* set info->fd */
+ info->fd = curr_fd;
+ get_file(f);
+ rcu_read_unlock();
+ return f;
+ }
+
+ /* the current task is done, go to the next task */
+ rcu_read_unlock();
+ put_files_struct(curr_files);
+ put_task_struct(curr_task);
+ *task = NULL;
+ *fstruct = NULL;
+ info->fd = 0;
+ curr_tid = ++(info->tid);
+ goto again;
+}
+
+static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct bpf_iter_seq_task_file_info *info = seq->private;
+ struct files_struct *files = NULL;
+ struct task_struct *task = NULL;
+ struct file *file;
+
+ file = task_file_seq_get_next(info, &task, &files);
+ if (!file) {
+ info->files = NULL;
+ info->task = NULL;
+ return NULL;
+ }
+
+ ++*pos;
+ info->task = task;
+ info->files = files;
+
+ return file;
+}
+
+static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_iter_seq_task_file_info *info = seq->private;
+ struct files_struct *files = info->files;
+ struct task_struct *task = info->task;
+ struct file *file;
+
+ ++*pos;
+ ++info->fd;
+ fput((struct file *)v);
+ file = task_file_seq_get_next(info, &task, &files);
+ if (!file) {
+ info->files = NULL;
+ info->task = NULL;
+ return NULL;
+ }
+
+ info->task = task;
+ info->files = files;
+
+ return file;
+}
+
+struct bpf_iter__task_file {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct task_struct *, task);
+ u32 fd __aligned(8);
+ __bpf_md_ptr(struct file *, file);
+};
+
+DEFINE_BPF_ITER_FUNC(task_file, struct bpf_iter_meta *meta,
+ struct task_struct *task, u32 fd,
+ struct file *file)
+
+static int __task_file_seq_show(struct seq_file *seq, struct file *file,
+ bool in_stop)
+{
+ struct bpf_iter_seq_task_file_info *info = seq->private;
+ struct bpf_iter__task_file ctx;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, in_stop);
+ if (!prog)
+ return 0;
+
+ ctx.meta = &meta;
+ ctx.task = info->task;
+ ctx.fd = info->fd;
+ ctx.file = file;
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int task_file_seq_show(struct seq_file *seq, void *v)
+{
+ return __task_file_seq_show(seq, v, false);
+}
+
+static void task_file_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_seq_task_file_info *info = seq->private;
+
+ if (!v) {
+ (void)__task_file_seq_show(seq, v, true);
+ } else {
+ fput((struct file *)v);
+ put_files_struct(info->files);
+ put_task_struct(info->task);
+ info->files = NULL;
+ info->task = NULL;
+ }
+}
+
+static int init_seq_pidns(void *priv_data)
+{
+ struct bpf_iter_seq_task_common *common = priv_data;
+
+ common->ns = get_pid_ns(task_active_pid_ns(current));
+ return 0;
+}
+
+static void fini_seq_pidns(void *priv_data)
+{
+ struct bpf_iter_seq_task_common *common = priv_data;
+
+ put_pid_ns(common->ns);
+}
+
+static const struct seq_operations task_file_seq_ops = {
+ .start = task_file_seq_start,
+ .next = task_file_seq_next,
+ .stop = task_file_seq_stop,
+ .show = task_file_seq_show,
+};
+
+static int __init task_iter_init(void)
+{
+ struct bpf_iter_reg task_file_reg_info = {
+ .target = "task_file",
+ .seq_ops = &task_file_seq_ops,
+ .init_seq_private = init_seq_pidns,
+ .fini_seq_private = fini_seq_pidns,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_task_file_info),
+ };
+ struct bpf_iter_reg task_reg_info = {
+ .target = "task",
+ .seq_ops = &task_seq_ops,
+ .init_seq_private = init_seq_pidns,
+ .fini_seq_private = fini_seq_pidns,
+ .seq_priv_size = sizeof(struct bpf_iter_seq_task_info),
+ };
+ int ret;
+
+ ret = bpf_iter_reg_target(&task_reg_info);
+ if (ret)
+ return ret;
+
+ return bpf_iter_reg_target(&task_file_reg_info);
+}
+late_initcall(task_iter_init);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 70ad009577f8..2a1826c76bb6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -398,7 +398,8 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
return type == PTR_TO_MAP_VALUE_OR_NULL ||
type == PTR_TO_SOCKET_OR_NULL ||
type == PTR_TO_SOCK_COMMON_OR_NULL ||
- type == PTR_TO_TCP_SOCK_OR_NULL;
+ type == PTR_TO_TCP_SOCK_OR_NULL ||
+ type == PTR_TO_BTF_ID_OR_NULL;
}
static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
@@ -483,6 +484,7 @@ static const char * const reg_type_str[] = {
[PTR_TO_TP_BUFFER] = "tp_buffer",
[PTR_TO_XDP_SOCK] = "xdp_sock",
[PTR_TO_BTF_ID] = "ptr_",
+ [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
};
static char slot_type_char[] = {
@@ -543,7 +545,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
/* reg->off should be 0 for SCALAR_VALUE */
verbose(env, "%lld", reg->var_off.value + reg->off);
} else {
- if (t == PTR_TO_BTF_ID)
+ if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL)
verbose(env, "%s", kernel_type_name(reg->btf_id));
verbose(env, "(id=%d", reg->id);
if (reg_type_may_be_refcounted_or_null(t))
@@ -2139,6 +2141,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_TCP_SOCK_OR_NULL:
case PTR_TO_XDP_SOCK:
case PTR_TO_BTF_ID:
+ case PTR_TO_BTF_ID_OR_NULL:
return true;
default:
return false;
@@ -2659,7 +2662,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
*/
*reg_type = info.reg_type;
- if (*reg_type == PTR_TO_BTF_ID)
+ if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL)
*btf_id = info.btf_id;
else
env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
@@ -3243,7 +3246,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
* a sub-register.
*/
regs[value_regno].subreg_def = DEF_NOT_SUBREG;
- if (reg_type == PTR_TO_BTF_ID)
+ if (reg_type == PTR_TO_BTF_ID ||
+ reg_type == PTR_TO_BTF_ID_OR_NULL)
regs[value_regno].btf_id = btf_id;
}
regs[value_regno].type = reg_type;
@@ -3490,6 +3494,11 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
*stype = STACK_MISC;
goto mark;
}
+
+ if (state->stack[spi].slot_type[0] == STACK_SPILL &&
+ state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
+ goto mark;
+
if (state->stack[spi].slot_type[0] == STACK_SPILL &&
state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
@@ -6572,6 +6581,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
reg->type = PTR_TO_SOCK_COMMON;
} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
reg->type = PTR_TO_TCP_SOCK;
+ } else if (reg->type == PTR_TO_BTF_ID_OR_NULL) {
+ reg->type = PTR_TO_BTF_ID;
}
if (is_null) {
/* We don't need id and ref_obj_id from this point
@@ -7101,6 +7112,10 @@ static int check_return_code(struct bpf_verifier_env *env)
return 0;
range = tnum_const(0);
break;
+ case BPF_PROG_TYPE_TRACING:
+ if (env->prog->expected_attach_type != BPF_TRACE_ITER)
+ return 0;
+ break;
default:
return 0;
}
@@ -8425,6 +8440,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
case PTR_TO_TCP_SOCK_OR_NULL:
case PTR_TO_XDP_SOCK:
case PTR_TO_BTF_ID:
+ case PTR_TO_BTF_ID_OR_NULL:
return false;
default:
return true;
@@ -10481,6 +10497,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
struct bpf_prog *tgt_prog = prog->aux->linked_prog;
u32 btf_id = prog->aux->attach_btf_id;
const char prefix[] = "btf_trace_";
+ struct btf_func_model fmodel;
int ret = 0, subprog = -1, i;
struct bpf_trampoline *tr;
const struct btf_type *t;
@@ -10622,6 +10639,23 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
prog->aux->attach_func_proto = t;
prog->aux->attach_btf_trace = true;
return 0;
+ case BPF_TRACE_ITER:
+ if (!btf_type_is_func(t)) {
+ verbose(env, "attach_btf_id %u is not a function\n",
+ btf_id);
+ return -EINVAL;
+ }
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_func_proto(t))
+ return -EINVAL;
+ prog->aux->attach_func_name = tname;
+ prog->aux->attach_func_proto = t;
+ if (!bpf_iter_prog_supported(prog))
+ return -EINVAL;
+ prog->aux->btf_id_or_null_non0_off = true;
+ ret = btf_distill_func_proto(&env->log, btf, t,
+ tname, &fmodel);
+ return ret;
default:
if (!prog_extension)
return -EINVAL;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e875c95d3ced..d961428fb5b6 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -457,6 +457,212 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
return &bpf_trace_printk_proto;
}
+#define MAX_SEQ_PRINTF_VARARGS 12
+#define MAX_SEQ_PRINTF_MAX_MEMCPY 6
+#define MAX_SEQ_PRINTF_STR_LEN 128
+
+struct bpf_seq_printf_buf {
+ char buf[MAX_SEQ_PRINTF_MAX_MEMCPY][MAX_SEQ_PRINTF_STR_LEN];
+};
+static DEFINE_PER_CPU(struct bpf_seq_printf_buf, bpf_seq_printf_buf);
+static DEFINE_PER_CPU(int, bpf_seq_printf_buf_used);
+
+BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
+ const void *, data, u32, data_len)
+{
+ int err = -EINVAL, fmt_cnt = 0, memcpy_cnt = 0;
+ int i, buf_used, copy_size, num_args;
+ u64 params[MAX_SEQ_PRINTF_VARARGS];
+ struct bpf_seq_printf_buf *bufs;
+ const u64 *args = data;
+
+ buf_used = this_cpu_inc_return(bpf_seq_printf_buf_used);
+ if (WARN_ON_ONCE(buf_used > 1)) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ bufs = this_cpu_ptr(&bpf_seq_printf_buf);
+
+ /*
+ * bpf_check()->check_func_arg()->check_stack_boundary()
+ * guarantees that fmt points to bpf program stack,
+ * fmt_size bytes of it were initialized and fmt_size > 0
+ */
+ if (fmt[--fmt_size] != 0)
+ goto out;
+
+ if (data_len & 7)
+ goto out;
+
+ for (i = 0; i < fmt_size; i++) {
+ if (fmt[i] == '%') {
+ if (fmt[i + 1] == '%')
+ i++;
+ else if (!data || !data_len)
+ goto out;
+ }
+ }
+
+ num_args = data_len / 8;
+
+ /* check format string for allowed specifiers */
+ for (i = 0; i < fmt_size; i++) {
+ /* only printable ascii for now. */
+ if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (fmt[i] != '%')
+ continue;
+
+ if (fmt[i + 1] == '%') {
+ i++;
+ continue;
+ }
+
+ if (fmt_cnt >= MAX_SEQ_PRINTF_VARARGS) {
+ err = -E2BIG;
+ goto out;
+ }
+
+ if (fmt_cnt >= num_args) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
+ i++;
+
+ /* skip optional "[0 +-][num]" width formating field */
+ while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
+ fmt[i] == ' ')
+ i++;
+ if (fmt[i] >= '1' && fmt[i] <= '9') {
+ i++;
+ while (fmt[i] >= '0' && fmt[i] <= '9')
+ i++;
+ }
+
+ if (fmt[i] == 's') {
+ /* try our best to copy */
+ if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
+ err = -E2BIG;
+ goto out;
+ }
+
+ err = strncpy_from_unsafe(bufs->buf[memcpy_cnt],
+ (void *) (long) args[fmt_cnt],
+ MAX_SEQ_PRINTF_STR_LEN);
+ if (err < 0)
+ bufs->buf[memcpy_cnt][0] = '\0';
+ params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
+
+ fmt_cnt++;
+ memcpy_cnt++;
+ continue;
+ }
+
+ if (fmt[i] == 'p') {
+ if (fmt[i + 1] == 0 ||
+ fmt[i + 1] == 'K' ||
+ fmt[i + 1] == 'x') {
+ /* just kernel pointers */
+ params[fmt_cnt] = args[fmt_cnt];
+ fmt_cnt++;
+ continue;
+ }
+
+ /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
+ if (fmt[i + 1] != 'i' && fmt[i + 1] != 'I') {
+ err = -EINVAL;
+ goto out;
+ }
+ if (fmt[i + 2] != '4' && fmt[i + 2] != '6') {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
+ err = -E2BIG;
+ goto out;
+ }
+
+
+ copy_size = (fmt[i + 2] == '4') ? 4 : 16;
+
+ err = probe_kernel_read(bufs->buf[memcpy_cnt],
+ (void *) (long) args[fmt_cnt],
+ copy_size);
+ if (err < 0)
+ memset(bufs->buf[memcpy_cnt], 0, copy_size);
+ params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
+
+ i += 2;
+ fmt_cnt++;
+ memcpy_cnt++;
+ continue;
+ }
+
+ if (fmt[i] == 'l') {
+ i++;
+ if (fmt[i] == 'l')
+ i++;
+ }
+
+ if (fmt[i] != 'i' && fmt[i] != 'd' &&
+ fmt[i] != 'u' && fmt[i] != 'x') {
+ err = -EINVAL;
+ goto out;
+ }
+
+ params[fmt_cnt] = args[fmt_cnt];
+ fmt_cnt++;
+ }
+
+ /* Maximumly we can have MAX_SEQ_PRINTF_VARARGS parameter, just give
+ * all of them to seq_printf().
+ */
+ seq_printf(m, fmt, params[0], params[1], params[2], params[3],
+ params[4], params[5], params[6], params[7], params[8],
+ params[9], params[10], params[11]);
+
+ err = seq_has_overflowed(m) ? -EOVERFLOW : 0;
+out:
+ this_cpu_dec(bpf_seq_printf_buf_used);
+ return err;
+}
+
+static int bpf_seq_printf_btf_ids[5];
+static const struct bpf_func_proto bpf_seq_printf_proto = {
+ .func = bpf_seq_printf,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+ .btf_id = bpf_seq_printf_btf_ids,
+};
+
+BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
+{
+ return seq_write(m, data, len) ? -EOVERFLOW : 0;
+}
+
+static int bpf_seq_write_btf_ids[5];
+static const struct bpf_func_proto bpf_seq_write_proto = {
+ .func = bpf_seq_write,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
+ .btf_id = bpf_seq_write_btf_ids,
+};
+
static __always_inline int
get_map_perf_counter(struct bpf_map *map, u64 flags,
u64 *value, u64 *enabled, u64 *running)
@@ -1226,6 +1432,14 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_xdp_output:
return &bpf_xdp_output_proto;
#endif
+ case BPF_FUNC_seq_printf:
+ return prog->expected_attach_type == BPF_TRACE_ITER ?
+ &bpf_seq_printf_proto :
+ NULL;
+ case BPF_FUNC_seq_write:
+ return prog->expected_attach_type == BPF_TRACE_ITER ?
+ &bpf_seq_write_proto :
+ NULL;
default:
return raw_tp_prog_func_proto(func_id, prog);
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 46ed56719476..a1fcc0ca21af 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2467,7 +2467,7 @@ void fib6_gc_cleanup(void)
}
#ifdef CONFIG_PROC_FS
-static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+static int ipv6_route_native_seq_show(struct seq_file *seq, void *v)
{
struct fib6_info *rt = v;
struct ipv6_route_iter *iter = seq->private;
@@ -2625,7 +2625,7 @@ static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
return w->node && !(w->state == FWS_U && w->node == w->root);
}
-static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
__releases(RCU_BH)
{
struct net *net = seq_file_net(seq);
@@ -2637,6 +2637,67 @@ static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
rcu_read_unlock_bh();
}
+#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_iter__ipv6_route {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct fib6_info *, rt);
+};
+
+static int ipv6_route_prog_seq_show(struct bpf_prog *prog,
+ struct bpf_iter_meta *meta,
+ void *v)
+{
+ struct bpf_iter__ipv6_route ctx;
+
+ ctx.meta = meta;
+ ctx.rt = v;
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+{
+ struct ipv6_route_iter *iter = seq->private;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ int ret;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, false);
+ if (!prog)
+ return ipv6_route_native_seq_show(seq, v);
+
+ ret = ipv6_route_prog_seq_show(prog, &meta, v);
+ iter->w.leaf = NULL;
+
+ return ret;
+}
+
+static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ if (!v) {
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, true);
+ if (prog)
+ (void)ipv6_route_prog_seq_show(prog, &meta, v);
+ }
+
+ ipv6_route_native_seq_stop(seq, v);
+}
+#else
+static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+{
+ return ipv6_route_native_seq_show(seq, v);
+}
+
+static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+{
+ ipv6_route_native_seq_stop(seq, v);
+}
+#endif
+
const struct seq_operations ipv6_route_seq_ops = {
.start = ipv6_route_seq_start,
.next = ipv6_route_seq_next,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3912aac7854d..25f6d3e619d0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6393,6 +6393,30 @@ void __init ip6_route_init_special_entries(void)
#endif
}
+#if IS_BUILTIN(CONFIG_IPV6)
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
+
+static int __init bpf_iter_register(void)
+{
+ struct bpf_iter_reg reg_info = {
+ .target = "ipv6_route",
+ .seq_ops = &ipv6_route_seq_ops,
+ .init_seq_private = bpf_iter_init_seq_net,
+ .fini_seq_private = bpf_iter_fini_seq_net,
+ .seq_priv_size = sizeof(struct ipv6_route_iter),
+ };
+
+ return bpf_iter_reg_target(&reg_info);
+}
+
+static void bpf_iter_unregister(void)
+{
+ bpf_iter_unreg_target("ipv6_route");
+}
+#endif
+#endif
+
int __init ip6_route_init(void)
{
int ret;
@@ -6455,6 +6479,14 @@ int __init ip6_route_init(void)
if (ret)
goto out_register_late_subsys;
+#if IS_BUILTIN(CONFIG_IPV6)
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+ ret = bpf_iter_register();
+ if (ret)
+ goto out_register_late_subsys;
+#endif
+#endif
+
for_each_possible_cpu(cpu) {
struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
@@ -6487,6 +6519,11 @@ out_kmem_cache:
void ip6_route_cleanup(void)
{
+#if IS_BUILTIN(CONFIG_IPV6)
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+ bpf_iter_unregister();
+#endif
+#endif
unregister_netdevice_notifier(&ip6_route_dev_notifier);
unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_cleanup();
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5ded01ca8b20..33cda9baa979 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2596,7 +2596,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
return __netlink_seq_next(seq);
}
-static void netlink_seq_stop(struct seq_file *seq, void *v)
+static void netlink_native_seq_stop(struct seq_file *seq, void *v)
{
struct nl_seq_iter *iter = seq->private;
@@ -2607,7 +2607,7 @@ static void netlink_seq_stop(struct seq_file *seq, void *v)
}
-static int netlink_seq_show(struct seq_file *seq, void *v)
+static int netlink_native_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
@@ -2634,6 +2634,68 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
return 0;
}
+#ifdef CONFIG_BPF_SYSCALL
+struct bpf_iter__netlink {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct netlink_sock *, sk);
+};
+
+DEFINE_BPF_ITER_FUNC(netlink, struct bpf_iter_meta *meta, struct netlink_sock *sk)
+
+static int netlink_prog_seq_show(struct bpf_prog *prog,
+ struct bpf_iter_meta *meta,
+ void *v)
+{
+ struct bpf_iter__netlink ctx;
+
+ meta->seq_num--; /* skip SEQ_START_TOKEN */
+ ctx.meta = meta;
+ ctx.sk = nlk_sk((struct sock *)v);
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int netlink_seq_show(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, false);
+ if (!prog)
+ return netlink_native_seq_show(seq, v);
+
+ if (v != SEQ_START_TOKEN)
+ return netlink_prog_seq_show(prog, &meta, v);
+
+ return 0;
+}
+
+static void netlink_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ if (!v) {
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, true);
+ if (prog)
+ (void)netlink_prog_seq_show(prog, &meta, v);
+ }
+
+ netlink_native_seq_stop(seq, v);
+}
+#else
+static int netlink_seq_show(struct seq_file *seq, void *v)
+{
+ return netlink_native_seq_show(seq, v);
+}
+
+static void netlink_seq_stop(struct seq_file *seq, void *v)
+{
+ netlink_native_seq_stop(seq, v);
+}
+#endif
+
static const struct seq_operations netlink_seq_ops = {
.start = netlink_seq_start,
.next = netlink_seq_next,
@@ -2740,6 +2802,21 @@ static const struct rhashtable_params netlink_rhashtable_params = {
.automatic_shrinking = true,
};
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+static int __init bpf_iter_register(void)
+{
+ struct bpf_iter_reg reg_info = {
+ .target = "netlink",
+ .seq_ops = &netlink_seq_ops,
+ .init_seq_private = bpf_iter_init_seq_net,
+ .fini_seq_private = bpf_iter_fini_seq_net,
+ .seq_priv_size = sizeof(struct nl_seq_iter),
+ };
+
+ return bpf_iter_reg_target(&reg_info);
+}
+#endif
+
static int __init netlink_proto_init(void)
{
int i;
@@ -2748,6 +2825,12 @@ static int __init netlink_proto_init(void)
if (err != 0)
goto out;
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+ err = bpf_iter_register();
+ if (err)
+ goto out;
+#endif
+
BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof_field(struct sk_buff, cb));
nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index f43d193aff3a..ded304c96a05 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -414,6 +414,7 @@ class PrinterHelpers(Printer):
'struct sk_reuseport_md',
'struct sockaddr',
'struct tcphdr',
+ 'struct seq_file',
'struct __sk_buff',
'struct sk_msg_md',
@@ -450,6 +451,7 @@ class PrinterHelpers(Printer):
'struct sk_reuseport_md',
'struct sockaddr',
'struct tcphdr',
+ 'struct seq_file',
}
mapped_types = {
'u8': '__u8',
diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
new file mode 100644
index 000000000000..13b173d93890
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
@@ -0,0 +1,83 @@
+============
+bpftool-iter
+============
+-------------------------------------------------------------------------------
+tool to create BPF iterators
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **iter** *COMMAND*
+
+ *COMMANDS* := { **pin** | **help** }
+
+ITER COMMANDS
+===================
+
+| **bpftool** **iter pin** *OBJ* *PATH*
+| **bpftool** **iter help**
+|
+| *OBJ* := /a/file/of/bpf_iter_target.o
+
+
+DESCRIPTION
+===========
+ **bpftool iter pin** *OBJ* *PATH*
+ A bpf iterator combines a kernel iterating of
+ particular kernel data (e.g., tasks, bpf_maps, etc.)
+ and a bpf program called for each kernel data object
+ (e.g., one task, one bpf_map, etc.). User space can
+ *read* kernel iterator output through *read()* syscall.
+
+ The *pin* command creates a bpf iterator from *OBJ*,
+ and pin it to *PATH*. The *PATH* should be located
+ in *bpffs* mount. It must not contain a dot
+ character ('.'), which is reserved for future extensions
+ of *bpffs*.
+
+ User can then *cat PATH* to see the bpf iterator output.
+
+ **bpftool iter help**
+ Print short help message.
+
+OPTIONS
+=======
+ -h, --help
+ Print short generic help message (similar to **bpftool help**).
+
+ -V, --version
+ Print version number (similar to **bpftool version**).
+
+ -d, --debug
+ Print all logs available, even debug-level information. This
+ includes logs from libbpf as well as from the verifier, when
+ attempting to load programs.
+
+EXAMPLES
+========
+**# bpftool iter pin bpf_iter_netlink.o /sys/fs/bpf/my_netlink**
+
+::
+
+ Create a file-based bpf iterator from bpf_iter_netlink.o and pin it
+ to /sys/fs/bpf/my_netlink
+
+
+SEE ALSO
+========
+ **bpf**\ (2),
+ **bpf-helpers**\ (7),
+ **bpftool**\ (8),
+ **bpftool-prog**\ (8),
+ **bpftool-map**\ (8),
+ **bpftool-link**\ (8),
+ **bpftool-cgroup**\ (8),
+ **bpftool-feature**\ (8),
+ **bpftool-net**\ (8),
+ **bpftool-perf**\ (8),
+ **bpftool-btf**\ (8)
+ **bpftool-gen**\ (8)
+ **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index fc989ead7313..9f0f20e73b87 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -610,6 +610,19 @@ _bpftool()
;;
esac
;;
+ iter)
+ case $command in
+ pin)
+ _filedir
+ return 0
+ ;;
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'pin help' \
+ -- "$cur" ) )
+ ;;
+ esac
+ ;;
map)
local MAP_TYPE='id pinned name'
case $command in
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
new file mode 100644
index 000000000000..eb5987a0c3b6
--- /dev/null
+++ b/tools/bpf/bpftool/iter.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (C) 2020 Facebook
+
+#define _GNU_SOURCE
+#include <linux/err.h>
+#include <bpf/libbpf.h>
+
+#include "main.h"
+
+static int do_pin(int argc, char **argv)
+{
+ const char *objfile, *path;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ struct bpf_link *link;
+ int err;
+
+ if (!REQ_ARGS(2))
+ usage();
+
+ objfile = GET_ARG();
+ path = GET_ARG();
+
+ obj = bpf_object__open(objfile);
+ if (IS_ERR(obj)) {
+ p_err("can't open objfile %s", objfile);
+ return -1;
+ }
+
+ err = bpf_object__load(obj);
+ if (err) {
+ p_err("can't load objfile %s", objfile);
+ goto close_obj;
+ }
+
+ prog = bpf_program__next(NULL, obj);
+ if (!prog) {
+ p_err("can't find bpf program in objfile %s", objfile);
+ goto close_obj;
+ }
+
+ link = bpf_program__attach_iter(prog, NULL);
+ if (IS_ERR(link)) {
+ err = PTR_ERR(link);
+ p_err("attach_iter failed for program %s",
+ bpf_program__name(prog));
+ goto close_obj;
+ }
+
+ err = mount_bpffs_for_pin(path);
+ if (err)
+ goto close_link;
+
+ err = bpf_link__pin(link, path);
+ if (err) {
+ p_err("pin_iter failed for program %s to path %s",
+ bpf_program__name(prog), path);
+ goto close_link;
+ }
+
+close_link:
+ bpf_link__destroy(link);
+close_obj:
+ bpf_object__close(obj);
+ return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+ fprintf(stderr,
+ "Usage: %s %s pin OBJ PATH\n"
+ " %s %s help\n"
+ "\n",
+ bin_name, argv[-2], bin_name, argv[-2]);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "help", do_help },
+ { "pin", do_pin },
+ { 0 }
+};
+
+int do_iter(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index adc7dc431ed8..b6a0b35c78ae 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -16,6 +16,7 @@ static const char * const link_type_name[] = {
[BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
[BPF_LINK_TYPE_TRACING] = "tracing",
[BPF_LINK_TYPE_CGROUP] = "cgroup",
+ [BPF_LINK_TYPE_ITER] = "iter",
};
static int link_parse_fd(int *argc, char ***argv)
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 1413a154806e..46bd716a9d86 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -59,7 +59,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
- " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops }\n"
+ " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, bin_name, bin_name);
@@ -224,6 +224,7 @@ static const struct cmd cmds[] = {
{ "btf", do_btf },
{ "gen", do_gen },
{ "struct_ops", do_struct_ops },
+ { "iter", do_iter },
{ "version", do_version },
{ 0 }
};
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 9b1fb81a8331..a41cefabccaf 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -199,6 +199,7 @@ int do_feature(int argc, char **argv);
int do_btf(int argc, char **argv);
int do_gen(int argc, char **argv);
int do_struct_ops(int argc, char **argv);
+int do_iter(int argc, char **argv);
int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
int prog_parse_fd(int *argc, char ***argv);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6e5e7caa3739..9d1932e23cec 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -116,6 +116,7 @@ enum bpf_cmd {
BPF_LINK_GET_FD_BY_ID,
BPF_LINK_GET_NEXT_ID,
BPF_ENABLE_STATS,
+ BPF_ITER_CREATE,
};
enum bpf_map_type {
@@ -218,6 +219,7 @@ enum bpf_attach_type {
BPF_TRACE_FEXIT,
BPF_MODIFY_RETURN,
BPF_LSM_MAC,
+ BPF_TRACE_ITER,
__MAX_BPF_ATTACH_TYPE
};
@@ -228,6 +230,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
BPF_LINK_TYPE_TRACING = 2,
BPF_LINK_TYPE_CGROUP = 3,
+ BPF_LINK_TYPE_ITER = 4,
MAX_BPF_LINK_TYPE,
};
@@ -612,6 +615,11 @@ union bpf_attr {
__u32 type;
} enable_stats;
+ struct { /* struct used by BPF_ITER_CREATE command */
+ __u32 link_fd;
+ __u32 flags;
+ } iter_create;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -3069,6 +3077,41 @@ union bpf_attr {
* See: clock_gettime(CLOCK_BOOTTIME)
* Return
* Current *ktime*.
+ *
+ * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ * Description
+ * seq_printf uses seq_file seq_printf() to print out the format string.
+ * The *m* represents the seq_file. The *fmt* and *fmt_size* are for
+ * the format string itself. The *data* and *data_len* are format string
+ * arguments. The *data* are a u64 array and corresponding format string
+ * values are stored in the array. For strings and pointers where pointees
+ * are accessed, only the pointer values are stored in the *data* array.
+ * The *data_len* is the *data* size in term of bytes.
+ *
+ * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
+ * Reading kernel memory may fail due to either invalid address or
+ * valid address but requiring a major memory fault. If reading kernel memory
+ * fails, the string for **%s** will be an empty string, and the ip
+ * address for **%p{i,I}{4,6}** will be 0. Not returning error to
+ * bpf program is consistent with what bpf_trace_printk() does for now.
+ * Return
+ * 0 on success, or a negative errno in case of failure.
+ *
+ * * **-EBUSY** Percpu memory copy buffer is busy, can try again
+ * by returning 1 from bpf program.
+ * * **-EINVAL** Invalid arguments, or invalid/unsupported formats.
+ * * **-E2BIG** Too many format specifiers.
+ * * **-EOVERFLOW** Overflow happens, the same object will be tried again.
+ *
+ * int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
+ * Description
+ * seq_write uses seq_file seq_write() to write the data.
+ * The *m* represents the seq_file. The *data* and *len* represent the
+ * data to write in bytes.
+ * Return
+ * 0 on success, or a negative errno in case of failure.
+ *
+ * * **-EOVERFLOW** Overflow happens, the same object will be tried again.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3196,7 +3239,9 @@ union bpf_attr {
FN(get_netns_cookie), \
FN(get_current_ancestor_cgroup_id), \
FN(sk_assign), \
- FN(ktime_get_boot_ns),
+ FN(ktime_get_boot_ns), \
+ FN(seq_printf), \
+ FN(seq_write),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 43322f0d6c7f..a7329b671c41 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -619,6 +619,16 @@ int bpf_link_update(int link_fd, int new_prog_fd,
return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
}
+int bpf_iter_create(int link_fd)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.iter_create.link_fd = link_fd;
+
+ return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+}
+
int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
__u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
{
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 1901b2777854..1b6015b21ba8 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -187,6 +187,8 @@ struct bpf_link_update_opts {
LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
const struct bpf_link_update_opts *opts);
+LIBBPF_API int bpf_iter_create(int link_fd);
+
struct bpf_prog_test_run_attr {
int prog_fd;
int repeat;
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index da00b87aa199..f67dce2af802 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -37,6 +37,20 @@
#endif
/*
+ * Helper macro to manipulate data structures
+ */
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
+#endif
+#ifndef container_of
+#define container_of(ptr, type, member) \
+ ({ \
+ void *__mptr = (void *)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); \
+ })
+#endif
+
+/*
* Helper structure used by eBPF C program
* to describe BPF map attributes to libbpf loader
*/
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index f3f3c3fb98cb..cf97d07692b4 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -413,4 +413,20 @@ typeof(name(0)) name(struct pt_regs *ctx) \
} \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
+/*
+ * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
+ * in a structure.
+ */
+#define BPF_SEQ_PRINTF(seq, fmt, args...) \
+ ({ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[] = { args }; \
+ _Pragma("GCC diagnostic pop") \
+ int ___ret = bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
+ ___param, sizeof(___param)); \
+ ___ret; \
+ })
+
#endif
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 977add1b73e2..6c2f46908f4d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6586,6 +6586,8 @@ static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
struct bpf_program *prog);
static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
struct bpf_program *prog);
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
static const struct bpf_sec_def section_defs[] = {
BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
@@ -6629,6 +6631,10 @@ static const struct bpf_sec_def section_defs[] = {
.is_attach_btf = true,
.expected_attach_type = BPF_LSM_MAC,
.attach_fn = attach_lsm),
+ SEC_DEF("iter/", TRACING,
+ .expected_attach_type = BPF_TRACE_ITER,
+ .is_attach_btf = true,
+ .attach_fn = attach_iter),
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
@@ -6891,6 +6897,7 @@ invalid_prog:
#define BTF_TRACE_PREFIX "btf_trace_"
#define BTF_LSM_PREFIX "bpf_lsm_"
+#define BTF_ITER_PREFIX "__bpf_iter__"
#define BTF_MAX_NAME_SIZE 128
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
@@ -6921,6 +6928,9 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
else if (attach_type == BPF_LSM_MAC)
err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
BTF_KIND_FUNC);
+ else if (attach_type == BPF_TRACE_ITER)
+ err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
+ BTF_KIND_FUNC);
else
err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
@@ -7848,6 +7858,12 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
return bpf_program__attach_lsm(prog);
}
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
+ struct bpf_program *prog)
+{
+ return bpf_program__attach_iter(prog, NULL);
+}
+
struct bpf_link *
bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
{
@@ -7882,6 +7898,42 @@ bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
return link;
}
+struct bpf_link *
+bpf_program__attach_iter(struct bpf_program *prog,
+ const struct bpf_iter_attach_opts *opts)
+{
+ char errmsg[STRERR_BUFSIZE];
+ struct bpf_link *link;
+ int prog_fd, link_fd;
+
+ if (!OPTS_VALID(opts, bpf_iter_attach_opts))
+ return ERR_PTR(-EINVAL);
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ pr_warn("program '%s': can't attach before loaded\n",
+ bpf_program__title(prog, false));
+ return ERR_PTR(-EINVAL);
+ }
+
+ link = calloc(1, sizeof(*link));
+ if (!link)
+ return ERR_PTR(-ENOMEM);
+ link->detach = &bpf_link__detach_fd;
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_ITER, NULL);
+ if (link_fd < 0) {
+ link_fd = -errno;
+ free(link);
+ pr_warn("program '%s': failed to attach to iterator: %s\n",
+ bpf_program__title(prog, false),
+ libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
+ return ERR_PTR(link_fd);
+ }
+ link->fd = link_fd;
+ return link;
+}
+
struct bpf_link *bpf_program__attach(struct bpf_program *prog)
{
const struct bpf_sec_def *sec_def;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index f1dacecb1619..8ea69558f0a8 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -258,6 +258,15 @@ struct bpf_map;
LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
+struct bpf_iter_attach_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+};
+#define bpf_iter_attach_opts__last_field sz
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_iter(struct bpf_program *prog,
+ const struct bpf_iter_attach_opts *opts);
+
struct bpf_insn;
/*
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index e03bd4db827e..0133d469d30b 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -258,6 +258,8 @@ LIBBPF_0.0.8 {
LIBBPF_0.0.9 {
global:
bpf_enable_stats;
+ bpf_iter_create;
bpf_link_get_fd_by_id;
bpf_link_get_next_id;
+ bpf_program__attach_iter;
} LIBBPF_0.0.8;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
new file mode 100644
index 000000000000..87c29dde1cf9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include "bpf_iter_ipv6_route.skel.h"
+#include "bpf_iter_netlink.skel.h"
+#include "bpf_iter_bpf_map.skel.h"
+#include "bpf_iter_task.skel.h"
+#include "bpf_iter_task_file.skel.h"
+#include "bpf_iter_test_kern1.skel.h"
+#include "bpf_iter_test_kern2.skel.h"
+#include "bpf_iter_test_kern3.skel.h"
+#include "bpf_iter_test_kern4.skel.h"
+
+static int duration;
+
+static void test_btf_id_or_null(void)
+{
+ struct bpf_iter_test_kern3 *skel;
+
+ skel = bpf_iter_test_kern3__open_and_load();
+ if (CHECK(skel, "bpf_iter_test_kern3__open_and_load",
+ "skeleton open_and_load unexpectedly succeeded\n")) {
+ bpf_iter_test_kern3__destroy(skel);
+ return;
+ }
+}
+
+static void do_dummy_read(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+ char buf[16] = {};
+ int iter_fd, len;
+
+ link = bpf_program__attach_iter(prog, NULL);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ return;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ /* not check contents, but ensure read() ends without error */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ CHECK(len < 0, "read", "read failed: %s\n", strerror(errno));
+
+ close(iter_fd);
+
+free_link:
+ bpf_link__destroy(link);
+}
+
+static void test_ipv6_route(void)
+{
+ struct bpf_iter_ipv6_route *skel;
+
+ skel = bpf_iter_ipv6_route__open_and_load();
+ if (CHECK(!skel, "bpf_iter_ipv6_route__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_ipv6_route);
+
+ bpf_iter_ipv6_route__destroy(skel);
+}
+
+static void test_netlink(void)
+{
+ struct bpf_iter_netlink *skel;
+
+ skel = bpf_iter_netlink__open_and_load();
+ if (CHECK(!skel, "bpf_iter_netlink__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_netlink);
+
+ bpf_iter_netlink__destroy(skel);
+}
+
+static void test_bpf_map(void)
+{
+ struct bpf_iter_bpf_map *skel;
+
+ skel = bpf_iter_bpf_map__open_and_load();
+ if (CHECK(!skel, "bpf_iter_bpf_map__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_bpf_map);
+
+ bpf_iter_bpf_map__destroy(skel);
+}
+
+static void test_task(void)
+{
+ struct bpf_iter_task *skel;
+
+ skel = bpf_iter_task__open_and_load();
+ if (CHECK(!skel, "bpf_iter_task__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_task);
+
+ bpf_iter_task__destroy(skel);
+}
+
+static void test_task_file(void)
+{
+ struct bpf_iter_task_file *skel;
+
+ skel = bpf_iter_task_file__open_and_load();
+ if (CHECK(!skel, "bpf_iter_task_file__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_task_file);
+
+ bpf_iter_task_file__destroy(skel);
+}
+
+/* The expected string is less than 16 bytes */
+static int do_read_with_fd(int iter_fd, const char *expected,
+ bool read_one_char)
+{
+ int err = -1, len, read_buf_len, start;
+ char buf[16] = {};
+
+ read_buf_len = read_one_char ? 1 : 16;
+ start = 0;
+ while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) {
+ start += len;
+ if (CHECK(start >= 16, "read", "read len %d\n", len))
+ return -1;
+ read_buf_len = read_one_char ? 1 : 16 - start;
+ }
+ if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ return -1;
+
+ err = strcmp(buf, expected);
+ if (CHECK(err, "read", "incorrect read result: buf %s, expected %s\n",
+ buf, expected))
+ return -1;
+
+ return 0;
+}
+
+static void test_anon_iter(bool read_one_char)
+{
+ struct bpf_iter_test_kern1 *skel;
+ struct bpf_link *link;
+ int iter_fd, err;
+
+ skel = bpf_iter_test_kern1__open_and_load();
+ if (CHECK(!skel, "bpf_iter_test_kern1__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ err = bpf_iter_test_kern1__attach(skel);
+ if (CHECK(err, "bpf_iter_test_kern1__attach",
+ "skeleton attach failed\n")) {
+ goto out;
+ }
+
+ link = skel->links.dump_task;
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto out;
+
+ do_read_with_fd(iter_fd, "abcd", read_one_char);
+ close(iter_fd);
+
+out:
+ bpf_iter_test_kern1__destroy(skel);
+}
+
+static int do_read(const char *path, const char *expected)
+{
+ int err, iter_fd;
+
+ iter_fd = open(path, O_RDONLY);
+ if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n",
+ path, strerror(errno)))
+ return -1;
+
+ err = do_read_with_fd(iter_fd, expected, false);
+ close(iter_fd);
+ return err;
+}
+
+static void test_file_iter(void)
+{
+ const char *path = "/sys/fs/bpf/bpf_iter_test1";
+ struct bpf_iter_test_kern1 *skel1;
+ struct bpf_iter_test_kern2 *skel2;
+ struct bpf_link *link;
+ int err;
+
+ skel1 = bpf_iter_test_kern1__open_and_load();
+ if (CHECK(!skel1, "bpf_iter_test_kern1__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ goto out;
+
+ /* unlink this path if it exists. */
+ unlink(path);
+
+ err = bpf_link__pin(link, path);
+ if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err))
+ goto free_link;
+
+ err = do_read(path, "abcd");
+ if (err)
+ goto unlink_path;
+
+ /* file based iterator seems working fine. Let us a link update
+ * of the underlying link and `cat` the iterator again, its content
+ * should change.
+ */
+ skel2 = bpf_iter_test_kern2__open_and_load();
+ if (CHECK(!skel2, "bpf_iter_test_kern2__open_and_load",
+ "skeleton open_and_load failed\n"))
+ goto unlink_path;
+
+ err = bpf_link__update_program(link, skel2->progs.dump_task);
+ if (CHECK(err, "update_prog", "update_prog failed\n"))
+ goto destroy_skel2;
+
+ do_read(path, "ABCD");
+
+destroy_skel2:
+ bpf_iter_test_kern2__destroy(skel2);
+unlink_path:
+ unlink(path);
+free_link:
+ bpf_link__destroy(link);
+out:
+ bpf_iter_test_kern1__destroy(skel1);
+}
+
+static void test_overflow(bool test_e2big_overflow, bool ret1)
+{
+ __u32 map_info_len, total_read_len, expected_read_len;
+ int err, iter_fd, map1_fd, map2_fd, len;
+ struct bpf_map_info map_info = {};
+ struct bpf_iter_test_kern4 *skel;
+ struct bpf_link *link;
+ __u32 page_size;
+ char *buf;
+
+ skel = bpf_iter_test_kern4__open();
+ if (CHECK(!skel, "bpf_iter_test_kern4__open",
+ "skeleton open failed\n"))
+ return;
+
+ /* create two maps: bpf program will only do bpf_seq_write
+ * for these two maps. The goal is one map output almost
+ * fills seq_file buffer and then the other will trigger
+ * overflow and needs restart.
+ */
+ map1_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+ if (CHECK(map1_fd < 0, "bpf_create_map",
+ "map_creation failed: %s\n", strerror(errno)))
+ goto out;
+ map2_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+ if (CHECK(map2_fd < 0, "bpf_create_map",
+ "map_creation failed: %s\n", strerror(errno)))
+ goto free_map1;
+
+ /* bpf_seq_printf kernel buffer is one page, so one map
+ * bpf_seq_write will mostly fill it, and the other map
+ * will partially fill and then trigger overflow and need
+ * bpf_seq_read restart.
+ */
+ page_size = sysconf(_SC_PAGE_SIZE);
+
+ if (test_e2big_overflow) {
+ skel->rodata->print_len = (page_size + 8) / 8;
+ expected_read_len = 2 * (page_size + 8);
+ } else if (!ret1) {
+ skel->rodata->print_len = (page_size - 8) / 8;
+ expected_read_len = 2 * (page_size - 8);
+ } else {
+ skel->rodata->print_len = 1;
+ expected_read_len = 2 * 8;
+ }
+ skel->rodata->ret1 = ret1;
+
+ if (CHECK(bpf_iter_test_kern4__load(skel),
+ "bpf_iter_test_kern4__load", "skeleton load failed\n"))
+ goto free_map2;
+
+ /* setup filtering map_id in bpf program */
+ map_info_len = sizeof(map_info);
+ err = bpf_obj_get_info_by_fd(map1_fd, &map_info, &map_info_len);
+ if (CHECK(err, "get_map_info", "get map info failed: %s\n",
+ strerror(errno)))
+ goto free_map2;
+ skel->bss->map1_id = map_info.id;
+
+ err = bpf_obj_get_info_by_fd(map2_fd, &map_info, &map_info_len);
+ if (CHECK(err, "get_map_info", "get map info failed: %s\n",
+ strerror(errno)))
+ goto free_map2;
+ skel->bss->map2_id = map_info.id;
+
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ goto free_map2;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ buf = malloc(expected_read_len);
+ if (!buf)
+ goto close_iter;
+
+ /* do read */
+ total_read_len = 0;
+ if (test_e2big_overflow) {
+ while ((len = read(iter_fd, buf, expected_read_len)) > 0)
+ total_read_len += len;
+
+ CHECK(len != -1 || errno != E2BIG, "read",
+ "expected ret -1, errno E2BIG, but get ret %d, error %s\n",
+ len, strerror(errno));
+ goto free_buf;
+ } else if (!ret1) {
+ while ((len = read(iter_fd, buf, expected_read_len)) > 0)
+ total_read_len += len;
+
+ if (CHECK(len < 0, "read", "read failed: %s\n",
+ strerror(errno)))
+ goto free_buf;
+ } else {
+ do {
+ len = read(iter_fd, buf, expected_read_len);
+ if (len > 0)
+ total_read_len += len;
+ } while (len > 0 || len == -EAGAIN);
+
+ if (CHECK(len < 0, "read", "read failed: %s\n",
+ strerror(errno)))
+ goto free_buf;
+ }
+
+ if (CHECK(total_read_len != expected_read_len, "read",
+ "total len %u, expected len %u\n", total_read_len,
+ expected_read_len))
+ goto free_buf;
+
+ if (CHECK(skel->bss->map1_accessed != 1, "map1_accessed",
+ "expected 1 actual %d\n", skel->bss->map1_accessed))
+ goto free_buf;
+
+ if (CHECK(skel->bss->map2_accessed != 2, "map2_accessed",
+ "expected 2 actual %d\n", skel->bss->map2_accessed))
+ goto free_buf;
+
+ CHECK(skel->bss->map2_seqnum1 != skel->bss->map2_seqnum2,
+ "map2_seqnum", "two different seqnum %lld %lld\n",
+ skel->bss->map2_seqnum1, skel->bss->map2_seqnum2);
+
+free_buf:
+ free(buf);
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+free_map2:
+ close(map2_fd);
+free_map1:
+ close(map1_fd);
+out:
+ bpf_iter_test_kern4__destroy(skel);
+}
+
+void test_bpf_iter(void)
+{
+ if (test__start_subtest("btf_id_or_null"))
+ test_btf_id_or_null();
+ if (test__start_subtest("ipv6_route"))
+ test_ipv6_route();
+ if (test__start_subtest("netlink"))
+ test_netlink();
+ if (test__start_subtest("bpf_map"))
+ test_bpf_map();
+ if (test__start_subtest("task"))
+ test_task();
+ if (test__start_subtest("task_file"))
+ test_task_file();
+ if (test__start_subtest("anon"))
+ test_anon_iter(false);
+ if (test__start_subtest("anon-read-one-char"))
+ test_anon_iter(true);
+ if (test__start_subtest("file"))
+ test_file_iter();
+ if (test__start_subtest("overflow"))
+ test_overflow(false, false);
+ if (test__start_subtest("overflow-e2big"))
+ test_overflow(true, false);
+ if (test__start_subtest("prog-ret-1"))
+ test_overflow(false, true);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
new file mode 100644
index 000000000000..4867cd3445c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/bpf_map")
+int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ __u64 seq_num = ctx->meta->seq_num;
+ struct bpf_map *map = ctx->map;
+
+ if (map == (void *)0) {
+ BPF_SEQ_PRINTF(seq, " %%%%%% END %%%%%%\n");
+ return 0;
+ }
+
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " id refcnt usercnt locked_vm\n");
+
+ BPF_SEQ_PRINTF(seq, "%8u %8ld %8ld %10lu\n", map->id, map->refcnt.counter,
+ map->usercnt.counter,
+ map->memory.user->locked_vm.counter);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
new file mode 100644
index 000000000000..ab9e2650e021
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
+
+#define RTF_GATEWAY 0x0002
+#define IFNAMSIZ 16
+#define fib_nh_gw_family nh_common.nhc_gw_family
+#define fib_nh_gw6 nh_common.nhc_gw.ipv6
+#define fib_nh_dev nh_common.nhc_dev
+
+SEC("iter/ipv6_route")
+int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct fib6_info *rt = ctx->rt;
+ const struct net_device *dev;
+ struct fib6_nh *fib6_nh;
+ unsigned int flags;
+ struct nexthop *nh;
+
+ if (rt == (void *)0)
+ return 0;
+
+ fib6_nh = &rt->fib6_nh[0];
+ flags = rt->fib6_flags;
+
+ /* FIXME: nexthop_is_multipath is not handled here. */
+ nh = rt->nh;
+ if (rt->nh)
+ fib6_nh = &nh->nh_info->fib6_nh;
+
+ BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
+
+ if (CONFIG_IPV6_SUBTREES)
+ BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_src.addr,
+ rt->fib6_src.plen);
+ else
+ BPF_SEQ_PRINTF(seq, "00000000000000000000000000000000 00 ");
+
+ if (fib6_nh->fib_nh_gw_family) {
+ flags |= RTF_GATEWAY;
+ BPF_SEQ_PRINTF(seq, "%pi6 ", &fib6_nh->fib_nh_gw6);
+ } else {
+ BPF_SEQ_PRINTF(seq, "00000000000000000000000000000000 ");
+ }
+
+ dev = fib6_nh->fib_nh_dev;
+ if (dev)
+ BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x %8s\n", rt->fib6_metric,
+ rt->fib6_ref.refs.counter, 0, flags, dev->name);
+ else
+ BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x\n", rt->fib6_metric,
+ rt->fib6_ref.refs.counter, 0, flags);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
new file mode 100644
index 000000000000..6b40a233d4e0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define sk_rmem_alloc sk_backlog.rmem_alloc
+#define sk_refcnt __sk_common.skc_refcnt
+
+static inline struct inode *SOCK_INODE(struct socket *socket)
+{
+ return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
+}
+
+SEC("iter/netlink")
+int dump_netlink(struct bpf_iter__netlink *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct netlink_sock *nlk = ctx->sk;
+ unsigned long group, ino;
+ struct inode *inode;
+ struct socket *sk;
+ struct sock *s;
+
+ if (nlk == (void *)0)
+ return 0;
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "sk Eth Pid Groups "
+ "Rmem Wmem Dump Locks Drops "
+ "Inode\n");
+
+ s = &nlk->sk;
+ BPF_SEQ_PRINTF(seq, "%pK %-3d ", s, s->sk_protocol);
+
+ if (!nlk->groups) {
+ group = 0;
+ } else {
+ /* FIXME: temporary use bpf_probe_read here, needs
+ * verifier support to do direct access.
+ */
+ bpf_probe_read(&group, sizeof(group), &nlk->groups[0]);
+ }
+ BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ",
+ nlk->portid, (u32)group,
+ s->sk_rmem_alloc.counter,
+ s->sk_wmem_alloc.refs.counter - 1,
+ nlk->cb_running, s->sk_refcnt.refs.counter);
+
+ sk = s->sk_socket;
+ if (!sk) {
+ ino = 0;
+ } else {
+ /* FIXME: container_of inside SOCK_INODE has a forced
+ * type conversion, and direct access cannot be used
+ * with current verifier.
+ */
+ inode = SOCK_INODE(sk);
+ bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+ }
+ BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
new file mode 100644
index 000000000000..90f9011c57ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+
+ if (task == (void *)0) {
+ BPF_SEQ_PRINTF(seq, " === END ===\n");
+ return 0;
+ }
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " tgid gid\n");
+
+ BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
new file mode 100644
index 000000000000..c6ced38f0880
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/task_file")
+int dump_task_file(struct bpf_iter__task_file *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ __u32 fd = ctx->fd;
+ struct file *file = ctx->file;
+
+ if (task == (void *)0 || file == (void *)0)
+ return 0;
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " tgid gid fd file\n");
+
+ BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
+ (long)file->f_op);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c
new file mode 100644
index 000000000000..c71a7c283108
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define START_CHAR 'a'
+#include "bpf_iter_test_kern_common.h"
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c
new file mode 100644
index 000000000000..8bdc8dc07444
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define START_CHAR 'A'
+#include "bpf_iter_test_kern_common.h"
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
new file mode 100644
index 000000000000..636a00fa074d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ int tgid;
+
+ tgid = task->tgid;
+ bpf_seq_write(seq, &tgid, sizeof(tgid));
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
new file mode 100644
index 000000000000..b18dc0471d07
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 map1_id = 0, map2_id = 0;
+__u32 map1_accessed = 0, map2_accessed = 0;
+__u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
+
+static volatile const __u32 print_len;
+static volatile const __u32 ret1;
+
+SEC("iter/bpf_map")
+int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct bpf_map *map = ctx->map;
+ __u64 seq_num;
+ int i, ret = 0;
+
+ if (map == (void *)0)
+ return 0;
+
+ /* only dump map1_id and map2_id */
+ if (map->id != map1_id && map->id != map2_id)
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (map->id == map1_id) {
+ map1_seqnum = seq_num;
+ map1_accessed++;
+ }
+
+ if (map->id == map2_id) {
+ if (map2_accessed == 0) {
+ map2_seqnum1 = seq_num;
+ if (ret1)
+ ret = 1;
+ } else {
+ map2_seqnum2 = seq_num;
+ }
+ map2_accessed++;
+ }
+
+ /* fill seq_file buffer */
+ for (i = 0; i < print_len; i++)
+ bpf_seq_write(seq, &seq_num, sizeof(seq_num));
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
new file mode 100644
index 000000000000..bdd51cf14b54
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+int count = 0;
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ char c;
+
+ if (count < 4) {
+ c = START_CHAR + count;
+ bpf_seq_write(seq, &c, sizeof(c));
+ count++;
+ }
+
+ return 0;
+}