From fada7fdc83c0bf8755956bff707c42b609223301 Mon Sep 17 00:00:00 2001
From: Jonathan Lemon <jonathan.lemon@gmail.com>
Date: Thu, 6 Jun 2019 13:59:40 -0700
Subject: bpf: Allow bpf_map_lookup_elem() on an xskmap

Currently, the AF_XDP code uses a separate map in order to
determine if an xsk is bound to a queue.  Instead of doing this,
have bpf_map_lookup_elem() return a xdp_sock.

Rearrange some xdp_sock members to eliminate structure holes.

Remove selftest - will be added back in later patch.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e5a309e6a400..1fe137afa898 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -280,6 +280,7 @@ enum bpf_reg_type {
 	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
 	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
 	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
+	PTR_TO_XDP_SOCK,	 /* reg points to struct xdp_sock */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -727,6 +728,13 @@ void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
 void __cpu_map_flush(struct bpf_map *map);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
+bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+				  struct bpf_insn_access_aux *info);
+u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
+				    const struct bpf_insn *si,
+				    struct bpf_insn *insn_buf,
+				    struct bpf_prog *prog,
+				    u32 *target_size);
 
 /* Return map's numa specified by userspace */
 static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
-- 
cgit 


From 7f94208c8f9a0a6d2ff0e0c0858c00ad8e5c8617 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 12 Jun 2019 17:18:47 +0800
Subject: bpf: Fix build error without CONFIG_INET

If CONFIG_INET is not set, building fails:

kernel/bpf/verifier.o: In function `check_mem_access':
verifier.c: undefined reference to `bpf_xdp_sock_is_valid_access'
kernel/bpf/verifier.o: In function `convert_ctx_accesses':
verifier.c: undefined reference to `bpf_xdp_sock_convert_ctx_access'

Reported-by: Hulk Robot <hulkci@huawei.com>
Fixes: fada7fdc83c0 ("bpf: Allow bpf_map_lookup_elem() on an xskmap")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1fe137afa898..b15fb5fcb741 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -728,13 +728,6 @@ void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
 void __cpu_map_flush(struct bpf_map *map);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
-bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
-				  struct bpf_insn_access_aux *info);
-u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
-				    const struct bpf_insn *si,
-				    struct bpf_insn *insn_buf,
-				    struct bpf_prog *prog,
-				    u32 *target_size);
 
 /* Return map's numa specified by userspace */
 static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
@@ -1110,6 +1103,15 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
 				    struct bpf_insn *insn_buf,
 				    struct bpf_prog *prog,
 				    u32 *target_size);
+
+bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+				  struct bpf_insn_access_aux *info);
+
+u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
+				    const struct bpf_insn *si,
+				    struct bpf_insn *insn_buf,
+				    struct bpf_prog *prog,
+				    u32 *target_size);
 #else
 static inline bool bpf_tcp_sock_is_valid_access(int off, int size,
 						enum bpf_access_type type,
@@ -1126,6 +1128,21 @@ static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
 {
 	return 0;
 }
+static inline bool bpf_xdp_sock_is_valid_access(int off, int size,
+						enum bpf_access_type type,
+						struct bpf_insn_access_aux *info)
+{
+	return false;
+}
+
+static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
+						  const struct bpf_insn *si,
+						  struct bpf_insn *insn_buf,
+						  struct bpf_prog *prog,
+						  u32 *target_size)
+{
+	return 0;
+}
 #endif /* CONFIG_INET */
 
 #endif /* _LINUX_BPF_H */
-- 
cgit 


From 2589726d12a1b12eaaa93c7f1ea64287e383c7a5 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Sat, 15 Jun 2019 12:12:20 -0700
Subject: bpf: introduce bounded loops

Allow the verifier to validate the loops by simulating their execution.
Exisiting programs have used '#pragma unroll' to unroll the loops
by the compiler. Instead let the verifier simulate all iterations
of the loop.
In order to do that introduce parentage chain of bpf_verifier_state and
'branches' counter for the number of branches left to explore.
See more detailed algorithm description in bpf_verifier.h

This algorithm borrows the key idea from Edward Cree approach:
https://patchwork.ozlabs.org/patch/877222/
Additional state pruning heuristics make such brute force loop walk
practical even for large loops.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_verifier.h | 51 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 704ed7971472..03037373b447 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -194,6 +194,53 @@ struct bpf_func_state {
 struct bpf_verifier_state {
 	/* call stack tracking */
 	struct bpf_func_state *frame[MAX_CALL_FRAMES];
+	struct bpf_verifier_state *parent;
+	/*
+	 * 'branches' field is the number of branches left to explore:
+	 * 0 - all possible paths from this state reached bpf_exit or
+	 * were safely pruned
+	 * 1 - at least one path is being explored.
+	 * This state hasn't reached bpf_exit
+	 * 2 - at least two paths are being explored.
+	 * This state is an immediate parent of two children.
+	 * One is fallthrough branch with branches==1 and another
+	 * state is pushed into stack (to be explored later) also with
+	 * branches==1. The parent of this state has branches==1.
+	 * The verifier state tree connected via 'parent' pointer looks like:
+	 * 1
+	 * 1
+	 * 2 -> 1 (first 'if' pushed into stack)
+	 * 1
+	 * 2 -> 1 (second 'if' pushed into stack)
+	 * 1
+	 * 1
+	 * 1 bpf_exit.
+	 *
+	 * Once do_check() reaches bpf_exit, it calls update_branch_counts()
+	 * and the verifier state tree will look:
+	 * 1
+	 * 1
+	 * 2 -> 1 (first 'if' pushed into stack)
+	 * 1
+	 * 1 -> 1 (second 'if' pushed into stack)
+	 * 0
+	 * 0
+	 * 0 bpf_exit.
+	 * After pop_stack() the do_check() will resume at second 'if'.
+	 *
+	 * If is_state_visited() sees a state with branches > 0 it means
+	 * there is a loop. If such state is exactly equal to the current state
+	 * it's an infinite loop. Note states_equal() checks for states
+	 * equvalency, so two states being 'states_equal' does not mean
+	 * infinite loop. The exact comparison is provided by
+	 * states_maybe_looping() function. It's a stronger pre-check and
+	 * much faster than states_equal().
+	 *
+	 * This algorithm may not find all possible infinite loops or
+	 * loop iteration count may be too high.
+	 * In such cases BPF_COMPLEXITY_LIMIT_INSNS limit kicks in.
+	 */
+	u32 branches;
 	u32 insn_idx;
 	u32 curframe;
 	u32 active_spin_lock;
@@ -312,7 +359,9 @@ struct bpf_verifier_env {
 	} cfg;
 	u32 subprog_cnt;
 	/* number of instructions analyzed by the verifier */
-	u32 insn_processed;
+	u32 prev_insn_processed, insn_processed;
+	/* number of jmps, calls, exits analyzed so far */
+	u32 prev_jmps_processed, jmps_processed;
 	/* total verification time */
 	u64 verification_time;
 	/* maximum number of verifier states kept in 'branching' instructions */
-- 
cgit 


From b5dc0163d8fd78e64a7e21f309cf932fda34353e Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Sat, 15 Jun 2019 12:12:25 -0700
Subject: bpf: precise scalar_value tracking

Introduce precision tracking logic that
helps cilium programs the most:
                  old clang  old clang    new clang  new clang
                          with all patches         with all patches
bpf_lb-DLB_L3.o      1838     2283         1923       1863
bpf_lb-DLB_L4.o      3218     2657         3077       2468
bpf_lb-DUNKNOWN.o    1064     545          1062       544
bpf_lxc-DDROP_ALL.o  26935    23045        166729     22629
bpf_lxc-DUNKNOWN.o   34439    35240        174607     28805
bpf_netdev.o         9721     8753         8407       6801
bpf_overlay.o        6184     7901         5420       4754
bpf_lxc_jit.o        39389    50925        39389      50925

Consider code:
654: (85) call bpf_get_hash_recalc#34
655: (bf) r7 = r0
656: (15) if r8 == 0x0 goto pc+29
657: (bf) r2 = r10
658: (07) r2 += -48
659: (18) r1 = 0xffff8881e41e1b00
661: (85) call bpf_map_lookup_elem#1
662: (15) if r0 == 0x0 goto pc+23
663: (69) r1 = *(u16 *)(r0 +0)
664: (15) if r1 == 0x0 goto pc+21
665: (bf) r8 = r7
666: (57) r8 &= 65535
667: (bf) r2 = r8
668: (3f) r2 /= r1
669: (2f) r2 *= r1
670: (bf) r1 = r8
671: (1f) r1 -= r2
672: (57) r1 &= 255
673: (25) if r1 > 0x1e goto pc+12
 R0=map_value(id=0,off=0,ks=20,vs=64,imm=0) R1_w=inv(id=0,umax_value=30,var_off=(0x0; 0x1f))
674: (67) r1 <<= 1
675: (0f) r0 += r1

At this point the verifier will notice that scalar R1 is used in map pointer adjustment.
R1 has to be precise for later operations on R0 to be validated properly.

The verifier will backtrack the above code in the following way:
last_idx 675 first_idx 664
regs=2 stack=0 before 675: (0f) r0 += r1         // started backtracking R1 regs=2 is a bitmask
regs=2 stack=0 before 674: (67) r1 <<= 1
regs=2 stack=0 before 673: (25) if r1 > 0x1e goto pc+12
regs=2 stack=0 before 672: (57) r1 &= 255
regs=2 stack=0 before 671: (1f) r1 -= r2         // now both R1 and R2 has to be precise -> regs=6 mask
regs=6 stack=0 before 670: (bf) r1 = r8          // after this insn R8 and R2 has to be precise
regs=104 stack=0 before 669: (2f) r2 *= r1       // after this one R8, R2, and R1
regs=106 stack=0 before 668: (3f) r2 /= r1
regs=106 stack=0 before 667: (bf) r2 = r8
regs=102 stack=0 before 666: (57) r8 &= 65535
regs=102 stack=0 before 665: (bf) r8 = r7
regs=82 stack=0 before 664: (15) if r1 == 0x0 goto pc+21
 // this is the end of verifier state. The following regs will be marked precised:
 R1_rw=invP(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R7_rw=invP(id=0)
parent didn't have regs=82 stack=0 marks         // so backtracking continues into parent state
last_idx 663 first_idx 655
regs=82 stack=0 before 663: (69) r1 = *(u16 *)(r0 +0)   // R1 was assigned no need to track it further
regs=80 stack=0 before 662: (15) if r0 == 0x0 goto pc+23    // keep tracking R7
regs=80 stack=0 before 661: (85) call bpf_map_lookup_elem#1  // keep tracking R7
regs=80 stack=0 before 659: (18) r1 = 0xffff8881e41e1b00
regs=80 stack=0 before 658: (07) r2 += -48
regs=80 stack=0 before 657: (bf) r2 = r10
regs=80 stack=0 before 656: (15) if r8 == 0x0 goto pc+29
regs=80 stack=0 before 655: (bf) r7 = r0                // here the assignment into R7
 // mark R0 to be precise:
 R0_rw=invP(id=0)
parent didn't have regs=1 stack=0 marks                 // regs=1 -> tracking R0
last_idx 654 first_idx 644
regs=1 stack=0 before 654: (85) call bpf_get_hash_recalc#34 // and in the parent frame it was a return value
  // nothing further to backtrack

Two scalar registers not marked precise are equivalent from state pruning point of view.
More details in the patch comments.

It doesn't support bpf2bpf calls yet and enabled for root only.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_verifier.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 03037373b447..19393b0964a8 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -139,6 +139,8 @@ struct bpf_reg_state {
 	 */
 	s32 subreg_def;
 	enum bpf_reg_liveness live;
+	/* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */
+	bool precise;
 };
 
 enum bpf_stack_slot_type {
@@ -190,6 +192,11 @@ struct bpf_func_state {
 	struct bpf_stack_state *stack;
 };
 
+struct bpf_idx_pair {
+	u32 prev_idx;
+	u32 idx;
+};
+
 #define MAX_CALL_FRAMES 8
 struct bpf_verifier_state {
 	/* call stack tracking */
@@ -245,6 +252,17 @@ struct bpf_verifier_state {
 	u32 curframe;
 	u32 active_spin_lock;
 	bool speculative;
+
+	/* first and last insn idx of this verifier state */
+	u32 first_insn_idx;
+	u32 last_insn_idx;
+	/* jmp history recorded from first to last.
+	 * backtracking is using it to go from last to first.
+	 * For most states jmp_history_cnt is [0-3].
+	 * For loops can go up to ~40.
+	 */
+	struct bpf_idx_pair *jmp_history;
+	u32 jmp_history_cnt;
 };
 
 #define bpf_get_spilled_reg(slot, frame)				\
-- 
cgit