diff options
-rw-r--r-- | include/net/icmp.h | 10 | ||||
-rw-r--r-- | net/ipv4/cipso_ipv4.c | 13 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 15 | ||||
-rw-r--r-- | net/ipv4/route.c | 10 | ||||
-rwxr-xr-x | tools/testing/selftests/net/traceroute.sh | 250 |
5 files changed, 229 insertions, 69 deletions
diff --git a/include/net/icmp.h b/include/net/icmp.h index caddf4a59ad1..935ee13d9ae9 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -37,10 +37,10 @@ struct sk_buff; struct net; void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, - const struct ip_options *opt); + const struct inet_skb_parm *parm); static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { - __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt); + __icmp_send(skb_in, type, code, info, IPCB(skb_in)); } #if IS_ENABLED(CONFIG_NF_NAT) @@ -48,8 +48,10 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info); #else static inline void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) { - struct ip_options opts = { 0 }; - __icmp_send(skb_in, type, code, info, &opts); + struct inet_skb_parm parm; + + memset(&parm, 0, sizeof(parm)); + __icmp_send(skb_in, type, code, info, &parm); } #endif diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 740af8541d2f..709021197e1c 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1715,8 +1715,7 @@ validate_return: */ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { - unsigned char optbuf[sizeof(struct ip_options) + 40]; - struct ip_options *opt = (struct ip_options *)optbuf; + struct inet_skb_parm parm; int res; if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) @@ -1727,19 +1726,19 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) * so we can not use icmp_send and IPCB here. 
*/ - memset(opt, 0, sizeof(struct ip_options)); - opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); + memset(&parm, 0, sizeof(parm)); + parm.opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); rcu_read_lock(); - res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL); + res = __ip_options_compile(dev_net(skb->dev), &parm.opt, skb, NULL); rcu_read_unlock(); if (res) return; if (gateway) - __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt); + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, &parm); else - __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt); + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, &parm); } /** diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 863bf5023f2a..1b7fb5d935ed 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -594,7 +594,7 @@ relookup_failed: */ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, - const struct ip_options *opt) + const struct inet_skb_parm *parm) { struct iphdr *iph; int room; @@ -710,7 +710,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, rcu_read_lock(); if (rt_is_input_route(rt) && READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)) - dev = dev_get_by_index_rcu(net, inet_iif(skb_in)); + dev = dev_get_by_index_rcu(net, parm->iif ? 
parm->iif : + inet_iif(skb_in)); if (dev) saddr = inet_select_addr(dev, iph->saddr, @@ -725,7 +726,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, iph->tos; mark = IP4_REPLY_MARK(net, skb_in->mark); - if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt)) + if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, + &parm->opt)) goto out_unlock; @@ -799,15 +801,16 @@ EXPORT_SYMBOL(__icmp_send); void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct sk_buff *cloned_skb = NULL; - struct ip_options opts = { 0 }; enum ip_conntrack_info ctinfo; enum ip_conntrack_dir dir; + struct inet_skb_parm parm; struct nf_conn *ct; __be32 orig_ip; + memset(&parm, 0, sizeof(parm)); ct = nf_ct_get(skb_in, &ctinfo); if (!ct || !(READ_ONCE(ct->status) & IPS_NAT_MASK)) { - __icmp_send(skb_in, type, code, info, &opts); + __icmp_send(skb_in, type, code, info, &parm); return; } @@ -823,7 +826,7 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) orig_ip = ip_hdr(skb_in)->saddr; dir = CTINFO2DIR(ctinfo); ip_hdr(skb_in)->saddr = ct->tuplehash[dir].tuple.src.u3.ip; - __icmp_send(skb_in, type, code, info, &opts); + __icmp_send(skb_in, type, code, info, &parm); ip_hdr(skb_in)->saddr = orig_ip; out: consume_skb(cloned_skb); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 50309f2ab132..6d27d3610c1c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1222,8 +1222,8 @@ EXPORT_INDIRECT_CALLABLE(ipv4_dst_check); static void ipv4_send_dest_unreach(struct sk_buff *skb) { + struct inet_skb_parm parm; struct net_device *dev; - struct ip_options opt; int res; /* Recompile ip options since IPCB may not be valid anymore. 
@@ -1233,21 +1233,21 @@ static void ipv4_send_dest_unreach(struct sk_buff *skb) ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5) return; - memset(&opt, 0, sizeof(opt)); + memset(&parm, 0, sizeof(parm)); if (ip_hdr(skb)->ihl > 5) { if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4)) return; - opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); + parm.opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); rcu_read_lock(); dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev; - res = __ip_options_compile(dev_net(dev), &opt, skb, NULL); + res = __ip_options_compile(dev_net(dev), &parm.opt, skb, NULL); rcu_read_unlock(); if (res) return; } - __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &parm); } static void ipv4_link_failure(struct sk_buff *skb) diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh index 282f14760940..dbb34c7e09ce 100755 --- a/tools/testing/selftests/net/traceroute.sh +++ b/tools/testing/selftests/net/traceroute.sh @@ -10,28 +10,6 @@ PAUSE_ON_FAIL=no ################################################################################ # -log_test() -{ - local rc=$1 - local expected=$2 - local msg="$3" - - if [ ${rc} -eq ${expected} ]; then - printf "TEST: %-60s [ OK ]\n" "${msg}" - nsuccess=$((nsuccess+1)) - else - ret=1 - nfail=$((nfail+1)) - printf "TEST: %-60s [FAIL]\n" "${msg}" - if [ "${PAUSE_ON_FAIL}" = "yes" ]; then - echo - echo "hit enter to continue, 'q' to quit" - read a - [ "$a" = "q" ] && exit 1 - fi - fi -} - run_cmd() { local ns @@ -203,34 +181,137 @@ setup_traceroute6() run_traceroute6() { - if [ ! -x "$(command -v traceroute6)" ]; then - echo "SKIP: Could not run IPV6 test without traceroute6" - return - fi - setup_traceroute6 + RET=0 + # traceroute6 host-2 from host-1 (expects 2000:102::2) run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2" - log_test $? 0 "IPV6 traceroute" + check_err $? 
"traceroute6 did not return 2000:102::2" + log_test "IPv6 traceroute" cleanup_traceroute6 } ################################################################################ +# traceroute6 with VRF test +# +# Verify that in this scenario +# +# ------------------------ N2 +# | | +# ------ ------ N3 ---- +# | R1 | | R2 |------|H2| +# ------ ------ ---- +# | | +# ------------------------ N1 +# | +# ---- +# |H1| +# ---- +# +# Where H1's default route goes through R1 and R1's default route goes through +# R2 over N2, traceroute6 from H1 to H2 reports R2's address on N2 and not N1. +# The interfaces connecting R2 to the different subnets are members of a VRF +# and the intention is to check that traceroute6 does not report the VRF's +# address. +# +# Addresses are assigned as follows: +# +# N1: 2000:101::/64 +# N2: 2000:102::/64 +# N3: 2000:103::/64 +# +# R1's host part of address: 1 +# R2's host part of address: 2 +# H1's host part of address: 3 +# H2's host part of address: 4 +# +# For example: +# the IPv6 address of R1's interface on N2 is 2000:102::1/64 + +cleanup_traceroute6_vrf() +{ + cleanup_all_ns +} + +setup_traceroute6_vrf() +{ + # Start clean + cleanup_traceroute6_vrf + + setup_ns h1 h2 r1 r2 + create_ns "$h1" + create_ns "$h2" + create_ns "$r1" + create_ns "$r2" + + ip -n "$r2" link add name vrf100 up type vrf table 100 + ip -n "$r2" addr add 2001:db8:100::1/64 dev vrf100 + + # Setup N3 + connect_ns "$r2" eth3 - 2000:103::2/64 "$h2" eth3 - 2000:103::4/64 + + ip -n "$r2" link set dev eth3 master vrf100 + + ip -n "$h2" route add default via 2000:103::2 + + # Setup N2 + connect_ns "$r1" eth2 - 2000:102::1/64 "$r2" eth2 - 2000:102::2/64 + + ip -n "$r1" route add default via 2000:102::2 + + ip -n "$r2" link set dev eth2 master vrf100 + + # Setup N1. host-1 and router-2 connect to a bridge in router-1. 
+ ip -n "$r1" link add name br100 up type bridge + ip -n "$r1" addr add 2000:101::1/64 dev br100 + + connect_ns "$h1" eth0 - 2000:101::3/64 "$r1" eth0 - - + + ip -n "$h1" route add default via 2000:101::1 + + ip -n "$r1" link set dev eth0 master br100 + + connect_ns "$r2" eth1 - 2000:101::2/64 "$r1" eth1 - - + + ip -n "$r2" link set dev eth1 master vrf100 + + ip -n "$r1" link set dev eth1 master br100 + + # Prime the network + ip netns exec "$h1" ping6 -c5 2000:103::4 >/dev/null 2>&1 +} + +run_traceroute6_vrf() +{ + setup_traceroute6_vrf + + RET=0 + + # traceroute6 host-2 from host-1 (expects 2000:102::2) + run_cmd "$h1" "traceroute6 2000:103::4 | grep 2000:102::2" + check_err $? "traceroute6 did not return 2000:102::2" + log_test "IPv6 traceroute with VRF" + + cleanup_traceroute6_vrf +} + +################################################################################ # traceroute test # -# Verify that traceroute from H1 to H2 shows 1.0.1.1 in this scenario +# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when +# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. # -# 1.0.3.1/24 +# 1.0.3.3/24 1.0.3.1/24 # ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ---- # |H1|--------------------------|R1|--------------------------|H2| # ---- N1 ---- N2 ---- # -# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and -# 1.0.3.1/24 and 1.0.1.1/24 are respectively R1's primary and secondary -# address on N1. -# +# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and 1.0.3.1/24 and +# 1.0.1.1/24 are R1's primary addresses on N1. The kernel is expected to prefer +# a source address that is on the same subnet as the destination IP of the ICMP +# error message. 
cleanup_traceroute() { @@ -250,6 +331,7 @@ setup_traceroute() connect_ns $h1 eth0 1.0.1.3/24 - \ $router eth1 1.0.3.1/24 - + ip -n "$h1" addr add 1.0.3.3/24 dev eth0 ip netns exec $h1 ip route add default via 1.0.1.1 ip netns exec $router ip addr add 1.0.1.1/24 dev eth1 @@ -268,35 +350,107 @@ setup_traceroute() run_traceroute() { - if [ ! -x "$(command -v traceroute)" ]; then - echo "SKIP: Could not run IPV4 test without traceroute" - return - fi - setup_traceroute - # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while. - run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" - log_test $? 0 "IPV4 traceroute" + RET=0 + + # traceroute host-2 from host-1. Expect a source IP that is on the same + # subnet as destination IP of the ICMP error message. + run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep -q 1.0.1.1" + check_err $? "traceroute did not return 1.0.1.1" + run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep -q 1.0.3.1" + check_err $? "traceroute did not return 1.0.3.1" + log_test "IPv4 traceroute" cleanup_traceroute } ################################################################################ +# traceroute with VRF test +# +# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when +# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. The +# intention is to check that the kernel does not choose an IP assigned to the +# VRF device, but rather an address from the VRF port (eth1) that received the +# packet that generates the ICMP error message. 
+# +# 1.0.4.1/24 (vrf100) +# 1.0.3.3/24 1.0.3.1/24 +# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- + +cleanup_traceroute_vrf() +{ + cleanup_all_ns +} + +setup_traceroute_vrf() +{ + # Start clean + cleanup_traceroute_vrf + + setup_ns h1 h2 router + create_ns "$h1" + create_ns "$h2" + create_ns "$router" + + ip -n "$router" link add name vrf100 up type vrf table 100 + ip -n "$router" addr add 1.0.4.1/24 dev vrf100 + + connect_ns "$h1" eth0 1.0.1.3/24 - \ + "$router" eth1 1.0.1.1/24 - + + ip -n "$h1" addr add 1.0.3.3/24 dev eth0 + ip -n "$h1" route add default via 1.0.1.1 + + ip -n "$router" link set dev eth1 master vrf100 + ip -n "$router" addr add 1.0.3.1/24 dev eth1 + ip netns exec "$router" sysctl -qw \ + net.ipv4.icmp_errors_use_inbound_ifaddr=1 + + connect_ns "$h2" eth0 1.0.2.4/24 - \ + "$router" eth2 1.0.2.1/24 - + + ip -n "$h2" route add default via 1.0.2.1 + + ip -n "$router" link set dev eth2 master vrf100 + + # Prime the network + ip netns exec "$h1" ping -c5 1.0.2.4 >/dev/null 2>&1 +} + +run_traceroute_vrf() +{ + setup_traceroute_vrf + + RET=0 + + # traceroute host-2 from host-1. Expect a source IP that is on the same + # subnet as destination IP of the ICMP error message. + run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep 1.0.1.1" + check_err $? "traceroute did not return 1.0.1.1" + run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep 1.0.3.1" + check_err $? 
"traceroute did not return 1.0.3.1" + log_test "IPv4 traceroute with VRF" + + cleanup_traceroute_vrf +} + +################################################################################ # Run tests run_tests() { run_traceroute6 + run_traceroute6_vrf run_traceroute + run_traceroute_vrf } ################################################################################ # main -declare -i nfail=0 -declare -i nsuccess=0 - while getopts :pv o do case $o in @@ -306,7 +460,9 @@ do esac done +require_command traceroute6 +require_command traceroute + run_tests -printf "\nTests passed: %3d\n" ${nsuccess} -printf "Tests failed: %3d\n" ${nfail} +exit "${EXIT_STATUS}" |