summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- include/net/icmp.h 10
-rw-r--r-- net/ipv4/cipso_ipv4.c 13
-rw-r--r-- net/ipv4/icmp.c 15
-rw-r--r-- net/ipv4/route.c 10
-rwxr-xr-x tools/testing/selftests/net/traceroute.sh 250
5 files changed, 229 insertions, 69 deletions
diff --git a/include/net/icmp.h b/include/net/icmp.h
index caddf4a59ad1..935ee13d9ae9 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -37,10 +37,10 @@ struct sk_buff;
struct net;
void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
- const struct ip_options *opt);
+ const struct inet_skb_parm *parm);
static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
{
- __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
+ __icmp_send(skb_in, type, code, info, IPCB(skb_in));
}
#if IS_ENABLED(CONFIG_NF_NAT)
@@ -48,8 +48,10 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info);
#else
static inline void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
{
- struct ip_options opts = { 0 };
- __icmp_send(skb_in, type, code, info, &opts);
+ struct inet_skb_parm parm;
+
+ memset(&parm, 0, sizeof(parm));
+ __icmp_send(skb_in, type, code, info, &parm);
}
#endif
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 740af8541d2f..709021197e1c 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1715,8 +1715,7 @@ validate_return:
*/
void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
{
- unsigned char optbuf[sizeof(struct ip_options) + 40];
- struct ip_options *opt = (struct ip_options *)optbuf;
+ struct inet_skb_parm parm;
int res;
if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
@@ -1727,19 +1726,19 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
* so we can not use icmp_send and IPCB here.
*/
- memset(opt, 0, sizeof(struct ip_options));
- opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
+ memset(&parm, 0, sizeof(parm));
+ parm.opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
rcu_read_lock();
- res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL);
+ res = __ip_options_compile(dev_net(skb->dev), &parm.opt, skb, NULL);
rcu_read_unlock();
if (res)
return;
if (gateway)
- __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt);
+ __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, &parm);
else
- __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt);
+ __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, &parm);
}
/**
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 863bf5023f2a..1b7fb5d935ed 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -594,7 +594,7 @@ relookup_failed:
*/
void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
- const struct ip_options *opt)
+ const struct inet_skb_parm *parm)
{
struct iphdr *iph;
int room;
@@ -710,7 +710,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
rcu_read_lock();
if (rt_is_input_route(rt) &&
READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr))
- dev = dev_get_by_index_rcu(net, inet_iif(skb_in));
+ dev = dev_get_by_index_rcu(net, parm->iif ? parm->iif :
+ inet_iif(skb_in));
if (dev)
saddr = inet_select_addr(dev, iph->saddr,
@@ -725,7 +726,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
iph->tos;
mark = IP4_REPLY_MARK(net, skb_in->mark);
- if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
+ if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in,
+ &parm->opt))
goto out_unlock;
@@ -799,15 +801,16 @@ EXPORT_SYMBOL(__icmp_send);
void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
{
struct sk_buff *cloned_skb = NULL;
- struct ip_options opts = { 0 };
enum ip_conntrack_info ctinfo;
enum ip_conntrack_dir dir;
+ struct inet_skb_parm parm;
struct nf_conn *ct;
__be32 orig_ip;
+ memset(&parm, 0, sizeof(parm));
ct = nf_ct_get(skb_in, &ctinfo);
if (!ct || !(READ_ONCE(ct->status) & IPS_NAT_MASK)) {
- __icmp_send(skb_in, type, code, info, &opts);
+ __icmp_send(skb_in, type, code, info, &parm);
return;
}
@@ -823,7 +826,7 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
orig_ip = ip_hdr(skb_in)->saddr;
dir = CTINFO2DIR(ctinfo);
ip_hdr(skb_in)->saddr = ct->tuplehash[dir].tuple.src.u3.ip;
- __icmp_send(skb_in, type, code, info, &opts);
+ __icmp_send(skb_in, type, code, info, &parm);
ip_hdr(skb_in)->saddr = orig_ip;
out:
consume_skb(cloned_skb);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 50309f2ab132..6d27d3610c1c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1222,8 +1222,8 @@ EXPORT_INDIRECT_CALLABLE(ipv4_dst_check);
static void ipv4_send_dest_unreach(struct sk_buff *skb)
{
+ struct inet_skb_parm parm;
struct net_device *dev;
- struct ip_options opt;
int res;
/* Recompile ip options since IPCB may not be valid anymore.
@@ -1233,21 +1233,21 @@ static void ipv4_send_dest_unreach(struct sk_buff *skb)
ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
return;
- memset(&opt, 0, sizeof(opt));
+ memset(&parm, 0, sizeof(parm));
if (ip_hdr(skb)->ihl > 5) {
if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
return;
- opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
+ parm.opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
rcu_read_lock();
dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev;
- res = __ip_options_compile(dev_net(dev), &opt, skb, NULL);
+ res = __ip_options_compile(dev_net(dev), &parm.opt, skb, NULL);
rcu_read_unlock();
if (res)
return;
}
- __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
+ __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &parm);
}
static void ipv4_link_failure(struct sk_buff *skb)
diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh
index 282f14760940..dbb34c7e09ce 100755
--- a/tools/testing/selftests/net/traceroute.sh
+++ b/tools/testing/selftests/net/traceroute.sh
@@ -10,28 +10,6 @@ PAUSE_ON_FAIL=no
################################################################################
#
-log_test()
-{
- local rc=$1
- local expected=$2
- local msg="$3"
-
- if [ ${rc} -eq ${expected} ]; then
- printf "TEST: %-60s [ OK ]\n" "${msg}"
- nsuccess=$((nsuccess+1))
- else
- ret=1
- nfail=$((nfail+1))
- printf "TEST: %-60s [FAIL]\n" "${msg}"
- if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
- echo
- echo "hit enter to continue, 'q' to quit"
- read a
- [ "$a" = "q" ] && exit 1
- fi
- fi
-}
-
run_cmd()
{
local ns
@@ -203,34 +181,137 @@ setup_traceroute6()
run_traceroute6()
{
- if [ ! -x "$(command -v traceroute6)" ]; then
- echo "SKIP: Could not run IPV6 test without traceroute6"
- return
- fi
-
setup_traceroute6
+ RET=0
+
# traceroute6 host-2 from host-1 (expects 2000:102::2)
run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2"
- log_test $? 0 "IPV6 traceroute"
+ check_err $? "traceroute6 did not return 2000:102::2"
+ log_test "IPv6 traceroute"
cleanup_traceroute6
}
################################################################################
+# traceroute6 with VRF test
+#
+# Verify that in this scenario
+#
+# ------------------------ N2
+# | |
+# ------ ------ N3 ----
+# | R1 | | R2 |------|H2|
+# ------ ------ ----
+# | |
+# ------------------------ N1
+# |
+# ----
+# |H1|
+# ----
+#
+# Where H1's default route goes through R1 and R1's default route goes through
+# R2 over N2, traceroute6 from H1 to H2 reports R2's address on N2 and not N1.
+# The interfaces connecting R2 to the different subnets are members of a VRF
+# and the intention is to check that traceroute6 does not report the VRF's
+# address.
+#
+# Addresses are assigned as follows:
+#
+# N1: 2000:101::/64
+# N2: 2000:102::/64
+# N3: 2000:103::/64
+#
+# R1's host part of address: 1
+# R2's host part of address: 2
+# H1's host part of address: 3
+# H2's host part of address: 4
+#
+# For example:
+# the IPv6 address of R1's interface on N2 is 2000:102::1/64
+
+cleanup_traceroute6_vrf()
+{
+ cleanup_all_ns
+}
+
+setup_traceroute6_vrf()
+{
+ # Start clean
+ cleanup_traceroute6_vrf
+
+ setup_ns h1 h2 r1 r2
+ create_ns "$h1"
+ create_ns "$h2"
+ create_ns "$r1"
+ create_ns "$r2"
+
+ ip -n "$r2" link add name vrf100 up type vrf table 100
+ ip -n "$r2" addr add 2001:db8:100::1/64 dev vrf100
+
+ # Setup N3
+ connect_ns "$r2" eth3 - 2000:103::2/64 "$h2" eth3 - 2000:103::4/64
+
+ ip -n "$r2" link set dev eth3 master vrf100
+
+ ip -n "$h2" route add default via 2000:103::2
+
+ # Setup N2
+ connect_ns "$r1" eth2 - 2000:102::1/64 "$r2" eth2 - 2000:102::2/64
+
+ ip -n "$r1" route add default via 2000:102::2
+
+ ip -n "$r2" link set dev eth2 master vrf100
+
+ # Setup N1. host-1 and router-2 connect to a bridge in router-1.
+ ip -n "$r1" link add name br100 up type bridge
+ ip -n "$r1" addr add 2000:101::1/64 dev br100
+
+ connect_ns "$h1" eth0 - 2000:101::3/64 "$r1" eth0 - -
+
+ ip -n "$h1" route add default via 2000:101::1
+
+ ip -n "$r1" link set dev eth0 master br100
+
+ connect_ns "$r2" eth1 - 2000:101::2/64 "$r1" eth1 - -
+
+ ip -n "$r2" link set dev eth1 master vrf100
+
+ ip -n "$r1" link set dev eth1 master br100
+
+ # Prime the network
+ ip netns exec "$h1" ping6 -c5 2000:103::4 >/dev/null 2>&1
+}
+
+run_traceroute6_vrf()
+{
+ setup_traceroute6_vrf
+
+ RET=0
+
+ # traceroute6 host-2 from host-1 (expects 2000:102::2)
+ run_cmd "$h1" "traceroute6 2000:103::4 | grep 2000:102::2"
+ check_err $? "traceroute6 did not return 2000:102::2"
+ log_test "IPv6 traceroute with VRF"
+
+ cleanup_traceroute6_vrf
+}
+
+################################################################################
# traceroute test
#
-# Verify that traceroute from H1 to H2 shows 1.0.1.1 in this scenario
+# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when
+# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively.
#
-# 1.0.3.1/24
+# 1.0.3.3/24 1.0.3.1/24
# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ----
# |H1|--------------------------|R1|--------------------------|H2|
# ---- N1 ---- N2 ----
#
-# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and
-# 1.0.3.1/24 and 1.0.1.1/24 are respectively R1's primary and secondary
-# address on N1.
-#
+# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and 1.0.3.1/24 and
+# 1.0.1.1/24 are R1's primary addresses on N1. The kernel is expected to prefer
+# a source address that is on the same subnet as the destination IP of the ICMP
+# error message.
cleanup_traceroute()
{
@@ -250,6 +331,7 @@ setup_traceroute()
connect_ns $h1 eth0 1.0.1.3/24 - \
$router eth1 1.0.3.1/24 -
+ ip -n "$h1" addr add 1.0.3.3/24 dev eth0
ip netns exec $h1 ip route add default via 1.0.1.1
ip netns exec $router ip addr add 1.0.1.1/24 dev eth1
@@ -268,35 +350,107 @@ setup_traceroute()
run_traceroute()
{
- if [ ! -x "$(command -v traceroute)" ]; then
- echo "SKIP: Could not run IPV4 test without traceroute"
- return
- fi
-
setup_traceroute
- # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while.
- run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1"
- log_test $? 0 "IPV4 traceroute"
+ RET=0
+
+ # traceroute host-2 from host-1. Expect a source IP that is on the same
+ # subnet as the destination IP of the ICMP error message.
+ run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep -q 1.0.1.1"
+ check_err $? "traceroute did not return 1.0.1.1"
+ run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep -q 1.0.3.1"
+ check_err $? "traceroute did not return 1.0.3.1"
+ log_test "IPv4 traceroute"
cleanup_traceroute
}
################################################################################
+# traceroute with VRF test
+#
+# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when
+# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. The
+# intention is to check that the kernel does not choose an IP assigned to the
+# VRF device, but rather an address from the VRF port (eth1) that received the
+# packet that generates the ICMP error message.
+#
+# 1.0.4.1/24 (vrf100)
+# 1.0.3.3/24 1.0.3.1/24
+# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ---- N1 ---- N2 ----
+
+cleanup_traceroute_vrf()
+{
+ cleanup_all_ns
+}
+
+setup_traceroute_vrf()
+{
+ # Start clean
+ cleanup_traceroute_vrf
+
+ setup_ns h1 h2 router
+ create_ns "$h1"
+ create_ns "$h2"
+ create_ns "$router"
+
+ ip -n "$router" link add name vrf100 up type vrf table 100
+ ip -n "$router" addr add 1.0.4.1/24 dev vrf100
+
+ connect_ns "$h1" eth0 1.0.1.3/24 - \
+ "$router" eth1 1.0.1.1/24 -
+
+ ip -n "$h1" addr add 1.0.3.3/24 dev eth0
+ ip -n "$h1" route add default via 1.0.1.1
+
+ ip -n "$router" link set dev eth1 master vrf100
+ ip -n "$router" addr add 1.0.3.1/24 dev eth1
+ ip netns exec "$router" sysctl -qw \
+ net.ipv4.icmp_errors_use_inbound_ifaddr=1
+
+ connect_ns "$h2" eth0 1.0.2.4/24 - \
+ "$router" eth2 1.0.2.1/24 -
+
+ ip -n "$h2" route add default via 1.0.2.1
+
+ ip -n "$router" link set dev eth2 master vrf100
+
+ # Prime the network
+ ip netns exec "$h1" ping -c5 1.0.2.4 >/dev/null 2>&1
+}
+
+run_traceroute_vrf()
+{
+ setup_traceroute_vrf
+
+ RET=0
+
+ # traceroute host-2 from host-1. Expect a source IP that is on the same
+ # subnet as the destination IP of the ICMP error message.
+ run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep 1.0.1.1"
+ check_err $? "traceroute did not return 1.0.1.1"
+ run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep 1.0.3.1"
+ check_err $? "traceroute did not return 1.0.3.1"
+ log_test "IPv4 traceroute with VRF"
+
+ cleanup_traceroute_vrf
+}
+
+################################################################################
# Run tests
run_tests()
{
run_traceroute6
+ run_traceroute6_vrf
run_traceroute
+ run_traceroute_vrf
}
################################################################################
# main
-declare -i nfail=0
-declare -i nsuccess=0
-
while getopts :pv o
do
case $o in
@@ -306,7 +460,9 @@ do
esac
done
+require_command traceroute6
+require_command traceroute
+
run_tests
-printf "\nTests passed: %3d\n" ${nsuccess}
-printf "Tests failed: %3d\n" ${nfail}
+exit "${EXIT_STATUS}"