summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
author: Jakub Kicinski <kuba@kernel.org> 2021-12-17 18:06:39 -0800
committer: Jakub Kicinski <kuba@kernel.org> 2021-12-17 18:06:40 -0800
commit: 14193d57c81456541256e2545865d0f9536794eb (patch)
tree: 278c626250b08201614fe8272cb819c1d8a3348b /net
parent: 8ca4090fec0217bcb89531c8be80fcfa66a397a1 (diff)
parent: 635d448a1cce4b4ebee52b351052c70434fa90ea (diff)
Merge branch 'net-sched-fix-ct-zone-matching-for-invalid-conntrack-state'
Paul Blakey says:

====================
net/sched: Fix ct zone matching for invalid conntrack state

Currently, when a packet is marked as invalid by conntrack_in in act_ct, post_ct will be set, and the connection info (nf_conn) will be removed from the skb. Later, openvswitch and flower matching will parse this as ct_state=+trk+inv. But because the connection info is missing, there is also no zone info to match against, even though the packet is tracked.

This series fixes that by passing along the zone last executed by act_ct. The zone info is passed from act_ct to the ct flow dissector (used by flower to extract zone info) and to OVS the same way post_ct is passed: via the qdisc layer skb cb to the dissector, and via the skb extension to OVS.

Since adding any more data to the qdisc skb cb would leave no room for the BPF skb cb to extend it and still stay within the skb->cb size, this series moves the tc-related info out of the qdisc skb cb and into a tc-specific cb that also extends it.
====================

Link: https://lore.kernel.org/r/20211214172435.24207-1-paulb@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r-- net/core/dev.c 8
-rw-r--r-- net/core/flow_dissector.c 3
-rw-r--r-- net/openvswitch/flow.c 8
-rw-r--r-- net/sched/act_ct.c 15
-rw-r--r-- net/sched/cls_api.c 7
-rw-r--r-- net/sched/cls_flower.c 6
-rw-r--r-- net/sched/sch_frag.c 3
7 files changed, 32 insertions, 18 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 2a352e668d10..c4708e2487fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3941,8 +3941,8 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
return skb;
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
- qdisc_skb_cb(skb)->mru = 0;
- qdisc_skb_cb(skb)->post_ct = false;
+ tc_skb_cb(skb)->mru = 0;
+ tc_skb_cb(skb)->post_ct = false;
mini_qdisc_bstats_cpu_update(miniq, skb);
switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
@@ -5103,8 +5103,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
}
qdisc_skb_cb(skb)->pkt_len = skb->len;
- qdisc_skb_cb(skb)->mru = 0;
- qdisc_skb_cb(skb)->post_ct = false;
+ tc_skb_cb(skb)->mru = 0;
+ tc_skb_cb(skb)->post_ct = false;
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 3255f57f5131..1b094c481f1d 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -238,7 +238,7 @@ void
skb_flow_dissect_ct(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container, u16 *ctinfo_map,
- size_t mapsize, bool post_ct)
+ size_t mapsize, bool post_ct, u16 zone)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
struct flow_dissector_key_ct *key;
@@ -260,6 +260,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
if (!ct) {
key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
TCA_FLOWER_KEY_CT_FLAGS_INVALID;
+ key->ct_zone = zone;
return;
}
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 9713035b89e3..6d262d9aa10e 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -34,6 +34,7 @@
#include <net/mpls.h>
#include <net/ndisc.h>
#include <net/nsh.h>
+#include <net/netfilter/nf_conntrack_zones.h>
#include "conntrack.h"
#include "datapath.h"
@@ -860,6 +861,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
#endif
bool post_ct = false;
int res, err;
+ u16 zone = 0;
/* Extract metadata from packet. */
if (tun_info) {
@@ -898,6 +900,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->recirc_id = tc_ext ? tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
post_ct = tc_ext ? tc_ext->post_ct : false;
+ zone = post_ct ? tc_ext->zone : 0;
} else {
key->recirc_id = 0;
}
@@ -906,8 +909,11 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
#endif
err = key_extract(skb, key);
- if (!err)
+ if (!err) {
ovs_ct_fill_key(skb, key, post_ct); /* Must be after key_extract(). */
+ if (post_ct && !skb_get_nfct(skb))
+ key->ct_zone = zone;
+ }
return err;
}
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 90866ae45573..ab3591408419 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -690,10 +690,10 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
u8 family, u16 zone, bool *defrag)
{
enum ip_conntrack_info ctinfo;
- struct qdisc_skb_cb cb;
struct nf_conn *ct;
int err = 0;
bool frag;
+ u16 mru;
/* Previously seen (loopback)? Ignore. */
ct = nf_ct_get(skb, &ctinfo);
@@ -708,7 +708,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
return err;
skb_get(skb);
- cb = *qdisc_skb_cb(skb);
+ mru = tc_skb_cb(skb)->mru;
if (family == NFPROTO_IPV4) {
enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
@@ -722,7 +722,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
if (!err) {
*defrag = true;
- cb.mru = IPCB(skb)->frag_max_size;
+ mru = IPCB(skb)->frag_max_size;
}
} else { /* NFPROTO_IPV6 */
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
@@ -735,7 +735,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
if (!err) {
*defrag = true;
- cb.mru = IP6CB(skb)->frag_max_size;
+ mru = IP6CB(skb)->frag_max_size;
}
#else
err = -EOPNOTSUPP;
@@ -744,7 +744,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
}
if (err != -EINPROGRESS)
- *qdisc_skb_cb(skb) = cb;
+ tc_skb_cb(skb)->mru = mru;
skb_clear_hash(skb);
skb->ignore_df = 1;
return err;
@@ -963,7 +963,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
tcf_action_update_bstats(&c->common, skb);
if (clear) {
- qdisc_skb_cb(skb)->post_ct = false;
+ tc_skb_cb(skb)->post_ct = false;
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
nf_conntrack_put(&ct->ct_general);
@@ -1048,7 +1048,8 @@ do_nat:
out_push:
skb_push_rcsum(skb, nh_ofs);
- qdisc_skb_cb(skb)->post_ct = true;
+ tc_skb_cb(skb)->post_ct = true;
+ tc_skb_cb(skb)->zone = p->zone;
out_clear:
if (defrag)
qdisc_skb_cb(skb)->pkt_len = skb->len;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e54f0a42270c..35c74bdde848 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1617,12 +1617,15 @@ int tcf_classify(struct sk_buff *skb,
/* If we missed on some chain */
if (ret == TC_ACT_UNSPEC && last_executed_chain) {
+ struct tc_skb_cb *cb = tc_skb_cb(skb);
+
ext = tc_skb_ext_alloc(skb);
if (WARN_ON_ONCE(!ext))
return TC_ACT_SHOT;
ext->chain = last_executed_chain;
- ext->mru = qdisc_skb_cb(skb)->mru;
- ext->post_ct = qdisc_skb_cb(skb)->post_ct;
+ ext->mru = cb->mru;
+ ext->post_ct = cb->post_ct;
+ ext->zone = cb->zone;
}
return ret;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index aab13ba11767..ef54ed395874 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -19,6 +19,7 @@
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
+#include <net/pkt_sched.h>
#include <net/ip.h>
#include <net/flow_dissector.h>
#include <net/geneve.h>
@@ -309,7 +310,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct cls_fl_head *head = rcu_dereference_bh(tp->root);
- bool post_ct = qdisc_skb_cb(skb)->post_ct;
+ bool post_ct = tc_skb_cb(skb)->post_ct;
+ u16 zone = tc_skb_cb(skb)->zone;
struct fl_flow_key skb_key;
struct fl_flow_mask *mask;
struct cls_fl_filter *f;
@@ -327,7 +329,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
fl_ct_info_to_flower_map,
ARRAY_SIZE(fl_ct_info_to_flower_map),
- post_ct);
+ post_ct, zone);
skb_flow_dissect_hash(skb, &mask->dissector, &skb_key);
skb_flow_dissect(skb, &mask->dissector, &skb_key,
FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c
index 8c06381391d6..5ded4c8672a6 100644
--- a/net/sched/sch_frag.c
+++ b/net/sched/sch_frag.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
#include <net/netlink.h>
#include <net/sch_generic.h>
+#include <net/pkt_sched.h>
#include <net/dst.h>
#include <net/ip.h>
#include <net/ip6_fib.h>
@@ -137,7 +138,7 @@ err:
int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb))
{
- u16 mru = qdisc_skb_cb(skb)->mru;
+ u16 mru = tc_skb_cb(skb)->mru;
int err;
if (mru && skb->len > mru + skb->dev->hard_header_len)